illian01 committed on
Commit
808a262
·
1 Parent(s): 95f7f1b

Add weights and config

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
mlc-chat-config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "q4f16_0",
5
+ "model_config": {
6
+ "hidden_size": 4096,
7
+ "intermediate_size": 11008,
8
+ "num_attention_heads": 32,
9
+ "num_hidden_layers": 17,
10
+ "rms_norm_eps": 1e-06,
11
+ "vocab_size": 32000,
12
+ "position_embedding_base": 10000,
13
+ "context_window_size": 2048,
14
+ "prefill_chunk_size": 2048,
15
+ "num_key_value_heads": 32,
16
+ "head_dim": 128,
17
+ "tensor_parallel_shards": 1,
18
+ "max_batch_size": 80
19
+ },
20
+ "vocab_size": 32000,
21
+ "context_window_size": 2048,
22
+ "sliding_window_size": -1,
23
+ "prefill_chunk_size": 2048,
24
+ "attention_sink_size": -1,
25
+ "tensor_parallel_shards": 1,
26
+ "mean_gen_len": 128,
27
+ "max_gen_len": 512,
28
+ "shift_fill_factor": 0.3,
29
+ "temperature": 0.6,
30
+ "presence_penalty": 0.0,
31
+ "frequency_penalty": 0.0,
32
+ "repetition_penalty": 1.0,
33
+ "top_p": 0.9,
34
+ "tokenizer_files": [
35
+ "tokenizer.model",
36
+ "tokenizer_config.json",
37
+ "tokenizer.json"
38
+ ],
39
+ "tokenizer_info": {
40
+ "token_postproc_method": "byte_fallback",
41
+ "prepend_space_in_encode": true,
42
+ "strip_space_in_decode": true
43
+ },
44
+ "conv_template": {
45
+ "name": "st-llm",
46
+ "system_template": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{system_message}</s>",
47
+ "system_message": "You are a helpful, respectful and honest assistant.",
48
+ "system_prefix_token_ids": null,
49
+ "add_role_after_system_message": true,
50
+ "roles": {
51
+ "user": "### Input:",
52
+ "assistant": "### Response:"
53
+ },
54
+ "role_templates": {
55
+ "user": "{user_message}",
56
+ "assistant": "{assistant_message}",
57
+ "tool": "{tool_message}"
58
+ },
59
+ "messages": [],
60
+ "seps": [
61
+ "</s>"
62
+ ],
63
+ "role_content_sep": "\n",
64
+ "role_empty_sep": "\n",
65
+ "stop_str": [
66
+ "</s>"
67
+ ],
68
+ "stop_token_ids": [
69
+ 2
70
+ ],
71
+ "function_string": "",
72
+ "use_function_calling": false
73
+ },
74
+ "pad_token_id": 0,
75
+ "bos_token_id": 1,
76
+ "eos_token_id": 2
77
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 175,
4
+ "ParamBytes": 2082955264.0,
5
+ "BitsPerParam": 4.50044525764654
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 65536000,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.q_weight",
15
+ "shape": [
16
+ 32000,
17
+ 512
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 65536000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "0c80f07c0e705a2e2c1cd1d241a4b609"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 33357824,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.q_scale",
34
+ "shape": [
35
+ 32000,
36
+ 128
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 8192000,
41
+ "byteOffset": 0
42
+ },
43
+ {
44
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
45
+ "shape": [
46
+ 512,
47
+ 12288
48
+ ],
49
+ "dtype": "uint32",
50
+ "format": "f32-to-bf16",
51
+ "nbytes": 25165824,
52
+ "byteOffset": 8192000
53
+ }
54
+ ],
55
+ "md5sum": "fa05756d3a4aed82c316579089540aad"
56
+ },
57
+ {
58
+ "dataPath": "params_shard_2.bin",
59
+ "format": "raw-shard",
60
+ "nbytes": 45088768,
61
+ "records": [
62
+ {
63
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
64
+ "shape": [
65
+ 512,
66
+ 22016
67
+ ],
68
+ "dtype": "uint32",
69
+ "format": "f32-to-bf16",
70
+ "nbytes": 45088768,
71
+ "byteOffset": 0
72
+ }
73
+ ],
74
+ "md5sum": "19060424c69b3c54f779bbd7392e9be6"
75
+ },
76
+ {
77
+ "dataPath": "params_shard_3.bin",
78
+ "format": "raw-shard",
79
+ "nbytes": 22544384,
80
+ "records": [
81
+ {
82
+ "name": "model.layers.0.mlp.down_proj.q_weight",
83
+ "shape": [
84
+ 1376,
85
+ 4096
86
+ ],
87
+ "dtype": "uint32",
88
+ "format": "f32-to-bf16",
89
+ "nbytes": 22544384,
90
+ "byteOffset": 0
91
+ }
92
+ ],
93
+ "md5sum": "4b814bbb00bc27d9a9e3850331579676"
94
+ },
95
+ {
96
+ "dataPath": "params_shard_4.bin",
97
+ "format": "raw-shard",
98
+ "nbytes": 25165824,
99
+ "records": [
100
+ {
101
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
102
+ "shape": [
103
+ 512,
104
+ 12288
105
+ ],
106
+ "dtype": "uint32",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 25165824,
109
+ "byteOffset": 0
110
+ }
111
+ ],
112
+ "md5sum": "fcde8719ca33311adb9e209901a7a1e1"
113
+ },
114
+ {
115
+ "dataPath": "params_shard_5.bin",
116
+ "format": "raw-shard",
117
+ "nbytes": 32587776,
118
+ "records": [
119
+ {
120
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
121
+ "shape": [
122
+ 128,
123
+ 12288
124
+ ],
125
+ "dtype": "float16",
126
+ "format": "f32-to-bf16",
127
+ "nbytes": 3145728,
128
+ "byteOffset": 0
129
+ },
130
+ {
131
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
132
+ "shape": [
133
+ 512,
134
+ 4096
135
+ ],
136
+ "dtype": "uint32",
137
+ "format": "f32-to-bf16",
138
+ "nbytes": 8388608,
139
+ "byteOffset": 3145728
140
+ },
141
+ {
142
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
143
+ "shape": [
144
+ 128,
145
+ 4096
146
+ ],
147
+ "dtype": "float16",
148
+ "format": "f32-to-bf16",
149
+ "nbytes": 1048576,
150
+ "byteOffset": 11534336
151
+ },
152
+ {
153
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
154
+ "shape": [
155
+ 128,
156
+ 22016
157
+ ],
158
+ "dtype": "float16",
159
+ "format": "f32-to-bf16",
160
+ "nbytes": 5636096,
161
+ "byteOffset": 12582912
162
+ },
163
+ {
164
+ "name": "model.layers.0.mlp.down_proj.q_scale",
165
+ "shape": [
166
+ 344,
167
+ 4096
168
+ ],
169
+ "dtype": "float16",
170
+ "format": "f32-to-bf16",
171
+ "nbytes": 2818048,
172
+ "byteOffset": 18219008
173
+ },
174
+ {
175
+ "name": "model.layers.0.input_layernorm.weight",
176
+ "shape": [
177
+ 4096
178
+ ],
179
+ "dtype": "float16",
180
+ "format": "f32-to-bf16",
181
+ "nbytes": 8192,
182
+ "byteOffset": 21037056
183
+ },
184
+ {
185
+ "name": "model.layers.0.post_attention_layernorm.weight",
186
+ "shape": [
187
+ 4096
188
+ ],
189
+ "dtype": "float16",
190
+ "format": "f32-to-bf16",
191
+ "nbytes": 8192,
192
+ "byteOffset": 21045248
193
+ },
194
+ {
195
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
196
+ "shape": [
197
+ 128,
198
+ 12288
199
+ ],
200
+ "dtype": "float16",
201
+ "format": "f32-to-bf16",
202
+ "nbytes": 3145728,
203
+ "byteOffset": 21053440
204
+ },
205
+ {
206
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
207
+ "shape": [
208
+ 512,
209
+ 4096
210
+ ],
211
+ "dtype": "uint32",
212
+ "format": "f32-to-bf16",
213
+ "nbytes": 8388608,
214
+ "byteOffset": 24199168
215
+ }
216
+ ],
217
+ "md5sum": "7cce174cd57c88824eb70a4ef149c88c"
218
+ },
219
+ {
220
+ "dataPath": "params_shard_6.bin",
221
+ "format": "raw-shard",
222
+ "nbytes": 45088768,
223
+ "records": [
224
+ {
225
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
226
+ "shape": [
227
+ 512,
228
+ 22016
229
+ ],
230
+ "dtype": "uint32",
231
+ "format": "f32-to-bf16",
232
+ "nbytes": 45088768,
233
+ "byteOffset": 0
234
+ }
235
+ ],
236
+ "md5sum": "97b234afce902138e94878f8ef1a629c"
237
+ },
238
+ {
239
+ "dataPath": "params_shard_7.bin",
240
+ "format": "raw-shard",
241
+ "nbytes": 25165824,
242
+ "records": [
243
+ {
244
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
245
+ "shape": [
246
+ 512,
247
+ 12288
248
+ ],
249
+ "dtype": "uint32",
250
+ "format": "f32-to-bf16",
251
+ "nbytes": 25165824,
252
+ "byteOffset": 0
253
+ }
254
+ ],
255
+ "md5sum": "a736145d80831f744644da17850b1bb0"
256
+ },
257
+ {
258
+ "dataPath": "params_shard_8.bin",
259
+ "format": "raw-shard",
260
+ "nbytes": 32063488,
261
+ "records": [
262
+ {
263
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
264
+ "shape": [
265
+ 128,
266
+ 4096
267
+ ],
268
+ "dtype": "float16",
269
+ "format": "f32-to-bf16",
270
+ "nbytes": 1048576,
271
+ "byteOffset": 0
272
+ },
273
+ {
274
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
275
+ "shape": [
276
+ 128,
277
+ 22016
278
+ ],
279
+ "dtype": "float16",
280
+ "format": "f32-to-bf16",
281
+ "nbytes": 5636096,
282
+ "byteOffset": 1048576
283
+ },
284
+ {
285
+ "name": "model.layers.1.mlp.down_proj.q_weight",
286
+ "shape": [
287
+ 1376,
288
+ 4096
289
+ ],
290
+ "dtype": "uint32",
291
+ "format": "f32-to-bf16",
292
+ "nbytes": 22544384,
293
+ "byteOffset": 6684672
294
+ },
295
+ {
296
+ "name": "model.layers.1.mlp.down_proj.q_scale",
297
+ "shape": [
298
+ 344,
299
+ 4096
300
+ ],
301
+ "dtype": "float16",
302
+ "format": "f32-to-bf16",
303
+ "nbytes": 2818048,
304
+ "byteOffset": 29229056
305
+ },
306
+ {
307
+ "name": "model.layers.1.input_layernorm.weight",
308
+ "shape": [
309
+ 4096
310
+ ],
311
+ "dtype": "float16",
312
+ "format": "f32-to-bf16",
313
+ "nbytes": 8192,
314
+ "byteOffset": 32047104
315
+ },
316
+ {
317
+ "name": "model.layers.1.post_attention_layernorm.weight",
318
+ "shape": [
319
+ 4096
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 8192,
324
+ "byteOffset": 32055296
325
+ }
326
+ ],
327
+ "md5sum": "0b4f5bc34791a1e54dea410ec4c052b6"
328
+ },
329
+ {
330
+ "dataPath": "params_shard_9.bin",
331
+ "format": "raw-shard",
332
+ "nbytes": 45088768,
333
+ "records": [
334
+ {
335
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
336
+ "shape": [
337
+ 512,
338
+ 22016
339
+ ],
340
+ "dtype": "uint32",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 45088768,
343
+ "byteOffset": 0
344
+ }
345
+ ],
346
+ "md5sum": "4db94cff0e44289f9eb44649315c1943"
347
+ },
348
+ {
349
+ "dataPath": "params_shard_10.bin",
350
+ "format": "raw-shard",
351
+ "nbytes": 22544384,
352
+ "records": [
353
+ {
354
+ "name": "model.layers.2.mlp.down_proj.q_weight",
355
+ "shape": [
356
+ 1376,
357
+ 4096
358
+ ],
359
+ "dtype": "uint32",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 22544384,
362
+ "byteOffset": 0
363
+ }
364
+ ],
365
+ "md5sum": "7c5a83bd1092af3833edef1a9adaa42d"
366
+ },
367
+ {
368
+ "dataPath": "params_shard_11.bin",
369
+ "format": "raw-shard",
370
+ "nbytes": 25165824,
371
+ "records": [
372
+ {
373
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
374
+ "shape": [
375
+ 512,
376
+ 12288
377
+ ],
378
+ "dtype": "uint32",
379
+ "format": "f32-to-bf16",
380
+ "nbytes": 25165824,
381
+ "byteOffset": 0
382
+ }
383
+ ],
384
+ "md5sum": "98699a3c8ec694dbf09a9381dc979df4"
385
+ },
386
+ {
387
+ "dataPath": "params_shard_12.bin",
388
+ "format": "raw-shard",
389
+ "nbytes": 32587776,
390
+ "records": [
391
+ {
392
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
393
+ "shape": [
394
+ 128,
395
+ 12288
396
+ ],
397
+ "dtype": "float16",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 3145728,
400
+ "byteOffset": 0
401
+ },
402
+ {
403
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
404
+ "shape": [
405
+ 512,
406
+ 4096
407
+ ],
408
+ "dtype": "uint32",
409
+ "format": "f32-to-bf16",
410
+ "nbytes": 8388608,
411
+ "byteOffset": 3145728
412
+ },
413
+ {
414
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
415
+ "shape": [
416
+ 128,
417
+ 4096
418
+ ],
419
+ "dtype": "float16",
420
+ "format": "f32-to-bf16",
421
+ "nbytes": 1048576,
422
+ "byteOffset": 11534336
423
+ },
424
+ {
425
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
426
+ "shape": [
427
+ 128,
428
+ 22016
429
+ ],
430
+ "dtype": "float16",
431
+ "format": "f32-to-bf16",
432
+ "nbytes": 5636096,
433
+ "byteOffset": 12582912
434
+ },
435
+ {
436
+ "name": "model.layers.2.mlp.down_proj.q_scale",
437
+ "shape": [
438
+ 344,
439
+ 4096
440
+ ],
441
+ "dtype": "float16",
442
+ "format": "f32-to-bf16",
443
+ "nbytes": 2818048,
444
+ "byteOffset": 18219008
445
+ },
446
+ {
447
+ "name": "model.layers.2.input_layernorm.weight",
448
+ "shape": [
449
+ 4096
450
+ ],
451
+ "dtype": "float16",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 8192,
454
+ "byteOffset": 21037056
455
+ },
456
+ {
457
+ "name": "model.layers.2.post_attention_layernorm.weight",
458
+ "shape": [
459
+ 4096
460
+ ],
461
+ "dtype": "float16",
462
+ "format": "f32-to-bf16",
463
+ "nbytes": 8192,
464
+ "byteOffset": 21045248
465
+ },
466
+ {
467
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
468
+ "shape": [
469
+ 128,
470
+ 12288
471
+ ],
472
+ "dtype": "float16",
473
+ "format": "f32-to-bf16",
474
+ "nbytes": 3145728,
475
+ "byteOffset": 21053440
476
+ },
477
+ {
478
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
479
+ "shape": [
480
+ 512,
481
+ 4096
482
+ ],
483
+ "dtype": "uint32",
484
+ "format": "f32-to-bf16",
485
+ "nbytes": 8388608,
486
+ "byteOffset": 24199168
487
+ }
488
+ ],
489
+ "md5sum": "b5eadcd590ed9992bf56fa27361040a6"
490
+ },
491
+ {
492
+ "dataPath": "params_shard_13.bin",
493
+ "format": "raw-shard",
494
+ "nbytes": 45088768,
495
+ "records": [
496
+ {
497
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
498
+ "shape": [
499
+ 512,
500
+ 22016
501
+ ],
502
+ "dtype": "uint32",
503
+ "format": "f32-to-bf16",
504
+ "nbytes": 45088768,
505
+ "byteOffset": 0
506
+ }
507
+ ],
508
+ "md5sum": "c0913953918e9ff10b393a2350c541a8"
509
+ },
510
+ {
511
+ "dataPath": "params_shard_14.bin",
512
+ "format": "raw-shard",
513
+ "nbytes": 25165824,
514
+ "records": [
515
+ {
516
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
517
+ "shape": [
518
+ 512,
519
+ 12288
520
+ ],
521
+ "dtype": "uint32",
522
+ "format": "f32-to-bf16",
523
+ "nbytes": 25165824,
524
+ "byteOffset": 0
525
+ }
526
+ ],
527
+ "md5sum": "1104e4e7bed02d7e43586f5dcb57fbc8"
528
+ },
529
+ {
530
+ "dataPath": "params_shard_15.bin",
531
+ "format": "raw-shard",
532
+ "nbytes": 32063488,
533
+ "records": [
534
+ {
535
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
536
+ "shape": [
537
+ 128,
538
+ 4096
539
+ ],
540
+ "dtype": "float16",
541
+ "format": "f32-to-bf16",
542
+ "nbytes": 1048576,
543
+ "byteOffset": 0
544
+ },
545
+ {
546
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
547
+ "shape": [
548
+ 128,
549
+ 22016
550
+ ],
551
+ "dtype": "float16",
552
+ "format": "f32-to-bf16",
553
+ "nbytes": 5636096,
554
+ "byteOffset": 1048576
555
+ },
556
+ {
557
+ "name": "model.layers.3.mlp.down_proj.q_weight",
558
+ "shape": [
559
+ 1376,
560
+ 4096
561
+ ],
562
+ "dtype": "uint32",
563
+ "format": "f32-to-bf16",
564
+ "nbytes": 22544384,
565
+ "byteOffset": 6684672
566
+ },
567
+ {
568
+ "name": "model.layers.3.mlp.down_proj.q_scale",
569
+ "shape": [
570
+ 344,
571
+ 4096
572
+ ],
573
+ "dtype": "float16",
574
+ "format": "f32-to-bf16",
575
+ "nbytes": 2818048,
576
+ "byteOffset": 29229056
577
+ },
578
+ {
579
+ "name": "model.layers.3.input_layernorm.weight",
580
+ "shape": [
581
+ 4096
582
+ ],
583
+ "dtype": "float16",
584
+ "format": "f32-to-bf16",
585
+ "nbytes": 8192,
586
+ "byteOffset": 32047104
587
+ },
588
+ {
589
+ "name": "model.layers.3.post_attention_layernorm.weight",
590
+ "shape": [
591
+ 4096
592
+ ],
593
+ "dtype": "float16",
594
+ "format": "f32-to-bf16",
595
+ "nbytes": 8192,
596
+ "byteOffset": 32055296
597
+ }
598
+ ],
599
+ "md5sum": "b99070c18ef1502bf8efbd1f9c680bbd"
600
+ },
601
+ {
602
+ "dataPath": "params_shard_16.bin",
603
+ "format": "raw-shard",
604
+ "nbytes": 45088768,
605
+ "records": [
606
+ {
607
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
608
+ "shape": [
609
+ 512,
610
+ 22016
611
+ ],
612
+ "dtype": "uint32",
613
+ "format": "f32-to-bf16",
614
+ "nbytes": 45088768,
615
+ "byteOffset": 0
616
+ }
617
+ ],
618
+ "md5sum": "1e5a41fbee06aa0c682d28fe5f2a7f8e"
619
+ },
620
+ {
621
+ "dataPath": "params_shard_17.bin",
622
+ "format": "raw-shard",
623
+ "nbytes": 22544384,
624
+ "records": [
625
+ {
626
+ "name": "model.layers.4.mlp.down_proj.q_weight",
627
+ "shape": [
628
+ 1376,
629
+ 4096
630
+ ],
631
+ "dtype": "uint32",
632
+ "format": "f32-to-bf16",
633
+ "nbytes": 22544384,
634
+ "byteOffset": 0
635
+ }
636
+ ],
637
+ "md5sum": "fd8b195c0fe64cc679731e416fcbda52"
638
+ },
639
+ {
640
+ "dataPath": "params_shard_18.bin",
641
+ "format": "raw-shard",
642
+ "nbytes": 25165824,
643
+ "records": [
644
+ {
645
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
646
+ "shape": [
647
+ 512,
648
+ 12288
649
+ ],
650
+ "dtype": "uint32",
651
+ "format": "f32-to-bf16",
652
+ "nbytes": 25165824,
653
+ "byteOffset": 0
654
+ }
655
+ ],
656
+ "md5sum": "c8a5c784a179b2d26121e20ef4518e52"
657
+ },
658
+ {
659
+ "dataPath": "params_shard_19.bin",
660
+ "format": "raw-shard",
661
+ "nbytes": 32587776,
662
+ "records": [
663
+ {
664
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
665
+ "shape": [
666
+ 128,
667
+ 12288
668
+ ],
669
+ "dtype": "float16",
670
+ "format": "f32-to-bf16",
671
+ "nbytes": 3145728,
672
+ "byteOffset": 0
673
+ },
674
+ {
675
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
676
+ "shape": [
677
+ 512,
678
+ 4096
679
+ ],
680
+ "dtype": "uint32",
681
+ "format": "f32-to-bf16",
682
+ "nbytes": 8388608,
683
+ "byteOffset": 3145728
684
+ },
685
+ {
686
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
687
+ "shape": [
688
+ 128,
689
+ 4096
690
+ ],
691
+ "dtype": "float16",
692
+ "format": "f32-to-bf16",
693
+ "nbytes": 1048576,
694
+ "byteOffset": 11534336
695
+ },
696
+ {
697
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
698
+ "shape": [
699
+ 128,
700
+ 22016
701
+ ],
702
+ "dtype": "float16",
703
+ "format": "f32-to-bf16",
704
+ "nbytes": 5636096,
705
+ "byteOffset": 12582912
706
+ },
707
+ {
708
+ "name": "model.layers.4.mlp.down_proj.q_scale",
709
+ "shape": [
710
+ 344,
711
+ 4096
712
+ ],
713
+ "dtype": "float16",
714
+ "format": "f32-to-bf16",
715
+ "nbytes": 2818048,
716
+ "byteOffset": 18219008
717
+ },
718
+ {
719
+ "name": "model.layers.4.input_layernorm.weight",
720
+ "shape": [
721
+ 4096
722
+ ],
723
+ "dtype": "float16",
724
+ "format": "f32-to-bf16",
725
+ "nbytes": 8192,
726
+ "byteOffset": 21037056
727
+ },
728
+ {
729
+ "name": "model.layers.4.post_attention_layernorm.weight",
730
+ "shape": [
731
+ 4096
732
+ ],
733
+ "dtype": "float16",
734
+ "format": "f32-to-bf16",
735
+ "nbytes": 8192,
736
+ "byteOffset": 21045248
737
+ },
738
+ {
739
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
740
+ "shape": [
741
+ 128,
742
+ 12288
743
+ ],
744
+ "dtype": "float16",
745
+ "format": "f32-to-bf16",
746
+ "nbytes": 3145728,
747
+ "byteOffset": 21053440
748
+ },
749
+ {
750
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
751
+ "shape": [
752
+ 512,
753
+ 4096
754
+ ],
755
+ "dtype": "uint32",
756
+ "format": "f32-to-bf16",
757
+ "nbytes": 8388608,
758
+ "byteOffset": 24199168
759
+ }
760
+ ],
761
+ "md5sum": "48d58ccb88b0d6d82622304daffd17cc"
762
+ },
763
+ {
764
+ "dataPath": "params_shard_20.bin",
765
+ "format": "raw-shard",
766
+ "nbytes": 45088768,
767
+ "records": [
768
+ {
769
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
770
+ "shape": [
771
+ 512,
772
+ 22016
773
+ ],
774
+ "dtype": "uint32",
775
+ "format": "f32-to-bf16",
776
+ "nbytes": 45088768,
777
+ "byteOffset": 0
778
+ }
779
+ ],
780
+ "md5sum": "7bb64fee1fb04ebad5911123414e7fb5"
781
+ },
782
+ {
783
+ "dataPath": "params_shard_21.bin",
784
+ "format": "raw-shard",
785
+ "nbytes": 25165824,
786
+ "records": [
787
+ {
788
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
789
+ "shape": [
790
+ 512,
791
+ 12288
792
+ ],
793
+ "dtype": "uint32",
794
+ "format": "f32-to-bf16",
795
+ "nbytes": 25165824,
796
+ "byteOffset": 0
797
+ }
798
+ ],
799
+ "md5sum": "79d7bba862e0627a3ce87c78fd0dde4b"
800
+ },
801
+ {
802
+ "dataPath": "params_shard_22.bin",
803
+ "format": "raw-shard",
804
+ "nbytes": 32063488,
805
+ "records": [
806
+ {
807
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
808
+ "shape": [
809
+ 128,
810
+ 4096
811
+ ],
812
+ "dtype": "float16",
813
+ "format": "f32-to-bf16",
814
+ "nbytes": 1048576,
815
+ "byteOffset": 0
816
+ },
817
+ {
818
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
819
+ "shape": [
820
+ 128,
821
+ 22016
822
+ ],
823
+ "dtype": "float16",
824
+ "format": "f32-to-bf16",
825
+ "nbytes": 5636096,
826
+ "byteOffset": 1048576
827
+ },
828
+ {
829
+ "name": "model.layers.5.mlp.down_proj.q_weight",
830
+ "shape": [
831
+ 1376,
832
+ 4096
833
+ ],
834
+ "dtype": "uint32",
835
+ "format": "f32-to-bf16",
836
+ "nbytes": 22544384,
837
+ "byteOffset": 6684672
838
+ },
839
+ {
840
+ "name": "model.layers.5.mlp.down_proj.q_scale",
841
+ "shape": [
842
+ 344,
843
+ 4096
844
+ ],
845
+ "dtype": "float16",
846
+ "format": "f32-to-bf16",
847
+ "nbytes": 2818048,
848
+ "byteOffset": 29229056
849
+ },
850
+ {
851
+ "name": "model.layers.5.input_layernorm.weight",
852
+ "shape": [
853
+ 4096
854
+ ],
855
+ "dtype": "float16",
856
+ "format": "f32-to-bf16",
857
+ "nbytes": 8192,
858
+ "byteOffset": 32047104
859
+ },
860
+ {
861
+ "name": "model.layers.5.post_attention_layernorm.weight",
862
+ "shape": [
863
+ 4096
864
+ ],
865
+ "dtype": "float16",
866
+ "format": "f32-to-bf16",
867
+ "nbytes": 8192,
868
+ "byteOffset": 32055296
869
+ }
870
+ ],
871
+ "md5sum": "71dddb4c8aca779927e9085c74a4bcf1"
872
+ },
873
+ {
874
+ "dataPath": "params_shard_23.bin",
875
+ "format": "raw-shard",
876
+ "nbytes": 45088768,
877
+ "records": [
878
+ {
879
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
880
+ "shape": [
881
+ 512,
882
+ 22016
883
+ ],
884
+ "dtype": "uint32",
885
+ "format": "f32-to-bf16",
886
+ "nbytes": 45088768,
887
+ "byteOffset": 0
888
+ }
889
+ ],
890
+ "md5sum": "d5e0d9fc20163dcc5ab77757afbd0acf"
891
+ },
892
+ {
893
+ "dataPath": "params_shard_24.bin",
894
+ "format": "raw-shard",
895
+ "nbytes": 22544384,
896
+ "records": [
897
+ {
898
+ "name": "model.layers.6.mlp.down_proj.q_weight",
899
+ "shape": [
900
+ 1376,
901
+ 4096
902
+ ],
903
+ "dtype": "uint32",
904
+ "format": "f32-to-bf16",
905
+ "nbytes": 22544384,
906
+ "byteOffset": 0
907
+ }
908
+ ],
909
+ "md5sum": "522a3e1535cf5940f47ca1132e4d94f1"
910
+ },
911
+ {
912
+ "dataPath": "params_shard_25.bin",
913
+ "format": "raw-shard",
914
+ "nbytes": 25165824,
915
+ "records": [
916
+ {
917
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
918
+ "shape": [
919
+ 512,
920
+ 12288
921
+ ],
922
+ "dtype": "uint32",
923
+ "format": "f32-to-bf16",
924
+ "nbytes": 25165824,
925
+ "byteOffset": 0
926
+ }
927
+ ],
928
+ "md5sum": "5625f757d660c2b5f6894c0a452b4f9e"
929
+ },
930
+ {
931
+ "dataPath": "params_shard_26.bin",
932
+ "format": "raw-shard",
933
+ "nbytes": 32587776,
934
+ "records": [
935
+ {
936
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
937
+ "shape": [
938
+ 128,
939
+ 12288
940
+ ],
941
+ "dtype": "float16",
942
+ "format": "f32-to-bf16",
943
+ "nbytes": 3145728,
944
+ "byteOffset": 0
945
+ },
946
+ {
947
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
948
+ "shape": [
949
+ 512,
950
+ 4096
951
+ ],
952
+ "dtype": "uint32",
953
+ "format": "f32-to-bf16",
954
+ "nbytes": 8388608,
955
+ "byteOffset": 3145728
956
+ },
957
+ {
958
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
959
+ "shape": [
960
+ 128,
961
+ 4096
962
+ ],
963
+ "dtype": "float16",
964
+ "format": "f32-to-bf16",
965
+ "nbytes": 1048576,
966
+ "byteOffset": 11534336
967
+ },
968
+ {
969
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
970
+ "shape": [
971
+ 128,
972
+ 22016
973
+ ],
974
+ "dtype": "float16",
975
+ "format": "f32-to-bf16",
976
+ "nbytes": 5636096,
977
+ "byteOffset": 12582912
978
+ },
979
+ {
980
+ "name": "model.layers.6.mlp.down_proj.q_scale",
981
+ "shape": [
982
+ 344,
983
+ 4096
984
+ ],
985
+ "dtype": "float16",
986
+ "format": "f32-to-bf16",
987
+ "nbytes": 2818048,
988
+ "byteOffset": 18219008
989
+ },
990
+ {
991
+ "name": "model.layers.6.input_layernorm.weight",
992
+ "shape": [
993
+ 4096
994
+ ],
995
+ "dtype": "float16",
996
+ "format": "f32-to-bf16",
997
+ "nbytes": 8192,
998
+ "byteOffset": 21037056
999
+ },
1000
+ {
1001
+ "name": "model.layers.6.post_attention_layernorm.weight",
1002
+ "shape": [
1003
+ 4096
1004
+ ],
1005
+ "dtype": "float16",
1006
+ "format": "f32-to-bf16",
1007
+ "nbytes": 8192,
1008
+ "byteOffset": 21045248
1009
+ },
1010
+ {
1011
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
1012
+ "shape": [
1013
+ 128,
1014
+ 12288
1015
+ ],
1016
+ "dtype": "float16",
1017
+ "format": "f32-to-bf16",
1018
+ "nbytes": 3145728,
1019
+ "byteOffset": 21053440
1020
+ },
1021
+ {
1022
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
1023
+ "shape": [
1024
+ 512,
1025
+ 4096
1026
+ ],
1027
+ "dtype": "uint32",
1028
+ "format": "f32-to-bf16",
1029
+ "nbytes": 8388608,
1030
+ "byteOffset": 24199168
1031
+ }
1032
+ ],
1033
+ "md5sum": "c123b8433a0d596eea37c76bffd74c6d"
1034
+ },
1035
+ {
1036
+ "dataPath": "params_shard_27.bin",
1037
+ "format": "raw-shard",
1038
+ "nbytes": 45088768,
1039
+ "records": [
1040
+ {
1041
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
1042
+ "shape": [
1043
+ 512,
1044
+ 22016
1045
+ ],
1046
+ "dtype": "uint32",
1047
+ "format": "f32-to-bf16",
1048
+ "nbytes": 45088768,
1049
+ "byteOffset": 0
1050
+ }
1051
+ ],
1052
+ "md5sum": "805e55a7b4e85081f2e8dd60ef166d17"
1053
+ },
1054
+ {
1055
+ "dataPath": "params_shard_28.bin",
1056
+ "format": "raw-shard",
1057
+ "nbytes": 25165824,
1058
+ "records": [
1059
+ {
1060
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
1061
+ "shape": [
1062
+ 512,
1063
+ 12288
1064
+ ],
1065
+ "dtype": "uint32",
1066
+ "format": "f32-to-bf16",
1067
+ "nbytes": 25165824,
1068
+ "byteOffset": 0
1069
+ }
1070
+ ],
1071
+ "md5sum": "dae988315b9cecd5c082a601ba10aaee"
1072
+ },
1073
+ {
1074
+ "dataPath": "params_shard_29.bin",
1075
+ "format": "raw-shard",
1076
+ "nbytes": 32063488,
1077
+ "records": [
1078
+ {
1079
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
1080
+ "shape": [
1081
+ 128,
1082
+ 4096
1083
+ ],
1084
+ "dtype": "float16",
1085
+ "format": "f32-to-bf16",
1086
+ "nbytes": 1048576,
1087
+ "byteOffset": 0
1088
+ },
1089
+ {
1090
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
1091
+ "shape": [
1092
+ 128,
1093
+ 22016
1094
+ ],
1095
+ "dtype": "float16",
1096
+ "format": "f32-to-bf16",
1097
+ "nbytes": 5636096,
1098
+ "byteOffset": 1048576
1099
+ },
1100
+ {
1101
+ "name": "model.layers.7.mlp.down_proj.q_weight",
1102
+ "shape": [
1103
+ 1376,
1104
+ 4096
1105
+ ],
1106
+ "dtype": "uint32",
1107
+ "format": "f32-to-bf16",
1108
+ "nbytes": 22544384,
1109
+ "byteOffset": 6684672
1110
+ },
1111
+ {
1112
+ "name": "model.layers.7.mlp.down_proj.q_scale",
1113
+ "shape": [
1114
+ 344,
1115
+ 4096
1116
+ ],
1117
+ "dtype": "float16",
1118
+ "format": "f32-to-bf16",
1119
+ "nbytes": 2818048,
1120
+ "byteOffset": 29229056
1121
+ },
1122
+ {
1123
+ "name": "model.layers.7.input_layernorm.weight",
1124
+ "shape": [
1125
+ 4096
1126
+ ],
1127
+ "dtype": "float16",
1128
+ "format": "f32-to-bf16",
1129
+ "nbytes": 8192,
1130
+ "byteOffset": 32047104
1131
+ },
1132
+ {
1133
+ "name": "model.layers.7.post_attention_layernorm.weight",
1134
+ "shape": [
1135
+ 4096
1136
+ ],
1137
+ "dtype": "float16",
1138
+ "format": "f32-to-bf16",
1139
+ "nbytes": 8192,
1140
+ "byteOffset": 32055296
1141
+ }
1142
+ ],
1143
+ "md5sum": "36cee653735ee2a05805c04ea3dcc1f7"
1144
+ },
1145
+ {
1146
+ "dataPath": "params_shard_30.bin",
1147
+ "format": "raw-shard",
1148
+ "nbytes": 45088768,
1149
+ "records": [
1150
+ {
1151
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
1152
+ "shape": [
1153
+ 512,
1154
+ 22016
1155
+ ],
1156
+ "dtype": "uint32",
1157
+ "format": "f32-to-bf16",
1158
+ "nbytes": 45088768,
1159
+ "byteOffset": 0
1160
+ }
1161
+ ],
1162
+ "md5sum": "49bce51187a8083c60a9538181193a97"
1163
+ },
1164
+ {
1165
+ "dataPath": "params_shard_31.bin",
1166
+ "format": "raw-shard",
1167
+ "nbytes": 22544384,
1168
+ "records": [
1169
+ {
1170
+ "name": "model.layers.8.mlp.down_proj.q_weight",
1171
+ "shape": [
1172
+ 1376,
1173
+ 4096
1174
+ ],
1175
+ "dtype": "uint32",
1176
+ "format": "f32-to-bf16",
1177
+ "nbytes": 22544384,
1178
+ "byteOffset": 0
1179
+ }
1180
+ ],
1181
+ "md5sum": "6d6079646311cfbd581ad2ae09c512d7"
1182
+ },
1183
+ {
1184
+ "dataPath": "params_shard_32.bin",
1185
+ "format": "raw-shard",
1186
+ "nbytes": 25165824,
1187
+ "records": [
1188
+ {
1189
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
1190
+ "shape": [
1191
+ 512,
1192
+ 12288
1193
+ ],
1194
+ "dtype": "uint32",
1195
+ "format": "f32-to-bf16",
1196
+ "nbytes": 25165824,
1197
+ "byteOffset": 0
1198
+ }
1199
+ ],
1200
+ "md5sum": "80c576a52474d7eb2735769b84b3da0a"
1201
+ },
1202
+ {
1203
+ "dataPath": "params_shard_33.bin",
1204
+ "format": "raw-shard",
1205
+ "nbytes": 32587776,
1206
+ "records": [
1207
+ {
1208
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
1209
+ "shape": [
1210
+ 128,
1211
+ 12288
1212
+ ],
1213
+ "dtype": "float16",
1214
+ "format": "f32-to-bf16",
1215
+ "nbytes": 3145728,
1216
+ "byteOffset": 0
1217
+ },
1218
+ {
1219
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
1220
+ "shape": [
1221
+ 512,
1222
+ 4096
1223
+ ],
1224
+ "dtype": "uint32",
1225
+ "format": "f32-to-bf16",
1226
+ "nbytes": 8388608,
1227
+ "byteOffset": 3145728
1228
+ },
1229
+ {
1230
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
1231
+ "shape": [
1232
+ 128,
1233
+ 4096
1234
+ ],
1235
+ "dtype": "float16",
1236
+ "format": "f32-to-bf16",
1237
+ "nbytes": 1048576,
1238
+ "byteOffset": 11534336
1239
+ },
1240
+ {
1241
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
1242
+ "shape": [
1243
+ 128,
1244
+ 22016
1245
+ ],
1246
+ "dtype": "float16",
1247
+ "format": "f32-to-bf16",
1248
+ "nbytes": 5636096,
1249
+ "byteOffset": 12582912
1250
+ },
1251
+ {
1252
+ "name": "model.layers.8.mlp.down_proj.q_scale",
1253
+ "shape": [
1254
+ 344,
1255
+ 4096
1256
+ ],
1257
+ "dtype": "float16",
1258
+ "format": "f32-to-bf16",
1259
+ "nbytes": 2818048,
1260
+ "byteOffset": 18219008
1261
+ },
1262
+ {
1263
+ "name": "model.layers.8.input_layernorm.weight",
1264
+ "shape": [
1265
+ 4096
1266
+ ],
1267
+ "dtype": "float16",
1268
+ "format": "f32-to-bf16",
1269
+ "nbytes": 8192,
1270
+ "byteOffset": 21037056
1271
+ },
1272
+ {
1273
+ "name": "model.layers.8.post_attention_layernorm.weight",
1274
+ "shape": [
1275
+ 4096
1276
+ ],
1277
+ "dtype": "float16",
1278
+ "format": "f32-to-bf16",
1279
+ "nbytes": 8192,
1280
+ "byteOffset": 21045248
1281
+ },
1282
+ {
1283
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
1284
+ "shape": [
1285
+ 128,
1286
+ 12288
1287
+ ],
1288
+ "dtype": "float16",
1289
+ "format": "f32-to-bf16",
1290
+ "nbytes": 3145728,
1291
+ "byteOffset": 21053440
1292
+ },
1293
+ {
1294
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
1295
+ "shape": [
1296
+ 512,
1297
+ 4096
1298
+ ],
1299
+ "dtype": "uint32",
1300
+ "format": "f32-to-bf16",
1301
+ "nbytes": 8388608,
1302
+ "byteOffset": 24199168
1303
+ }
1304
+ ],
1305
+ "md5sum": "dce80d098b2b47b0a422ca585a8d857a"
1306
+ },
1307
+ {
1308
+ "dataPath": "params_shard_34.bin",
1309
+ "format": "raw-shard",
1310
+ "nbytes": 45088768,
1311
+ "records": [
1312
+ {
1313
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
1314
+ "shape": [
1315
+ 512,
1316
+ 22016
1317
+ ],
1318
+ "dtype": "uint32",
1319
+ "format": "f32-to-bf16",
1320
+ "nbytes": 45088768,
1321
+ "byteOffset": 0
1322
+ }
1323
+ ],
1324
+ "md5sum": "809ef224699ace8b7b71b36566a58eb3"
1325
+ },
1326
+ {
1327
+ "dataPath": "params_shard_35.bin",
1328
+ "format": "raw-shard",
1329
+ "nbytes": 25165824,
1330
+ "records": [
1331
+ {
1332
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
1333
+ "shape": [
1334
+ 512,
1335
+ 12288
1336
+ ],
1337
+ "dtype": "uint32",
1338
+ "format": "f32-to-bf16",
1339
+ "nbytes": 25165824,
1340
+ "byteOffset": 0
1341
+ }
1342
+ ],
1343
+ "md5sum": "627d3f5e0b7bc2972a13aef95bee1bc5"
1344
+ },
1345
+ {
1346
+ "dataPath": "params_shard_36.bin",
1347
+ "format": "raw-shard",
1348
+ "nbytes": 32063488,
1349
+ "records": [
1350
+ {
1351
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
1352
+ "shape": [
1353
+ 128,
1354
+ 4096
1355
+ ],
1356
+ "dtype": "float16",
1357
+ "format": "f32-to-bf16",
1358
+ "nbytes": 1048576,
1359
+ "byteOffset": 0
1360
+ },
1361
+ {
1362
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
1363
+ "shape": [
1364
+ 128,
1365
+ 22016
1366
+ ],
1367
+ "dtype": "float16",
1368
+ "format": "f32-to-bf16",
1369
+ "nbytes": 5636096,
1370
+ "byteOffset": 1048576
1371
+ },
1372
+ {
1373
+ "name": "model.layers.9.mlp.down_proj.q_weight",
1374
+ "shape": [
1375
+ 1376,
1376
+ 4096
1377
+ ],
1378
+ "dtype": "uint32",
1379
+ "format": "f32-to-bf16",
1380
+ "nbytes": 22544384,
1381
+ "byteOffset": 6684672
1382
+ },
1383
+ {
1384
+ "name": "model.layers.9.mlp.down_proj.q_scale",
1385
+ "shape": [
1386
+ 344,
1387
+ 4096
1388
+ ],
1389
+ "dtype": "float16",
1390
+ "format": "f32-to-bf16",
1391
+ "nbytes": 2818048,
1392
+ "byteOffset": 29229056
1393
+ },
1394
+ {
1395
+ "name": "model.layers.9.input_layernorm.weight",
1396
+ "shape": [
1397
+ 4096
1398
+ ],
1399
+ "dtype": "float16",
1400
+ "format": "f32-to-bf16",
1401
+ "nbytes": 8192,
1402
+ "byteOffset": 32047104
1403
+ },
1404
+ {
1405
+ "name": "model.layers.9.post_attention_layernorm.weight",
1406
+ "shape": [
1407
+ 4096
1408
+ ],
1409
+ "dtype": "float16",
1410
+ "format": "f32-to-bf16",
1411
+ "nbytes": 8192,
1412
+ "byteOffset": 32055296
1413
+ }
1414
+ ],
1415
+ "md5sum": "cdd944ef3abc535c16ab5baebf5515b6"
1416
+ },
1417
+ {
1418
+ "dataPath": "params_shard_37.bin",
1419
+ "format": "raw-shard",
1420
+ "nbytes": 45088768,
1421
+ "records": [
1422
+ {
1423
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
1424
+ "shape": [
1425
+ 512,
1426
+ 22016
1427
+ ],
1428
+ "dtype": "uint32",
1429
+ "format": "f32-to-bf16",
1430
+ "nbytes": 45088768,
1431
+ "byteOffset": 0
1432
+ }
1433
+ ],
1434
+ "md5sum": "84a0d8df7b05f28a5fbf683152ccb758"
1435
+ },
1436
+ {
1437
+ "dataPath": "params_shard_38.bin",
1438
+ "format": "raw-shard",
1439
+ "nbytes": 22544384,
1440
+ "records": [
1441
+ {
1442
+ "name": "model.layers.10.mlp.down_proj.q_weight",
1443
+ "shape": [
1444
+ 1376,
1445
+ 4096
1446
+ ],
1447
+ "dtype": "uint32",
1448
+ "format": "f32-to-bf16",
1449
+ "nbytes": 22544384,
1450
+ "byteOffset": 0
1451
+ }
1452
+ ],
1453
+ "md5sum": "d302a8927ab13bdb23b5dfc577c6b915"
1454
+ },
1455
+ {
1456
+ "dataPath": "params_shard_39.bin",
1457
+ "format": "raw-shard",
1458
+ "nbytes": 25165824,
1459
+ "records": [
1460
+ {
1461
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
1462
+ "shape": [
1463
+ 512,
1464
+ 12288
1465
+ ],
1466
+ "dtype": "uint32",
1467
+ "format": "f32-to-bf16",
1468
+ "nbytes": 25165824,
1469
+ "byteOffset": 0
1470
+ }
1471
+ ],
1472
+ "md5sum": "e5a69957aac71da78feb9b23c5fb5a3a"
1473
+ },
1474
+ {
1475
+ "dataPath": "params_shard_40.bin",
1476
+ "format": "raw-shard",
1477
+ "nbytes": 32587776,
1478
+ "records": [
1479
+ {
1480
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
1481
+ "shape": [
1482
+ 128,
1483
+ 12288
1484
+ ],
1485
+ "dtype": "float16",
1486
+ "format": "f32-to-bf16",
1487
+ "nbytes": 3145728,
1488
+ "byteOffset": 0
1489
+ },
1490
+ {
1491
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
1492
+ "shape": [
1493
+ 512,
1494
+ 4096
1495
+ ],
1496
+ "dtype": "uint32",
1497
+ "format": "f32-to-bf16",
1498
+ "nbytes": 8388608,
1499
+ "byteOffset": 3145728
1500
+ },
1501
+ {
1502
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
1503
+ "shape": [
1504
+ 128,
1505
+ 4096
1506
+ ],
1507
+ "dtype": "float16",
1508
+ "format": "f32-to-bf16",
1509
+ "nbytes": 1048576,
1510
+ "byteOffset": 11534336
1511
+ },
1512
+ {
1513
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
1514
+ "shape": [
1515
+ 128,
1516
+ 22016
1517
+ ],
1518
+ "dtype": "float16",
1519
+ "format": "f32-to-bf16",
1520
+ "nbytes": 5636096,
1521
+ "byteOffset": 12582912
1522
+ },
1523
+ {
1524
+ "name": "model.layers.10.mlp.down_proj.q_scale",
1525
+ "shape": [
1526
+ 344,
1527
+ 4096
1528
+ ],
1529
+ "dtype": "float16",
1530
+ "format": "f32-to-bf16",
1531
+ "nbytes": 2818048,
1532
+ "byteOffset": 18219008
1533
+ },
1534
+ {
1535
+ "name": "model.layers.10.input_layernorm.weight",
1536
+ "shape": [
1537
+ 4096
1538
+ ],
1539
+ "dtype": "float16",
1540
+ "format": "f32-to-bf16",
1541
+ "nbytes": 8192,
1542
+ "byteOffset": 21037056
1543
+ },
1544
+ {
1545
+ "name": "model.layers.10.post_attention_layernorm.weight",
1546
+ "shape": [
1547
+ 4096
1548
+ ],
1549
+ "dtype": "float16",
1550
+ "format": "f32-to-bf16",
1551
+ "nbytes": 8192,
1552
+ "byteOffset": 21045248
1553
+ },
1554
+ {
1555
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
1556
+ "shape": [
1557
+ 128,
1558
+ 12288
1559
+ ],
1560
+ "dtype": "float16",
1561
+ "format": "f32-to-bf16",
1562
+ "nbytes": 3145728,
1563
+ "byteOffset": 21053440
1564
+ },
1565
+ {
1566
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
1567
+ "shape": [
1568
+ 512,
1569
+ 4096
1570
+ ],
1571
+ "dtype": "uint32",
1572
+ "format": "f32-to-bf16",
1573
+ "nbytes": 8388608,
1574
+ "byteOffset": 24199168
1575
+ }
1576
+ ],
1577
+ "md5sum": "1ad6af1d7d02cc573ff6ae8a31ae7d64"
1578
+ },
1579
+ {
1580
+ "dataPath": "params_shard_41.bin",
1581
+ "format": "raw-shard",
1582
+ "nbytes": 45088768,
1583
+ "records": [
1584
+ {
1585
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
1586
+ "shape": [
1587
+ 512,
1588
+ 22016
1589
+ ],
1590
+ "dtype": "uint32",
1591
+ "format": "f32-to-bf16",
1592
+ "nbytes": 45088768,
1593
+ "byteOffset": 0
1594
+ }
1595
+ ],
1596
+ "md5sum": "36f3a1edafceb384df49e23b68a1623f"
1597
+ },
1598
+ {
1599
+ "dataPath": "params_shard_42.bin",
1600
+ "format": "raw-shard",
1601
+ "nbytes": 25165824,
1602
+ "records": [
1603
+ {
1604
+ "name": "model.layers.12.self_attn.qkv_proj.q_weight",
1605
+ "shape": [
1606
+ 512,
1607
+ 12288
1608
+ ],
1609
+ "dtype": "uint32",
1610
+ "format": "f32-to-bf16",
1611
+ "nbytes": 25165824,
1612
+ "byteOffset": 0
1613
+ }
1614
+ ],
1615
+ "md5sum": "41c10ea6f4183a339c9495544a5073a4"
1616
+ },
1617
+ {
1618
+ "dataPath": "params_shard_43.bin",
1619
+ "format": "raw-shard",
1620
+ "nbytes": 32063488,
1621
+ "records": [
1622
+ {
1623
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
1624
+ "shape": [
1625
+ 128,
1626
+ 4096
1627
+ ],
1628
+ "dtype": "float16",
1629
+ "format": "f32-to-bf16",
1630
+ "nbytes": 1048576,
1631
+ "byteOffset": 0
1632
+ },
1633
+ {
1634
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
1635
+ "shape": [
1636
+ 128,
1637
+ 22016
1638
+ ],
1639
+ "dtype": "float16",
1640
+ "format": "f32-to-bf16",
1641
+ "nbytes": 5636096,
1642
+ "byteOffset": 1048576
1643
+ },
1644
+ {
1645
+ "name": "model.layers.11.mlp.down_proj.q_weight",
1646
+ "shape": [
1647
+ 1376,
1648
+ 4096
1649
+ ],
1650
+ "dtype": "uint32",
1651
+ "format": "f32-to-bf16",
1652
+ "nbytes": 22544384,
1653
+ "byteOffset": 6684672
1654
+ },
1655
+ {
1656
+ "name": "model.layers.11.mlp.down_proj.q_scale",
1657
+ "shape": [
1658
+ 344,
1659
+ 4096
1660
+ ],
1661
+ "dtype": "float16",
1662
+ "format": "f32-to-bf16",
1663
+ "nbytes": 2818048,
1664
+ "byteOffset": 29229056
1665
+ },
1666
+ {
1667
+ "name": "model.layers.11.input_layernorm.weight",
1668
+ "shape": [
1669
+ 4096
1670
+ ],
1671
+ "dtype": "float16",
1672
+ "format": "f32-to-bf16",
1673
+ "nbytes": 8192,
1674
+ "byteOffset": 32047104
1675
+ },
1676
+ {
1677
+ "name": "model.layers.11.post_attention_layernorm.weight",
1678
+ "shape": [
1679
+ 4096
1680
+ ],
1681
+ "dtype": "float16",
1682
+ "format": "f32-to-bf16",
1683
+ "nbytes": 8192,
1684
+ "byteOffset": 32055296
1685
+ }
1686
+ ],
1687
+ "md5sum": "11e2349a5804e96ab4b016beb9216906"
1688
+ },
1689
+ {
1690
+ "dataPath": "params_shard_44.bin",
1691
+ "format": "raw-shard",
1692
+ "nbytes": 45088768,
1693
+ "records": [
1694
+ {
1695
+ "name": "model.layers.12.mlp.gate_up_proj.q_weight",
1696
+ "shape": [
1697
+ 512,
1698
+ 22016
1699
+ ],
1700
+ "dtype": "uint32",
1701
+ "format": "f32-to-bf16",
1702
+ "nbytes": 45088768,
1703
+ "byteOffset": 0
1704
+ }
1705
+ ],
1706
+ "md5sum": "cf198550156434640f526fec3638c905"
1707
+ },
1708
+ {
1709
+ "dataPath": "params_shard_45.bin",
1710
+ "format": "raw-shard",
1711
+ "nbytes": 22544384,
1712
+ "records": [
1713
+ {
1714
+ "name": "model.layers.12.mlp.down_proj.q_weight",
1715
+ "shape": [
1716
+ 1376,
1717
+ 4096
1718
+ ],
1719
+ "dtype": "uint32",
1720
+ "format": "f32-to-bf16",
1721
+ "nbytes": 22544384,
1722
+ "byteOffset": 0
1723
+ }
1724
+ ],
1725
+ "md5sum": "58342ee279d6a4a8b2f657b36f27bb46"
1726
+ },
1727
+ {
1728
+ "dataPath": "params_shard_46.bin",
1729
+ "format": "raw-shard",
1730
+ "nbytes": 25165824,
1731
+ "records": [
1732
+ {
1733
+ "name": "model.layers.13.self_attn.qkv_proj.q_weight",
1734
+ "shape": [
1735
+ 512,
1736
+ 12288
1737
+ ],
1738
+ "dtype": "uint32",
1739
+ "format": "f32-to-bf16",
1740
+ "nbytes": 25165824,
1741
+ "byteOffset": 0
1742
+ }
1743
+ ],
1744
+ "md5sum": "0c56ff5a360bcebaf82be0a014147c52"
1745
+ },
1746
+ {
1747
+ "dataPath": "params_shard_47.bin",
1748
+ "format": "raw-shard",
1749
+ "nbytes": 32587776,
1750
+ "records": [
1751
+ {
1752
+ "name": "model.layers.12.self_attn.qkv_proj.q_scale",
1753
+ "shape": [
1754
+ 128,
1755
+ 12288
1756
+ ],
1757
+ "dtype": "float16",
1758
+ "format": "f32-to-bf16",
1759
+ "nbytes": 3145728,
1760
+ "byteOffset": 0
1761
+ },
1762
+ {
1763
+ "name": "model.layers.12.self_attn.o_proj.q_weight",
1764
+ "shape": [
1765
+ 512,
1766
+ 4096
1767
+ ],
1768
+ "dtype": "uint32",
1769
+ "format": "f32-to-bf16",
1770
+ "nbytes": 8388608,
1771
+ "byteOffset": 3145728
1772
+ },
1773
+ {
1774
+ "name": "model.layers.12.self_attn.o_proj.q_scale",
1775
+ "shape": [
1776
+ 128,
1777
+ 4096
1778
+ ],
1779
+ "dtype": "float16",
1780
+ "format": "f32-to-bf16",
1781
+ "nbytes": 1048576,
1782
+ "byteOffset": 11534336
1783
+ },
1784
+ {
1785
+ "name": "model.layers.12.mlp.gate_up_proj.q_scale",
1786
+ "shape": [
1787
+ 128,
1788
+ 22016
1789
+ ],
1790
+ "dtype": "float16",
1791
+ "format": "f32-to-bf16",
1792
+ "nbytes": 5636096,
1793
+ "byteOffset": 12582912
1794
+ },
1795
+ {
1796
+ "name": "model.layers.12.mlp.down_proj.q_scale",
1797
+ "shape": [
1798
+ 344,
1799
+ 4096
1800
+ ],
1801
+ "dtype": "float16",
1802
+ "format": "f32-to-bf16",
1803
+ "nbytes": 2818048,
1804
+ "byteOffset": 18219008
1805
+ },
1806
+ {
1807
+ "name": "model.layers.12.input_layernorm.weight",
1808
+ "shape": [
1809
+ 4096
1810
+ ],
1811
+ "dtype": "float16",
1812
+ "format": "f32-to-bf16",
1813
+ "nbytes": 8192,
1814
+ "byteOffset": 21037056
1815
+ },
1816
+ {
1817
+ "name": "model.layers.12.post_attention_layernorm.weight",
1818
+ "shape": [
1819
+ 4096
1820
+ ],
1821
+ "dtype": "float16",
1822
+ "format": "f32-to-bf16",
1823
+ "nbytes": 8192,
1824
+ "byteOffset": 21045248
1825
+ },
1826
+ {
1827
+ "name": "model.layers.13.self_attn.qkv_proj.q_scale",
1828
+ "shape": [
1829
+ 128,
1830
+ 12288
1831
+ ],
1832
+ "dtype": "float16",
1833
+ "format": "f32-to-bf16",
1834
+ "nbytes": 3145728,
1835
+ "byteOffset": 21053440
1836
+ },
1837
+ {
1838
+ "name": "model.layers.13.self_attn.o_proj.q_weight",
1839
+ "shape": [
1840
+ 512,
1841
+ 4096
1842
+ ],
1843
+ "dtype": "uint32",
1844
+ "format": "f32-to-bf16",
1845
+ "nbytes": 8388608,
1846
+ "byteOffset": 24199168
1847
+ }
1848
+ ],
1849
+ "md5sum": "01ee8ad6a934879f6d1ec61034ee59ae"
1850
+ },
1851
+ {
1852
+ "dataPath": "params_shard_48.bin",
1853
+ "format": "raw-shard",
1854
+ "nbytes": 45088768,
1855
+ "records": [
1856
+ {
1857
+ "name": "model.layers.13.mlp.gate_up_proj.q_weight",
1858
+ "shape": [
1859
+ 512,
1860
+ 22016
1861
+ ],
1862
+ "dtype": "uint32",
1863
+ "format": "f32-to-bf16",
1864
+ "nbytes": 45088768,
1865
+ "byteOffset": 0
1866
+ }
1867
+ ],
1868
+ "md5sum": "665554a4a88ea6721472b55119f63fe6"
1869
+ },
1870
+ {
1871
+ "dataPath": "params_shard_49.bin",
1872
+ "format": "raw-shard",
1873
+ "nbytes": 25165824,
1874
+ "records": [
1875
+ {
1876
+ "name": "model.layers.14.self_attn.qkv_proj.q_weight",
1877
+ "shape": [
1878
+ 512,
1879
+ 12288
1880
+ ],
1881
+ "dtype": "uint32",
1882
+ "format": "f32-to-bf16",
1883
+ "nbytes": 25165824,
1884
+ "byteOffset": 0
1885
+ }
1886
+ ],
1887
+ "md5sum": "593150566899d0e627799200b84eb067"
1888
+ },
1889
+ {
1890
+ "dataPath": "params_shard_50.bin",
1891
+ "format": "raw-shard",
1892
+ "nbytes": 32063488,
1893
+ "records": [
1894
+ {
1895
+ "name": "model.layers.13.self_attn.o_proj.q_scale",
1896
+ "shape": [
1897
+ 128,
1898
+ 4096
1899
+ ],
1900
+ "dtype": "float16",
1901
+ "format": "f32-to-bf16",
1902
+ "nbytes": 1048576,
1903
+ "byteOffset": 0
1904
+ },
1905
+ {
1906
+ "name": "model.layers.13.mlp.gate_up_proj.q_scale",
1907
+ "shape": [
1908
+ 128,
1909
+ 22016
1910
+ ],
1911
+ "dtype": "float16",
1912
+ "format": "f32-to-bf16",
1913
+ "nbytes": 5636096,
1914
+ "byteOffset": 1048576
1915
+ },
1916
+ {
1917
+ "name": "model.layers.13.mlp.down_proj.q_weight",
1918
+ "shape": [
1919
+ 1376,
1920
+ 4096
1921
+ ],
1922
+ "dtype": "uint32",
1923
+ "format": "f32-to-bf16",
1924
+ "nbytes": 22544384,
1925
+ "byteOffset": 6684672
1926
+ },
1927
+ {
1928
+ "name": "model.layers.13.mlp.down_proj.q_scale",
1929
+ "shape": [
1930
+ 344,
1931
+ 4096
1932
+ ],
1933
+ "dtype": "float16",
1934
+ "format": "f32-to-bf16",
1935
+ "nbytes": 2818048,
1936
+ "byteOffset": 29229056
1937
+ },
1938
+ {
1939
+ "name": "model.layers.13.input_layernorm.weight",
1940
+ "shape": [
1941
+ 4096
1942
+ ],
1943
+ "dtype": "float16",
1944
+ "format": "f32-to-bf16",
1945
+ "nbytes": 8192,
1946
+ "byteOffset": 32047104
1947
+ },
1948
+ {
1949
+ "name": "model.layers.13.post_attention_layernorm.weight",
1950
+ "shape": [
1951
+ 4096
1952
+ ],
1953
+ "dtype": "float16",
1954
+ "format": "f32-to-bf16",
1955
+ "nbytes": 8192,
1956
+ "byteOffset": 32055296
1957
+ }
1958
+ ],
1959
+ "md5sum": "7ff1822429ca9857d43e880758c32030"
1960
+ },
1961
+ {
1962
+ "dataPath": "params_shard_51.bin",
1963
+ "format": "raw-shard",
1964
+ "nbytes": 45088768,
1965
+ "records": [
1966
+ {
1967
+ "name": "model.layers.14.mlp.gate_up_proj.q_weight",
1968
+ "shape": [
1969
+ 512,
1970
+ 22016
1971
+ ],
1972
+ "dtype": "uint32",
1973
+ "format": "f32-to-bf16",
1974
+ "nbytes": 45088768,
1975
+ "byteOffset": 0
1976
+ }
1977
+ ],
1978
+ "md5sum": "2cc15cb7e3aacd3647fcabd5d8f17ef9"
1979
+ },
1980
+ {
1981
+ "dataPath": "params_shard_52.bin",
1982
+ "format": "raw-shard",
1983
+ "nbytes": 22544384,
1984
+ "records": [
1985
+ {
1986
+ "name": "model.layers.14.mlp.down_proj.q_weight",
1987
+ "shape": [
1988
+ 1376,
1989
+ 4096
1990
+ ],
1991
+ "dtype": "uint32",
1992
+ "format": "f32-to-bf16",
1993
+ "nbytes": 22544384,
1994
+ "byteOffset": 0
1995
+ }
1996
+ ],
1997
+ "md5sum": "e7339464db06df5d0bd690f80bf0dd41"
1998
+ },
1999
+ {
2000
+ "dataPath": "params_shard_53.bin",
2001
+ "format": "raw-shard",
2002
+ "nbytes": 25165824,
2003
+ "records": [
2004
+ {
2005
+ "name": "model.layers.15.self_attn.qkv_proj.q_weight",
2006
+ "shape": [
2007
+ 512,
2008
+ 12288
2009
+ ],
2010
+ "dtype": "uint32",
2011
+ "format": "f32-to-bf16",
2012
+ "nbytes": 25165824,
2013
+ "byteOffset": 0
2014
+ }
2015
+ ],
2016
+ "md5sum": "10ff9c7c4343d42dd0d240d7c89fdb1a"
2017
+ },
2018
+ {
2019
+ "dataPath": "params_shard_54.bin",
2020
+ "format": "raw-shard",
2021
+ "nbytes": 32587776,
2022
+ "records": [
2023
+ {
2024
+ "name": "model.layers.14.self_attn.qkv_proj.q_scale",
2025
+ "shape": [
2026
+ 128,
2027
+ 12288
2028
+ ],
2029
+ "dtype": "float16",
2030
+ "format": "f32-to-bf16",
2031
+ "nbytes": 3145728,
2032
+ "byteOffset": 0
2033
+ },
2034
+ {
2035
+ "name": "model.layers.14.self_attn.o_proj.q_weight",
2036
+ "shape": [
2037
+ 512,
2038
+ 4096
2039
+ ],
2040
+ "dtype": "uint32",
2041
+ "format": "f32-to-bf16",
2042
+ "nbytes": 8388608,
2043
+ "byteOffset": 3145728
2044
+ },
2045
+ {
2046
+ "name": "model.layers.14.self_attn.o_proj.q_scale",
2047
+ "shape": [
2048
+ 128,
2049
+ 4096
2050
+ ],
2051
+ "dtype": "float16",
2052
+ "format": "f32-to-bf16",
2053
+ "nbytes": 1048576,
2054
+ "byteOffset": 11534336
2055
+ },
2056
+ {
2057
+ "name": "model.layers.14.mlp.gate_up_proj.q_scale",
2058
+ "shape": [
2059
+ 128,
2060
+ 22016
2061
+ ],
2062
+ "dtype": "float16",
2063
+ "format": "f32-to-bf16",
2064
+ "nbytes": 5636096,
2065
+ "byteOffset": 12582912
2066
+ },
2067
+ {
2068
+ "name": "model.layers.14.mlp.down_proj.q_scale",
2069
+ "shape": [
2070
+ 344,
2071
+ 4096
2072
+ ],
2073
+ "dtype": "float16",
2074
+ "format": "f32-to-bf16",
2075
+ "nbytes": 2818048,
2076
+ "byteOffset": 18219008
2077
+ },
2078
+ {
2079
+ "name": "model.layers.14.input_layernorm.weight",
2080
+ "shape": [
2081
+ 4096
2082
+ ],
2083
+ "dtype": "float16",
2084
+ "format": "f32-to-bf16",
2085
+ "nbytes": 8192,
2086
+ "byteOffset": 21037056
2087
+ },
2088
+ {
2089
+ "name": "model.layers.14.post_attention_layernorm.weight",
2090
+ "shape": [
2091
+ 4096
2092
+ ],
2093
+ "dtype": "float16",
2094
+ "format": "f32-to-bf16",
2095
+ "nbytes": 8192,
2096
+ "byteOffset": 21045248
2097
+ },
2098
+ {
2099
+ "name": "model.layers.15.self_attn.qkv_proj.q_scale",
2100
+ "shape": [
2101
+ 128,
2102
+ 12288
2103
+ ],
2104
+ "dtype": "float16",
2105
+ "format": "f32-to-bf16",
2106
+ "nbytes": 3145728,
2107
+ "byteOffset": 21053440
2108
+ },
2109
+ {
2110
+ "name": "model.layers.15.self_attn.o_proj.q_weight",
2111
+ "shape": [
2112
+ 512,
2113
+ 4096
2114
+ ],
2115
+ "dtype": "uint32",
2116
+ "format": "f32-to-bf16",
2117
+ "nbytes": 8388608,
2118
+ "byteOffset": 24199168
2119
+ }
2120
+ ],
2121
+ "md5sum": "1269bece685abdb06e72d24b0d0320e3"
2122
+ },
2123
+ {
2124
+ "dataPath": "params_shard_55.bin",
2125
+ "format": "raw-shard",
2126
+ "nbytes": 45088768,
2127
+ "records": [
2128
+ {
2129
+ "name": "model.layers.15.mlp.gate_up_proj.q_weight",
2130
+ "shape": [
2131
+ 512,
2132
+ 22016
2133
+ ],
2134
+ "dtype": "uint32",
2135
+ "format": "f32-to-bf16",
2136
+ "nbytes": 45088768,
2137
+ "byteOffset": 0
2138
+ }
2139
+ ],
2140
+ "md5sum": "76cfc2fe6a4f3cc78b7a06ac317db286"
2141
+ },
2142
+ {
2143
+ "dataPath": "params_shard_56.bin",
2144
+ "format": "raw-shard",
2145
+ "nbytes": 25165824,
2146
+ "records": [
2147
+ {
2148
+ "name": "model.layers.16.self_attn.qkv_proj.q_weight",
2149
+ "shape": [
2150
+ 512,
2151
+ 12288
2152
+ ],
2153
+ "dtype": "uint32",
2154
+ "format": "f32-to-bf16",
2155
+ "nbytes": 25165824,
2156
+ "byteOffset": 0
2157
+ }
2158
+ ],
2159
+ "md5sum": "8d8bd945869313bf34f495460ff473c0"
2160
+ },
2161
+ {
2162
+ "dataPath": "params_shard_57.bin",
2163
+ "format": "raw-shard",
2164
+ "nbytes": 32063488,
2165
+ "records": [
2166
+ {
2167
+ "name": "model.layers.15.self_attn.o_proj.q_scale",
2168
+ "shape": [
2169
+ 128,
2170
+ 4096
2171
+ ],
2172
+ "dtype": "float16",
2173
+ "format": "f32-to-bf16",
2174
+ "nbytes": 1048576,
2175
+ "byteOffset": 0
2176
+ },
2177
+ {
2178
+ "name": "model.layers.15.mlp.gate_up_proj.q_scale",
2179
+ "shape": [
2180
+ 128,
2181
+ 22016
2182
+ ],
2183
+ "dtype": "float16",
2184
+ "format": "f32-to-bf16",
2185
+ "nbytes": 5636096,
2186
+ "byteOffset": 1048576
2187
+ },
2188
+ {
2189
+ "name": "model.layers.15.mlp.down_proj.q_weight",
2190
+ "shape": [
2191
+ 1376,
2192
+ 4096
2193
+ ],
2194
+ "dtype": "uint32",
2195
+ "format": "f32-to-bf16",
2196
+ "nbytes": 22544384,
2197
+ "byteOffset": 6684672
2198
+ },
2199
+ {
2200
+ "name": "model.layers.15.mlp.down_proj.q_scale",
2201
+ "shape": [
2202
+ 344,
2203
+ 4096
2204
+ ],
2205
+ "dtype": "float16",
2206
+ "format": "f32-to-bf16",
2207
+ "nbytes": 2818048,
2208
+ "byteOffset": 29229056
2209
+ },
2210
+ {
2211
+ "name": "model.layers.15.input_layernorm.weight",
2212
+ "shape": [
2213
+ 4096
2214
+ ],
2215
+ "dtype": "float16",
2216
+ "format": "f32-to-bf16",
2217
+ "nbytes": 8192,
2218
+ "byteOffset": 32047104
2219
+ },
2220
+ {
2221
+ "name": "model.layers.15.post_attention_layernorm.weight",
2222
+ "shape": [
2223
+ 4096
2224
+ ],
2225
+ "dtype": "float16",
2226
+ "format": "f32-to-bf16",
2227
+ "nbytes": 8192,
2228
+ "byteOffset": 32055296
2229
+ }
2230
+ ],
2231
+ "md5sum": "8968721bb0bca4d937a41d73a4464573"
2232
+ },
2233
+ {
2234
+ "dataPath": "params_shard_58.bin",
2235
+ "format": "raw-shard",
2236
+ "nbytes": 45088768,
2237
+ "records": [
2238
+ {
2239
+ "name": "model.layers.16.mlp.gate_up_proj.q_weight",
2240
+ "shape": [
2241
+ 512,
2242
+ 22016
2243
+ ],
2244
+ "dtype": "uint32",
2245
+ "format": "f32-to-bf16",
2246
+ "nbytes": 45088768,
2247
+ "byteOffset": 0
2248
+ }
2249
+ ],
2250
+ "md5sum": "8b481258c607bec0b84d5dbef9c9228f"
2251
+ },
2252
+ {
2253
+ "dataPath": "params_shard_59.bin",
2254
+ "format": "raw-shard",
2255
+ "nbytes": 22544384,
2256
+ "records": [
2257
+ {
2258
+ "name": "model.layers.16.mlp.down_proj.q_weight",
2259
+ "shape": [
2260
+ 1376,
2261
+ 4096
2262
+ ],
2263
+ "dtype": "uint32",
2264
+ "format": "f32-to-bf16",
2265
+ "nbytes": 22544384,
2266
+ "byteOffset": 0
2267
+ }
2268
+ ],
2269
+ "md5sum": "199eaac5804cd946f7249fdd519d4251"
2270
+ },
2271
+ {
2272
+ "dataPath": "params_shard_60.bin",
2273
+ "format": "raw-shard",
2274
+ "nbytes": 65536000,
2275
+ "records": [
2276
+ {
2277
+ "name": "lm_head.q_weight",
2278
+ "shape": [
2279
+ 512,
2280
+ 32000
2281
+ ],
2282
+ "dtype": "uint32",
2283
+ "format": "f32-to-bf16",
2284
+ "nbytes": 65536000,
2285
+ "byteOffset": 0
2286
+ }
2287
+ ],
2288
+ "md5sum": "d08e47868866d4246fcbea7a7c86a21b"
2289
+ },
2290
+ {
2291
+ "dataPath": "params_shard_61.bin",
2292
+ "format": "raw-shard",
2293
+ "nbytes": 29253632,
2294
+ "records": [
2295
+ {
2296
+ "name": "model.layers.16.self_attn.qkv_proj.q_scale",
2297
+ "shape": [
2298
+ 128,
2299
+ 12288
2300
+ ],
2301
+ "dtype": "float16",
2302
+ "format": "f32-to-bf16",
2303
+ "nbytes": 3145728,
2304
+ "byteOffset": 0
2305
+ },
2306
+ {
2307
+ "name": "model.layers.16.self_attn.o_proj.q_weight",
2308
+ "shape": [
2309
+ 512,
2310
+ 4096
2311
+ ],
2312
+ "dtype": "uint32",
2313
+ "format": "f32-to-bf16",
2314
+ "nbytes": 8388608,
2315
+ "byteOffset": 3145728
2316
+ },
2317
+ {
2318
+ "name": "model.layers.16.self_attn.o_proj.q_scale",
2319
+ "shape": [
2320
+ 128,
2321
+ 4096
2322
+ ],
2323
+ "dtype": "float16",
2324
+ "format": "f32-to-bf16",
2325
+ "nbytes": 1048576,
2326
+ "byteOffset": 11534336
2327
+ },
2328
+ {
2329
+ "name": "model.layers.16.mlp.gate_up_proj.q_scale",
2330
+ "shape": [
2331
+ 128,
2332
+ 22016
2333
+ ],
2334
+ "dtype": "float16",
2335
+ "format": "f32-to-bf16",
2336
+ "nbytes": 5636096,
2337
+ "byteOffset": 12582912
2338
+ },
2339
+ {
2340
+ "name": "model.layers.16.mlp.down_proj.q_scale",
2341
+ "shape": [
2342
+ 344,
2343
+ 4096
2344
+ ],
2345
+ "dtype": "float16",
2346
+ "format": "f32-to-bf16",
2347
+ "nbytes": 2818048,
2348
+ "byteOffset": 18219008
2349
+ },
2350
+ {
2351
+ "name": "model.layers.16.input_layernorm.weight",
2352
+ "shape": [
2353
+ 4096
2354
+ ],
2355
+ "dtype": "float16",
2356
+ "format": "f32-to-bf16",
2357
+ "nbytes": 8192,
2358
+ "byteOffset": 21037056
2359
+ },
2360
+ {
2361
+ "name": "model.layers.16.post_attention_layernorm.weight",
2362
+ "shape": [
2363
+ 4096
2364
+ ],
2365
+ "dtype": "float16",
2366
+ "format": "f32-to-bf16",
2367
+ "nbytes": 8192,
2368
+ "byteOffset": 21045248
2369
+ },
2370
+ {
2371
+ "name": "model.norm.weight",
2372
+ "shape": [
2373
+ 4096
2374
+ ],
2375
+ "dtype": "float16",
2376
+ "format": "f32-to-bf16",
2377
+ "nbytes": 8192,
2378
+ "byteOffset": 21053440
2379
+ },
2380
+ {
2381
+ "name": "lm_head.q_scale",
2382
+ "shape": [
2383
+ 128,
2384
+ 32000
2385
+ ],
2386
+ "dtype": "float16",
2387
+ "format": "f32-to-bf16",
2388
+ "nbytes": 8192000,
2389
+ "byteOffset": 21061632
2390
+ }
2391
+ ],
2392
+ "md5sum": "72e2e78ee16f32e8e8d93f88aeede5e7"
2393
+ }
2394
+ ]
2395
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48c349a2f31f2994dd70b2150b743bccdfeb916a0e85a58bb4028cda8eca343a
3
+ size 65536000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0874c138dc8121ce103729c57e8d48f46630ccd07be3534e5e7e1bf693d8acc9
3
+ size 33357824
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f49b02bcdf4a062add4575111429e4e63f6ffa4aa802c394d568aad0a03fa43
3
+ size 22544384
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a16cf48a03a21edb8e20c5643b8cd071e5dd3857f5e477caa50ef31a38bb56b
3
+ size 25165824
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cc005bb5813b7057f60fe89c43b4f42b6fedc82fa6047d6df12af7fef0850ec
3
+ size 32587776
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0adc6404921801dddb0978e9c6c2e9c2a10641a49de1f0e5a82e8cbb772a7c7b
3
+ size 45088768
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f35392825bbf52a0be87cb3c8df450bfb0b311cd3f9331cd1a72541e05b56209
3
+ size 25165824
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6f9815ab5c2d0bc1c65f513e238f4ecdabaa7d734ddcc44e6ae5fddc7f85329
3
+ size 32063488
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48513895c297a169a50179fc59a3b49e0e623d19ba33533fcfecf94e7ad95ef2
3
+ size 45088768
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41abdb12605a84e1b4631f4f81d4ed0a8c9023e1163228ce9912488de0920ea9
3
+ size 22544384
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1719f4ae6ff47260e25e69d00f02124806b84fd824c9db9b83209c9d67b4a9be
3
+ size 25165824
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef640d0d15872fa72d7dcb005859e20cbf676576b6d9088075a1e9f046d55bfb
3
+ size 32587776
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bde1812672b4eaec8100cebec897c070ef0b15d0f94db7af7e059bc3f05cbd6e
3
+ size 45088768
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:707df07ad47871c0cca9a12f716fbab9315476307a77768b6c6eac4d607082ac
3
+ size 45088768
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fcb5c85f7166767a38a5745e262f4d1ba319163e0524ee26cf799c2a6713d35
3
+ size 25165824
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfbb0e95a0f90104bbe4deabbfef910cd69e265efb9e0604886074d9755b1c2d
3
+ size 32063488
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c10b728472b70399d5cbed0dd565f3dbe34d49e903743a2ec4b292031f227e91
3
+ size 45088768
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0371cca744cc29610048b8a50461cf578258c1605d7630ddb0b1c5a0638bbbc
3
+ size 22544384
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f81dffd1e3b513c425eb145a23e9fcca89254dd2633ff1095a7014df20162ca9
3
+ size 25165824
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0eb9916c829c96a156f6e9e381f170f1aa08623c3c54ce01c77edd92044f9a69
3
+ size 32587776
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dc5a00194f6dec00e33907725a67f35298451808c68e0809904e9e5a50f78e2
3
+ size 45088768
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:345ed26ddd22506a4e211a0b1b81679f2a3d0d741b60a5c0f91129135b22e4f6
3
+ size 25165824
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e332c2c5ca1706e06f759cd26af71ecd9d8e41c5b85770aedf248e2aabb68ed
3
+ size 32063488
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e94f1fdda9d8502e036b5678f484d1183dc5760715890fc32559d6c630c199f6
3
+ size 22544384
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1f05f0dae53d501d5bb8595ab838ff616107f64515a5dfdb423d65bfbe87fc6
3
+ size 45088768
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10c60d76e945f47fbbf24a79c888f8b225823298791333df7f22cd3868cbd358
3
+ size 22544384
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a038fa7e241be1986e46b13892a89877f3b7b5827cfbd7661bdc74b590d9e068
3
+ size 25165824
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:690403152f82fc0a8d5de18cb8b997c602f60a70fe1c219ff38f2dc7244bbfda
3
+ size 32587776
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c3caebf49b840dd697a6fc6a94ea0f27af1916752423d0ee881688bc18144a7
3
+ size 45088768
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5f91aebe5b4b3ae1be181d69e59f95f0a31f0a0196756b261e39be7a1de6a20
3
+ size 25165824
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f160eb97820bbc553831bb247b0c247dd2045cb244c8b4d26ee41b938783baec
3
+ size 32063488
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60bf4cf7aa4659df1d5561d17b9c18f518d8788fa01a8367c9dc8b0df9138aa2
3
+ size 45088768
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27c3d253d29713c3d3597a3bcebe5a0dbe1cbb6a1f7eadb08ea73d67a6afb849
3
+ size 22544384
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fecd0a967ecb070203836e3d64c0a682fe8b6949f5fe620d012a598402720c79
3
+ size 25165824
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:906629331c892f2197e40849e9235ce832b8e84a8355179b0fc8680911b95255
3
+ size 25165824
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dc8e665ec6da8a4e7aa75ce4010bb8b68ae8698958f3f8bedf5f13cec64649b
3
+ size 32587776
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d0bc4668e6128ba93b56189f20a01eb72362f42018b2dd9a6ab169c0ecad14f
3
+ size 45088768
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f75cec6d848f6a8cde7d3df79807f564722941b955baa2ff406d2120bc8a0792
3
+ size 25165824
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a3ef307c514d6b043228915435672a9c837b9cd782b21ab47af09eef87a6d12
3
+ size 32063488
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7df71fedf79928e5ae5b169db4523cae745ab635d40b13be40f507666c0ba315
3
+ size 45088768
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3f4b6cd4132110397c085894a22febfb379744a7cc557eb0a8e570ae7e5e2bc
3
+ size 22544384
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a887c5a4c9afd5987d61c7570a88fdc2341be8408ef72e07d93ebb220d60e0
3
+ size 25165824
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:635952fa100c599eb0980a1688fcc0cb325895a7c04c53e25fe56a261bc1dc2f
3
+ size 32587776
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb39e48a451902b9fb3c10fc1e255cfe517374edd9ec17b87b8581c1a63285c2
3
+ size 45088768
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd52ee70db4d44eda79c7f8fdf443de6f0445a26b9d6c45b4f660404f093783f
3
+ size 25165824
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69babedc29632f9c73ab78ddb31b8e412606fd2f7bfeeefaa509e9cb29c6f81e
3
+ size 32587776
params_shard_50.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756f8970d4640bcbe73f3e5c4c77b36e6b1022e364476515181c88aa77bdba41
3
+ size 32063488
params_shard_51.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9884c8896ce1359d3d6f93adb1ba1533ea32fc0eebd3e7aca0e1ea027e694df6
3
+ size 45088768