geonmin-kim commited on
Commit
d433ee4
·
verified ·
1 Parent(s): c3831df

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
mlc-chat-config.json ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "qwen2",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_act": "silu",
7
+ "hidden_size": 1536,
8
+ "intermediate_size": 8960,
9
+ "num_attention_heads": 12,
10
+ "num_hidden_layers": 28,
11
+ "num_key_value_heads": 2,
12
+ "rms_norm_eps": 1e-06,
13
+ "rope_theta": 1000000.0,
14
+ "vocab_size": 151936,
15
+ "tie_word_embeddings": true,
16
+ "context_window_size": 768,
17
+ "prefill_chunk_size": 768,
18
+ "tensor_parallel_shards": 1,
19
+ "head_dim": 128,
20
+ "dtype": "float32",
21
+ "max_batch_size": 80
22
+ },
23
+ "vocab_size": 151936,
24
+ "context_window_size": 768,
25
+ "sliding_window_size": -1,
26
+ "prefill_chunk_size": 768,
27
+ "attention_sink_size": -1,
28
+ "tensor_parallel_shards": 1,
29
+ "pipeline_parallel_stages": 1,
30
+ "temperature": 1.0,
31
+ "presence_penalty": 0.0,
32
+ "frequency_penalty": 0.0,
33
+ "repetition_penalty": 1.0,
34
+ "top_p": 1.0,
35
+ "tokenizer_files": [
36
+ "tokenizer.json",
37
+ "vocab.json",
38
+ "merges.txt",
39
+ "tokenizer_config.json"
40
+ ],
41
+ "tokenizer_info": {
42
+ "token_postproc_method": "byte_level",
43
+ "prepend_space_in_encode": false,
44
+ "strip_space_in_decode": false
45
+ },
46
+ "conv_template": {
47
+ "name": "qwen2",
48
+ "system_template": "<|im_start|>system\n{system_message}<|im_end|>\n",
49
+ "system_message": "You are a helpful assistant.",
50
+ "system_prefix_token_ids": null,
51
+ "add_role_after_system_message": true,
52
+ "roles": {
53
+ "user": "<|im_start|>user",
54
+ "assistant": "<|im_start|>assistant"
55
+ },
56
+ "role_templates": {
57
+ "user": "{user_message}",
58
+ "assistant": "{assistant_message}",
59
+ "tool": "{tool_message}"
60
+ },
61
+ "messages": [],
62
+ "seps": [
63
+ "<|im_end|>\n"
64
+ ],
65
+ "role_content_sep": "\n",
66
+ "role_empty_sep": "\n",
67
+ "stop_str": [
68
+ "<|endoftext|>",
69
+ "<|im_end|>"
70
+ ],
71
+ "stop_token_ids": [
72
+ 151643,
73
+ 151645
74
+ ],
75
+ "function_string": "",
76
+ "use_function_calling": false
77
+ },
78
+ "pad_token_id": 0,
79
+ "bos_token_id": 151643,
80
+ "eos_token_id": 151643
81
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2638 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 198,
4
+ "ParamBytes": 3087428608.0,
5
+ "BitsPerParam": 13.89882147873375
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 466747392,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.weight",
15
+ "shape": [
16
+ 151936,
17
+ 1536
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 466747392,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "fac6ce8ba0d8ae438d10b5bb546c7cbd"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 55050240,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
34
+ "shape": [
35
+ 17920,
36
+ 1536
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 55050240,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "b0c672ae0f591da2d49f4dce650e72ae"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 27525120,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.mlp.down_proj.weight",
53
+ "shape": [
54
+ 1536,
55
+ 8960
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 27525120,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "db1e935ff9f5b6ff5da80f8b6d1628ff"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 55050240,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
72
+ "shape": [
73
+ 17920,
74
+ 1536
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 55050240,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "baee6aadc8be0f23324126937a4cf15b"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 27525120,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.1.mlp.down_proj.weight",
91
+ "shape": [
92
+ 1536,
93
+ 8960
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 27525120,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "9581d907a6edac4b6bc1c357d0d2343f"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 55050240,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
110
+ "shape": [
111
+ 17920,
112
+ 1536
113
+ ],
114
+ "dtype": "float16",
115
+ "format": "f32-to-bf16",
116
+ "nbytes": 55050240,
117
+ "byteOffset": 0
118
+ }
119
+ ],
120
+ "md5sum": "a8b915076ac9a72930fb6871e62d46ef"
121
+ },
122
+ {
123
+ "dataPath": "params_shard_6.bin",
124
+ "format": "raw-shard",
125
+ "nbytes": 27525120,
126
+ "records": [
127
+ {
128
+ "name": "model.layers.2.mlp.down_proj.weight",
129
+ "shape": [
130
+ 1536,
131
+ 8960
132
+ ],
133
+ "dtype": "float16",
134
+ "format": "f32-to-bf16",
135
+ "nbytes": 27525120,
136
+ "byteOffset": 0
137
+ }
138
+ ],
139
+ "md5sum": "972150741a85989c34825d9c970f6741"
140
+ },
141
+ {
142
+ "dataPath": "params_shard_7.bin",
143
+ "format": "raw-shard",
144
+ "nbytes": 33060864,
145
+ "records": [
146
+ {
147
+ "name": "model.layers.0.self_attn.c_attn.weight",
148
+ "shape": [
149
+ 2048,
150
+ 1536
151
+ ],
152
+ "dtype": "float16",
153
+ "format": "f32-to-bf16",
154
+ "nbytes": 6291456,
155
+ "byteOffset": 0
156
+ },
157
+ {
158
+ "name": "model.layers.0.self_attn.c_attn.bias",
159
+ "shape": [
160
+ 2048
161
+ ],
162
+ "dtype": "float16",
163
+ "format": "f32-to-bf16",
164
+ "nbytes": 4096,
165
+ "byteOffset": 6291456
166
+ },
167
+ {
168
+ "name": "model.layers.0.self_attn.o_proj.weight",
169
+ "shape": [
170
+ 1536,
171
+ 1536
172
+ ],
173
+ "dtype": "float16",
174
+ "format": "f32-to-bf16",
175
+ "nbytes": 4718592,
176
+ "byteOffset": 6295552
177
+ },
178
+ {
179
+ "name": "model.layers.0.input_layernorm.weight",
180
+ "shape": [
181
+ 1536
182
+ ],
183
+ "dtype": "float16",
184
+ "format": "f32-to-bf16",
185
+ "nbytes": 3072,
186
+ "byteOffset": 11014144
187
+ },
188
+ {
189
+ "name": "model.layers.0.post_attention_layernorm.weight",
190
+ "shape": [
191
+ 1536
192
+ ],
193
+ "dtype": "float16",
194
+ "format": "f32-to-bf16",
195
+ "nbytes": 3072,
196
+ "byteOffset": 11017216
197
+ },
198
+ {
199
+ "name": "model.layers.1.self_attn.c_attn.weight",
200
+ "shape": [
201
+ 2048,
202
+ 1536
203
+ ],
204
+ "dtype": "float16",
205
+ "format": "f32-to-bf16",
206
+ "nbytes": 6291456,
207
+ "byteOffset": 11020288
208
+ },
209
+ {
210
+ "name": "model.layers.1.self_attn.c_attn.bias",
211
+ "shape": [
212
+ 2048
213
+ ],
214
+ "dtype": "float16",
215
+ "format": "f32-to-bf16",
216
+ "nbytes": 4096,
217
+ "byteOffset": 17311744
218
+ },
219
+ {
220
+ "name": "model.layers.1.self_attn.o_proj.weight",
221
+ "shape": [
222
+ 1536,
223
+ 1536
224
+ ],
225
+ "dtype": "float16",
226
+ "format": "f32-to-bf16",
227
+ "nbytes": 4718592,
228
+ "byteOffset": 17315840
229
+ },
230
+ {
231
+ "name": "model.layers.1.input_layernorm.weight",
232
+ "shape": [
233
+ 1536
234
+ ],
235
+ "dtype": "float16",
236
+ "format": "f32-to-bf16",
237
+ "nbytes": 3072,
238
+ "byteOffset": 22034432
239
+ },
240
+ {
241
+ "name": "model.layers.1.post_attention_layernorm.weight",
242
+ "shape": [
243
+ 1536
244
+ ],
245
+ "dtype": "float16",
246
+ "format": "f32-to-bf16",
247
+ "nbytes": 3072,
248
+ "byteOffset": 22037504
249
+ },
250
+ {
251
+ "name": "model.layers.2.self_attn.c_attn.weight",
252
+ "shape": [
253
+ 2048,
254
+ 1536
255
+ ],
256
+ "dtype": "float16",
257
+ "format": "f32-to-bf16",
258
+ "nbytes": 6291456,
259
+ "byteOffset": 22040576
260
+ },
261
+ {
262
+ "name": "model.layers.2.self_attn.c_attn.bias",
263
+ "shape": [
264
+ 2048
265
+ ],
266
+ "dtype": "float16",
267
+ "format": "f32-to-bf16",
268
+ "nbytes": 4096,
269
+ "byteOffset": 28332032
270
+ },
271
+ {
272
+ "name": "model.layers.2.self_attn.o_proj.weight",
273
+ "shape": [
274
+ 1536,
275
+ 1536
276
+ ],
277
+ "dtype": "float16",
278
+ "format": "f32-to-bf16",
279
+ "nbytes": 4718592,
280
+ "byteOffset": 28336128
281
+ },
282
+ {
283
+ "name": "model.layers.2.input_layernorm.weight",
284
+ "shape": [
285
+ 1536
286
+ ],
287
+ "dtype": "float16",
288
+ "format": "f32-to-bf16",
289
+ "nbytes": 3072,
290
+ "byteOffset": 33054720
291
+ },
292
+ {
293
+ "name": "model.layers.2.post_attention_layernorm.weight",
294
+ "shape": [
295
+ 1536
296
+ ],
297
+ "dtype": "float16",
298
+ "format": "f32-to-bf16",
299
+ "nbytes": 3072,
300
+ "byteOffset": 33057792
301
+ }
302
+ ],
303
+ "md5sum": "cc217b23db4df89fcfed2ffa54ee328a"
304
+ },
305
+ {
306
+ "dataPath": "params_shard_8.bin",
307
+ "format": "raw-shard",
308
+ "nbytes": 55050240,
309
+ "records": [
310
+ {
311
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
312
+ "shape": [
313
+ 17920,
314
+ 1536
315
+ ],
316
+ "dtype": "float16",
317
+ "format": "f32-to-bf16",
318
+ "nbytes": 55050240,
319
+ "byteOffset": 0
320
+ }
321
+ ],
322
+ "md5sum": "969f8fa80cdf5b272c2e607a2119c4e5"
323
+ },
324
+ {
325
+ "dataPath": "params_shard_9.bin",
326
+ "format": "raw-shard",
327
+ "nbytes": 27525120,
328
+ "records": [
329
+ {
330
+ "name": "model.layers.3.mlp.down_proj.weight",
331
+ "shape": [
332
+ 1536,
333
+ 8960
334
+ ],
335
+ "dtype": "float16",
336
+ "format": "f32-to-bf16",
337
+ "nbytes": 27525120,
338
+ "byteOffset": 0
339
+ }
340
+ ],
341
+ "md5sum": "d860175d6e9380a30cfbe06262104ec7"
342
+ },
343
+ {
344
+ "dataPath": "params_shard_10.bin",
345
+ "format": "raw-shard",
346
+ "nbytes": 55050240,
347
+ "records": [
348
+ {
349
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
350
+ "shape": [
351
+ 17920,
352
+ 1536
353
+ ],
354
+ "dtype": "float16",
355
+ "format": "f32-to-bf16",
356
+ "nbytes": 55050240,
357
+ "byteOffset": 0
358
+ }
359
+ ],
360
+ "md5sum": "3f5dad1ba616506c4e5084bb18bbf1d6"
361
+ },
362
+ {
363
+ "dataPath": "params_shard_11.bin",
364
+ "format": "raw-shard",
365
+ "nbytes": 27525120,
366
+ "records": [
367
+ {
368
+ "name": "model.layers.4.mlp.down_proj.weight",
369
+ "shape": [
370
+ 1536,
371
+ 8960
372
+ ],
373
+ "dtype": "float16",
374
+ "format": "f32-to-bf16",
375
+ "nbytes": 27525120,
376
+ "byteOffset": 0
377
+ }
378
+ ],
379
+ "md5sum": "b993af8733b274d5e5aaacdc6462f74f"
380
+ },
381
+ {
382
+ "dataPath": "params_shard_12.bin",
383
+ "format": "raw-shard",
384
+ "nbytes": 55050240,
385
+ "records": [
386
+ {
387
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
388
+ "shape": [
389
+ 17920,
390
+ 1536
391
+ ],
392
+ "dtype": "float16",
393
+ "format": "f32-to-bf16",
394
+ "nbytes": 55050240,
395
+ "byteOffset": 0
396
+ }
397
+ ],
398
+ "md5sum": "2877a35f91a04b1c58f6a824e8d61e87"
399
+ },
400
+ {
401
+ "dataPath": "params_shard_13.bin",
402
+ "format": "raw-shard",
403
+ "nbytes": 27525120,
404
+ "records": [
405
+ {
406
+ "name": "model.layers.5.mlp.down_proj.weight",
407
+ "shape": [
408
+ 1536,
409
+ 8960
410
+ ],
411
+ "dtype": "float16",
412
+ "format": "f32-to-bf16",
413
+ "nbytes": 27525120,
414
+ "byteOffset": 0
415
+ }
416
+ ],
417
+ "md5sum": "8bad654a3c485e170a6f0e5ddab81db0"
418
+ },
419
+ {
420
+ "dataPath": "params_shard_14.bin",
421
+ "format": "raw-shard",
422
+ "nbytes": 33060864,
423
+ "records": [
424
+ {
425
+ "name": "model.layers.3.self_attn.c_attn.weight",
426
+ "shape": [
427
+ 2048,
428
+ 1536
429
+ ],
430
+ "dtype": "float16",
431
+ "format": "f32-to-bf16",
432
+ "nbytes": 6291456,
433
+ "byteOffset": 0
434
+ },
435
+ {
436
+ "name": "model.layers.3.self_attn.c_attn.bias",
437
+ "shape": [
438
+ 2048
439
+ ],
440
+ "dtype": "float16",
441
+ "format": "f32-to-bf16",
442
+ "nbytes": 4096,
443
+ "byteOffset": 6291456
444
+ },
445
+ {
446
+ "name": "model.layers.3.self_attn.o_proj.weight",
447
+ "shape": [
448
+ 1536,
449
+ 1536
450
+ ],
451
+ "dtype": "float16",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 4718592,
454
+ "byteOffset": 6295552
455
+ },
456
+ {
457
+ "name": "model.layers.3.input_layernorm.weight",
458
+ "shape": [
459
+ 1536
460
+ ],
461
+ "dtype": "float16",
462
+ "format": "f32-to-bf16",
463
+ "nbytes": 3072,
464
+ "byteOffset": 11014144
465
+ },
466
+ {
467
+ "name": "model.layers.3.post_attention_layernorm.weight",
468
+ "shape": [
469
+ 1536
470
+ ],
471
+ "dtype": "float16",
472
+ "format": "f32-to-bf16",
473
+ "nbytes": 3072,
474
+ "byteOffset": 11017216
475
+ },
476
+ {
477
+ "name": "model.layers.4.self_attn.c_attn.weight",
478
+ "shape": [
479
+ 2048,
480
+ 1536
481
+ ],
482
+ "dtype": "float16",
483
+ "format": "f32-to-bf16",
484
+ "nbytes": 6291456,
485
+ "byteOffset": 11020288
486
+ },
487
+ {
488
+ "name": "model.layers.4.self_attn.c_attn.bias",
489
+ "shape": [
490
+ 2048
491
+ ],
492
+ "dtype": "float16",
493
+ "format": "f32-to-bf16",
494
+ "nbytes": 4096,
495
+ "byteOffset": 17311744
496
+ },
497
+ {
498
+ "name": "model.layers.4.self_attn.o_proj.weight",
499
+ "shape": [
500
+ 1536,
501
+ 1536
502
+ ],
503
+ "dtype": "float16",
504
+ "format": "f32-to-bf16",
505
+ "nbytes": 4718592,
506
+ "byteOffset": 17315840
507
+ },
508
+ {
509
+ "name": "model.layers.4.input_layernorm.weight",
510
+ "shape": [
511
+ 1536
512
+ ],
513
+ "dtype": "float16",
514
+ "format": "f32-to-bf16",
515
+ "nbytes": 3072,
516
+ "byteOffset": 22034432
517
+ },
518
+ {
519
+ "name": "model.layers.4.post_attention_layernorm.weight",
520
+ "shape": [
521
+ 1536
522
+ ],
523
+ "dtype": "float16",
524
+ "format": "f32-to-bf16",
525
+ "nbytes": 3072,
526
+ "byteOffset": 22037504
527
+ },
528
+ {
529
+ "name": "model.layers.5.self_attn.c_attn.weight",
530
+ "shape": [
531
+ 2048,
532
+ 1536
533
+ ],
534
+ "dtype": "float16",
535
+ "format": "f32-to-bf16",
536
+ "nbytes": 6291456,
537
+ "byteOffset": 22040576
538
+ },
539
+ {
540
+ "name": "model.layers.5.self_attn.c_attn.bias",
541
+ "shape": [
542
+ 2048
543
+ ],
544
+ "dtype": "float16",
545
+ "format": "f32-to-bf16",
546
+ "nbytes": 4096,
547
+ "byteOffset": 28332032
548
+ },
549
+ {
550
+ "name": "model.layers.5.self_attn.o_proj.weight",
551
+ "shape": [
552
+ 1536,
553
+ 1536
554
+ ],
555
+ "dtype": "float16",
556
+ "format": "f32-to-bf16",
557
+ "nbytes": 4718592,
558
+ "byteOffset": 28336128
559
+ },
560
+ {
561
+ "name": "model.layers.5.input_layernorm.weight",
562
+ "shape": [
563
+ 1536
564
+ ],
565
+ "dtype": "float16",
566
+ "format": "f32-to-bf16",
567
+ "nbytes": 3072,
568
+ "byteOffset": 33054720
569
+ },
570
+ {
571
+ "name": "model.layers.5.post_attention_layernorm.weight",
572
+ "shape": [
573
+ 1536
574
+ ],
575
+ "dtype": "float16",
576
+ "format": "f32-to-bf16",
577
+ "nbytes": 3072,
578
+ "byteOffset": 33057792
579
+ }
580
+ ],
581
+ "md5sum": "9a67ac0bf8178e4054e781eb27f403ea"
582
+ },
583
+ {
584
+ "dataPath": "params_shard_15.bin",
585
+ "format": "raw-shard",
586
+ "nbytes": 55050240,
587
+ "records": [
588
+ {
589
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
590
+ "shape": [
591
+ 17920,
592
+ 1536
593
+ ],
594
+ "dtype": "float16",
595
+ "format": "f32-to-bf16",
596
+ "nbytes": 55050240,
597
+ "byteOffset": 0
598
+ }
599
+ ],
600
+ "md5sum": "ccd5502649f7168e48df2ccbaeeb1d9b"
601
+ },
602
+ {
603
+ "dataPath": "params_shard_16.bin",
604
+ "format": "raw-shard",
605
+ "nbytes": 27525120,
606
+ "records": [
607
+ {
608
+ "name": "model.layers.6.mlp.down_proj.weight",
609
+ "shape": [
610
+ 1536,
611
+ 8960
612
+ ],
613
+ "dtype": "float16",
614
+ "format": "f32-to-bf16",
615
+ "nbytes": 27525120,
616
+ "byteOffset": 0
617
+ }
618
+ ],
619
+ "md5sum": "271cf1aad384c2d628d1aaf62a140a9c"
620
+ },
621
+ {
622
+ "dataPath": "params_shard_17.bin",
623
+ "format": "raw-shard",
624
+ "nbytes": 55050240,
625
+ "records": [
626
+ {
627
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
628
+ "shape": [
629
+ 17920,
630
+ 1536
631
+ ],
632
+ "dtype": "float16",
633
+ "format": "f32-to-bf16",
634
+ "nbytes": 55050240,
635
+ "byteOffset": 0
636
+ }
637
+ ],
638
+ "md5sum": "901c1912a0418111b928559f750e0d95"
639
+ },
640
+ {
641
+ "dataPath": "params_shard_18.bin",
642
+ "format": "raw-shard",
643
+ "nbytes": 27525120,
644
+ "records": [
645
+ {
646
+ "name": "model.layers.7.mlp.down_proj.weight",
647
+ "shape": [
648
+ 1536,
649
+ 8960
650
+ ],
651
+ "dtype": "float16",
652
+ "format": "f32-to-bf16",
653
+ "nbytes": 27525120,
654
+ "byteOffset": 0
655
+ }
656
+ ],
657
+ "md5sum": "379946453685b3ee0b55935c9538eecd"
658
+ },
659
+ {
660
+ "dataPath": "params_shard_19.bin",
661
+ "format": "raw-shard",
662
+ "nbytes": 55050240,
663
+ "records": [
664
+ {
665
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
666
+ "shape": [
667
+ 17920,
668
+ 1536
669
+ ],
670
+ "dtype": "float16",
671
+ "format": "f32-to-bf16",
672
+ "nbytes": 55050240,
673
+ "byteOffset": 0
674
+ }
675
+ ],
676
+ "md5sum": "56fa2871d8315eef920e5df9970ea9fe"
677
+ },
678
+ {
679
+ "dataPath": "params_shard_20.bin",
680
+ "format": "raw-shard",
681
+ "nbytes": 27525120,
682
+ "records": [
683
+ {
684
+ "name": "model.layers.8.mlp.down_proj.weight",
685
+ "shape": [
686
+ 1536,
687
+ 8960
688
+ ],
689
+ "dtype": "float16",
690
+ "format": "f32-to-bf16",
691
+ "nbytes": 27525120,
692
+ "byteOffset": 0
693
+ }
694
+ ],
695
+ "md5sum": "817b9f5a7e095c5f05876994890301b4"
696
+ },
697
+ {
698
+ "dataPath": "params_shard_21.bin",
699
+ "format": "raw-shard",
700
+ "nbytes": 33060864,
701
+ "records": [
702
+ {
703
+ "name": "model.layers.6.self_attn.c_attn.weight",
704
+ "shape": [
705
+ 2048,
706
+ 1536
707
+ ],
708
+ "dtype": "float16",
709
+ "format": "f32-to-bf16",
710
+ "nbytes": 6291456,
711
+ "byteOffset": 0
712
+ },
713
+ {
714
+ "name": "model.layers.6.self_attn.c_attn.bias",
715
+ "shape": [
716
+ 2048
717
+ ],
718
+ "dtype": "float16",
719
+ "format": "f32-to-bf16",
720
+ "nbytes": 4096,
721
+ "byteOffset": 6291456
722
+ },
723
+ {
724
+ "name": "model.layers.6.self_attn.o_proj.weight",
725
+ "shape": [
726
+ 1536,
727
+ 1536
728
+ ],
729
+ "dtype": "float16",
730
+ "format": "f32-to-bf16",
731
+ "nbytes": 4718592,
732
+ "byteOffset": 6295552
733
+ },
734
+ {
735
+ "name": "model.layers.6.input_layernorm.weight",
736
+ "shape": [
737
+ 1536
738
+ ],
739
+ "dtype": "float16",
740
+ "format": "f32-to-bf16",
741
+ "nbytes": 3072,
742
+ "byteOffset": 11014144
743
+ },
744
+ {
745
+ "name": "model.layers.6.post_attention_layernorm.weight",
746
+ "shape": [
747
+ 1536
748
+ ],
749
+ "dtype": "float16",
750
+ "format": "f32-to-bf16",
751
+ "nbytes": 3072,
752
+ "byteOffset": 11017216
753
+ },
754
+ {
755
+ "name": "model.layers.7.self_attn.c_attn.weight",
756
+ "shape": [
757
+ 2048,
758
+ 1536
759
+ ],
760
+ "dtype": "float16",
761
+ "format": "f32-to-bf16",
762
+ "nbytes": 6291456,
763
+ "byteOffset": 11020288
764
+ },
765
+ {
766
+ "name": "model.layers.7.self_attn.c_attn.bias",
767
+ "shape": [
768
+ 2048
769
+ ],
770
+ "dtype": "float16",
771
+ "format": "f32-to-bf16",
772
+ "nbytes": 4096,
773
+ "byteOffset": 17311744
774
+ },
775
+ {
776
+ "name": "model.layers.7.self_attn.o_proj.weight",
777
+ "shape": [
778
+ 1536,
779
+ 1536
780
+ ],
781
+ "dtype": "float16",
782
+ "format": "f32-to-bf16",
783
+ "nbytes": 4718592,
784
+ "byteOffset": 17315840
785
+ },
786
+ {
787
+ "name": "model.layers.7.input_layernorm.weight",
788
+ "shape": [
789
+ 1536
790
+ ],
791
+ "dtype": "float16",
792
+ "format": "f32-to-bf16",
793
+ "nbytes": 3072,
794
+ "byteOffset": 22034432
795
+ },
796
+ {
797
+ "name": "model.layers.7.post_attention_layernorm.weight",
798
+ "shape": [
799
+ 1536
800
+ ],
801
+ "dtype": "float16",
802
+ "format": "f32-to-bf16",
803
+ "nbytes": 3072,
804
+ "byteOffset": 22037504
805
+ },
806
+ {
807
+ "name": "model.layers.8.self_attn.c_attn.weight",
808
+ "shape": [
809
+ 2048,
810
+ 1536
811
+ ],
812
+ "dtype": "float16",
813
+ "format": "f32-to-bf16",
814
+ "nbytes": 6291456,
815
+ "byteOffset": 22040576
816
+ },
817
+ {
818
+ "name": "model.layers.8.self_attn.c_attn.bias",
819
+ "shape": [
820
+ 2048
821
+ ],
822
+ "dtype": "float16",
823
+ "format": "f32-to-bf16",
824
+ "nbytes": 4096,
825
+ "byteOffset": 28332032
826
+ },
827
+ {
828
+ "name": "model.layers.8.self_attn.o_proj.weight",
829
+ "shape": [
830
+ 1536,
831
+ 1536
832
+ ],
833
+ "dtype": "float16",
834
+ "format": "f32-to-bf16",
835
+ "nbytes": 4718592,
836
+ "byteOffset": 28336128
837
+ },
838
+ {
839
+ "name": "model.layers.8.input_layernorm.weight",
840
+ "shape": [
841
+ 1536
842
+ ],
843
+ "dtype": "float16",
844
+ "format": "f32-to-bf16",
845
+ "nbytes": 3072,
846
+ "byteOffset": 33054720
847
+ },
848
+ {
849
+ "name": "model.layers.8.post_attention_layernorm.weight",
850
+ "shape": [
851
+ 1536
852
+ ],
853
+ "dtype": "float16",
854
+ "format": "f32-to-bf16",
855
+ "nbytes": 3072,
856
+ "byteOffset": 33057792
857
+ }
858
+ ],
859
+ "md5sum": "bcd5c8951311cb8debd10c7a9cbd2764"
860
+ },
861
+ {
862
+ "dataPath": "params_shard_22.bin",
863
+ "format": "raw-shard",
864
+ "nbytes": 55050240,
865
+ "records": [
866
+ {
867
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
868
+ "shape": [
869
+ 17920,
870
+ 1536
871
+ ],
872
+ "dtype": "float16",
873
+ "format": "f32-to-bf16",
874
+ "nbytes": 55050240,
875
+ "byteOffset": 0
876
+ }
877
+ ],
878
+ "md5sum": "faf5cbdd3c2ff11883ce9f8554bcf498"
879
+ },
880
+ {
881
+ "dataPath": "params_shard_23.bin",
882
+ "format": "raw-shard",
883
+ "nbytes": 27525120,
884
+ "records": [
885
+ {
886
+ "name": "model.layers.9.mlp.down_proj.weight",
887
+ "shape": [
888
+ 1536,
889
+ 8960
890
+ ],
891
+ "dtype": "float16",
892
+ "format": "f32-to-bf16",
893
+ "nbytes": 27525120,
894
+ "byteOffset": 0
895
+ }
896
+ ],
897
+ "md5sum": "17ae02bef8401386b5eb9f653b2f420a"
898
+ },
899
+ {
900
+ "dataPath": "params_shard_24.bin",
901
+ "format": "raw-shard",
902
+ "nbytes": 55050240,
903
+ "records": [
904
+ {
905
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
906
+ "shape": [
907
+ 17920,
908
+ 1536
909
+ ],
910
+ "dtype": "float16",
911
+ "format": "f32-to-bf16",
912
+ "nbytes": 55050240,
913
+ "byteOffset": 0
914
+ }
915
+ ],
916
+ "md5sum": "69af3fefd0ad33870219cab220d98c1b"
917
+ },
918
+ {
919
+ "dataPath": "params_shard_25.bin",
920
+ "format": "raw-shard",
921
+ "nbytes": 27525120,
922
+ "records": [
923
+ {
924
+ "name": "model.layers.10.mlp.down_proj.weight",
925
+ "shape": [
926
+ 1536,
927
+ 8960
928
+ ],
929
+ "dtype": "float16",
930
+ "format": "f32-to-bf16",
931
+ "nbytes": 27525120,
932
+ "byteOffset": 0
933
+ }
934
+ ],
935
+ "md5sum": "5862aaa3d0c9514ced28f0c391b70d8c"
936
+ },
937
+ {
938
+ "dataPath": "params_shard_26.bin",
939
+ "format": "raw-shard",
940
+ "nbytes": 55050240,
941
+ "records": [
942
+ {
943
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
944
+ "shape": [
945
+ 17920,
946
+ 1536
947
+ ],
948
+ "dtype": "float16",
949
+ "format": "f32-to-bf16",
950
+ "nbytes": 55050240,
951
+ "byteOffset": 0
952
+ }
953
+ ],
954
+ "md5sum": "221ad0e352c9a06fbd95fa8990829aac"
955
+ },
956
+ {
957
+ "dataPath": "params_shard_27.bin",
958
+ "format": "raw-shard",
959
+ "nbytes": 27525120,
960
+ "records": [
961
+ {
962
+ "name": "model.layers.11.mlp.down_proj.weight",
963
+ "shape": [
964
+ 1536,
965
+ 8960
966
+ ],
967
+ "dtype": "float16",
968
+ "format": "f32-to-bf16",
969
+ "nbytes": 27525120,
970
+ "byteOffset": 0
971
+ }
972
+ ],
973
+ "md5sum": "605ca260375f758e22d4265fffa356fc"
974
+ },
975
+ {
976
+ "dataPath": "params_shard_28.bin",
977
+ "format": "raw-shard",
978
+ "nbytes": 33060864,
979
+ "records": [
980
+ {
981
+ "name": "model.layers.9.self_attn.c_attn.weight",
982
+ "shape": [
983
+ 2048,
984
+ 1536
985
+ ],
986
+ "dtype": "float16",
987
+ "format": "f32-to-bf16",
988
+ "nbytes": 6291456,
989
+ "byteOffset": 0
990
+ },
991
+ {
992
+ "name": "model.layers.9.self_attn.c_attn.bias",
993
+ "shape": [
994
+ 2048
995
+ ],
996
+ "dtype": "float16",
997
+ "format": "f32-to-bf16",
998
+ "nbytes": 4096,
999
+ "byteOffset": 6291456
1000
+ },
1001
+ {
1002
+ "name": "model.layers.9.self_attn.o_proj.weight",
1003
+ "shape": [
1004
+ 1536,
1005
+ 1536
1006
+ ],
1007
+ "dtype": "float16",
1008
+ "format": "f32-to-bf16",
1009
+ "nbytes": 4718592,
1010
+ "byteOffset": 6295552
1011
+ },
1012
+ {
1013
+ "name": "model.layers.9.input_layernorm.weight",
1014
+ "shape": [
1015
+ 1536
1016
+ ],
1017
+ "dtype": "float16",
1018
+ "format": "f32-to-bf16",
1019
+ "nbytes": 3072,
1020
+ "byteOffset": 11014144
1021
+ },
1022
+ {
1023
+ "name": "model.layers.9.post_attention_layernorm.weight",
1024
+ "shape": [
1025
+ 1536
1026
+ ],
1027
+ "dtype": "float16",
1028
+ "format": "f32-to-bf16",
1029
+ "nbytes": 3072,
1030
+ "byteOffset": 11017216
1031
+ },
1032
+ {
1033
+ "name": "model.layers.10.self_attn.c_attn.weight",
1034
+ "shape": [
1035
+ 2048,
1036
+ 1536
1037
+ ],
1038
+ "dtype": "float16",
1039
+ "format": "f32-to-bf16",
1040
+ "nbytes": 6291456,
1041
+ "byteOffset": 11020288
1042
+ },
1043
+ {
1044
+ "name": "model.layers.10.self_attn.c_attn.bias",
1045
+ "shape": [
1046
+ 2048
1047
+ ],
1048
+ "dtype": "float16",
1049
+ "format": "f32-to-bf16",
1050
+ "nbytes": 4096,
1051
+ "byteOffset": 17311744
1052
+ },
1053
+ {
1054
+ "name": "model.layers.10.self_attn.o_proj.weight",
1055
+ "shape": [
1056
+ 1536,
1057
+ 1536
1058
+ ],
1059
+ "dtype": "float16",
1060
+ "format": "f32-to-bf16",
1061
+ "nbytes": 4718592,
1062
+ "byteOffset": 17315840
1063
+ },
1064
+ {
1065
+ "name": "model.layers.10.input_layernorm.weight",
1066
+ "shape": [
1067
+ 1536
1068
+ ],
1069
+ "dtype": "float16",
1070
+ "format": "f32-to-bf16",
1071
+ "nbytes": 3072,
1072
+ "byteOffset": 22034432
1073
+ },
1074
+ {
1075
+ "name": "model.layers.10.post_attention_layernorm.weight",
1076
+ "shape": [
1077
+ 1536
1078
+ ],
1079
+ "dtype": "float16",
1080
+ "format": "f32-to-bf16",
1081
+ "nbytes": 3072,
1082
+ "byteOffset": 22037504
1083
+ },
1084
+ {
1085
+ "name": "model.layers.11.self_attn.c_attn.weight",
1086
+ "shape": [
1087
+ 2048,
1088
+ 1536
1089
+ ],
1090
+ "dtype": "float16",
1091
+ "format": "f32-to-bf16",
1092
+ "nbytes": 6291456,
1093
+ "byteOffset": 22040576
1094
+ },
1095
+ {
1096
+ "name": "model.layers.11.self_attn.c_attn.bias",
1097
+ "shape": [
1098
+ 2048
1099
+ ],
1100
+ "dtype": "float16",
1101
+ "format": "f32-to-bf16",
1102
+ "nbytes": 4096,
1103
+ "byteOffset": 28332032
1104
+ },
1105
+ {
1106
+ "name": "model.layers.11.self_attn.o_proj.weight",
1107
+ "shape": [
1108
+ 1536,
1109
+ 1536
1110
+ ],
1111
+ "dtype": "float16",
1112
+ "format": "f32-to-bf16",
1113
+ "nbytes": 4718592,
1114
+ "byteOffset": 28336128
1115
+ },
1116
+ {
1117
+ "name": "model.layers.11.input_layernorm.weight",
1118
+ "shape": [
1119
+ 1536
1120
+ ],
1121
+ "dtype": "float16",
1122
+ "format": "f32-to-bf16",
1123
+ "nbytes": 3072,
1124
+ "byteOffset": 33054720
1125
+ },
1126
+ {
1127
+ "name": "model.layers.11.post_attention_layernorm.weight",
1128
+ "shape": [
1129
+ 1536
1130
+ ],
1131
+ "dtype": "float16",
1132
+ "format": "f32-to-bf16",
1133
+ "nbytes": 3072,
1134
+ "byteOffset": 33057792
1135
+ }
1136
+ ],
1137
+ "md5sum": "03e434a9483fb2aaa19f4cab3e4fb083"
1138
+ },
1139
+ {
1140
+ "dataPath": "params_shard_29.bin",
1141
+ "format": "raw-shard",
1142
+ "nbytes": 55050240,
1143
+ "records": [
1144
+ {
1145
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
1146
+ "shape": [
1147
+ 17920,
1148
+ 1536
1149
+ ],
1150
+ "dtype": "float16",
1151
+ "format": "f32-to-bf16",
1152
+ "nbytes": 55050240,
1153
+ "byteOffset": 0
1154
+ }
1155
+ ],
1156
+ "md5sum": "dc39bbf30e260c4170b068cc8553d7cd"
1157
+ },
1158
+ {
1159
+ "dataPath": "params_shard_30.bin",
1160
+ "format": "raw-shard",
1161
+ "nbytes": 27525120,
1162
+ "records": [
1163
+ {
1164
+ "name": "model.layers.12.mlp.down_proj.weight",
1165
+ "shape": [
1166
+ 1536,
1167
+ 8960
1168
+ ],
1169
+ "dtype": "float16",
1170
+ "format": "f32-to-bf16",
1171
+ "nbytes": 27525120,
1172
+ "byteOffset": 0
1173
+ }
1174
+ ],
1175
+ "md5sum": "d6ebf5371ced23357eb54793db3a4afb"
1176
+ },
1177
+ {
1178
+ "dataPath": "params_shard_31.bin",
1179
+ "format": "raw-shard",
1180
+ "nbytes": 55050240,
1181
+ "records": [
1182
+ {
1183
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
1184
+ "shape": [
1185
+ 17920,
1186
+ 1536
1187
+ ],
1188
+ "dtype": "float16",
1189
+ "format": "f32-to-bf16",
1190
+ "nbytes": 55050240,
1191
+ "byteOffset": 0
1192
+ }
1193
+ ],
1194
+ "md5sum": "b23a6222ce16baad47b61fe41f8660b0"
1195
+ },
1196
+ {
1197
+ "dataPath": "params_shard_32.bin",
1198
+ "format": "raw-shard",
1199
+ "nbytes": 27525120,
1200
+ "records": [
1201
+ {
1202
+ "name": "model.layers.13.mlp.down_proj.weight",
1203
+ "shape": [
1204
+ 1536,
1205
+ 8960
1206
+ ],
1207
+ "dtype": "float16",
1208
+ "format": "f32-to-bf16",
1209
+ "nbytes": 27525120,
1210
+ "byteOffset": 0
1211
+ }
1212
+ ],
1213
+ "md5sum": "f579126381720332c477dfeb50701e20"
1214
+ },
1215
+ {
1216
+ "dataPath": "params_shard_33.bin",
1217
+ "format": "raw-shard",
1218
+ "nbytes": 55050240,
1219
+ "records": [
1220
+ {
1221
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
1222
+ "shape": [
1223
+ 17920,
1224
+ 1536
1225
+ ],
1226
+ "dtype": "float16",
1227
+ "format": "f32-to-bf16",
1228
+ "nbytes": 55050240,
1229
+ "byteOffset": 0
1230
+ }
1231
+ ],
1232
+ "md5sum": "44b78e073ac8337e052b9744380ec3c7"
1233
+ },
1234
+ {
1235
+ "dataPath": "params_shard_34.bin",
1236
+ "format": "raw-shard",
1237
+ "nbytes": 27525120,
1238
+ "records": [
1239
+ {
1240
+ "name": "model.layers.14.mlp.down_proj.weight",
1241
+ "shape": [
1242
+ 1536,
1243
+ 8960
1244
+ ],
1245
+ "dtype": "float16",
1246
+ "format": "f32-to-bf16",
1247
+ "nbytes": 27525120,
1248
+ "byteOffset": 0
1249
+ }
1250
+ ],
1251
+ "md5sum": "d205aa713ee99dee09b571cfedc7cefd"
1252
+ },
1253
+ {
1254
+ "dataPath": "params_shard_35.bin",
1255
+ "format": "raw-shard",
1256
+ "nbytes": 33060864,
1257
+ "records": [
1258
+ {
1259
+ "name": "model.layers.12.self_attn.c_attn.weight",
1260
+ "shape": [
1261
+ 2048,
1262
+ 1536
1263
+ ],
1264
+ "dtype": "float16",
1265
+ "format": "f32-to-bf16",
1266
+ "nbytes": 6291456,
1267
+ "byteOffset": 0
1268
+ },
1269
+ {
1270
+ "name": "model.layers.12.self_attn.c_attn.bias",
1271
+ "shape": [
1272
+ 2048
1273
+ ],
1274
+ "dtype": "float16",
1275
+ "format": "f32-to-bf16",
1276
+ "nbytes": 4096,
1277
+ "byteOffset": 6291456
1278
+ },
1279
+ {
1280
+ "name": "model.layers.12.self_attn.o_proj.weight",
1281
+ "shape": [
1282
+ 1536,
1283
+ 1536
1284
+ ],
1285
+ "dtype": "float16",
1286
+ "format": "f32-to-bf16",
1287
+ "nbytes": 4718592,
1288
+ "byteOffset": 6295552
1289
+ },
1290
+ {
1291
+ "name": "model.layers.12.input_layernorm.weight",
1292
+ "shape": [
1293
+ 1536
1294
+ ],
1295
+ "dtype": "float16",
1296
+ "format": "f32-to-bf16",
1297
+ "nbytes": 3072,
1298
+ "byteOffset": 11014144
1299
+ },
1300
+ {
1301
+ "name": "model.layers.12.post_attention_layernorm.weight",
1302
+ "shape": [
1303
+ 1536
1304
+ ],
1305
+ "dtype": "float16",
1306
+ "format": "f32-to-bf16",
1307
+ "nbytes": 3072,
1308
+ "byteOffset": 11017216
1309
+ },
1310
+ {
1311
+ "name": "model.layers.13.self_attn.c_attn.weight",
1312
+ "shape": [
1313
+ 2048,
1314
+ 1536
1315
+ ],
1316
+ "dtype": "float16",
1317
+ "format": "f32-to-bf16",
1318
+ "nbytes": 6291456,
1319
+ "byteOffset": 11020288
1320
+ },
1321
+ {
1322
+ "name": "model.layers.13.self_attn.c_attn.bias",
1323
+ "shape": [
1324
+ 2048
1325
+ ],
1326
+ "dtype": "float16",
1327
+ "format": "f32-to-bf16",
1328
+ "nbytes": 4096,
1329
+ "byteOffset": 17311744
1330
+ },
1331
+ {
1332
+ "name": "model.layers.13.self_attn.o_proj.weight",
1333
+ "shape": [
1334
+ 1536,
1335
+ 1536
1336
+ ],
1337
+ "dtype": "float16",
1338
+ "format": "f32-to-bf16",
1339
+ "nbytes": 4718592,
1340
+ "byteOffset": 17315840
1341
+ },
1342
+ {
1343
+ "name": "model.layers.13.input_layernorm.weight",
1344
+ "shape": [
1345
+ 1536
1346
+ ],
1347
+ "dtype": "float16",
1348
+ "format": "f32-to-bf16",
1349
+ "nbytes": 3072,
1350
+ "byteOffset": 22034432
1351
+ },
1352
+ {
1353
+ "name": "model.layers.13.post_attention_layernorm.weight",
1354
+ "shape": [
1355
+ 1536
1356
+ ],
1357
+ "dtype": "float16",
1358
+ "format": "f32-to-bf16",
1359
+ "nbytes": 3072,
1360
+ "byteOffset": 22037504
1361
+ },
1362
+ {
1363
+ "name": "model.layers.14.self_attn.c_attn.weight",
1364
+ "shape": [
1365
+ 2048,
1366
+ 1536
1367
+ ],
1368
+ "dtype": "float16",
1369
+ "format": "f32-to-bf16",
1370
+ "nbytes": 6291456,
1371
+ "byteOffset": 22040576
1372
+ },
1373
+ {
1374
+ "name": "model.layers.14.self_attn.c_attn.bias",
1375
+ "shape": [
1376
+ 2048
1377
+ ],
1378
+ "dtype": "float16",
1379
+ "format": "f32-to-bf16",
1380
+ "nbytes": 4096,
1381
+ "byteOffset": 28332032
1382
+ },
1383
+ {
1384
+ "name": "model.layers.14.self_attn.o_proj.weight",
1385
+ "shape": [
1386
+ 1536,
1387
+ 1536
1388
+ ],
1389
+ "dtype": "float16",
1390
+ "format": "f32-to-bf16",
1391
+ "nbytes": 4718592,
1392
+ "byteOffset": 28336128
1393
+ },
1394
+ {
1395
+ "name": "model.layers.14.input_layernorm.weight",
1396
+ "shape": [
1397
+ 1536
1398
+ ],
1399
+ "dtype": "float16",
1400
+ "format": "f32-to-bf16",
1401
+ "nbytes": 3072,
1402
+ "byteOffset": 33054720
1403
+ },
1404
+ {
1405
+ "name": "model.layers.14.post_attention_layernorm.weight",
1406
+ "shape": [
1407
+ 1536
1408
+ ],
1409
+ "dtype": "float16",
1410
+ "format": "f32-to-bf16",
1411
+ "nbytes": 3072,
1412
+ "byteOffset": 33057792
1413
+ }
1414
+ ],
1415
+ "md5sum": "54c052279cd7ffad4678b87826192f76"
1416
+ },
1417
+ {
1418
+ "dataPath": "params_shard_36.bin",
1419
+ "format": "raw-shard",
1420
+ "nbytes": 55050240,
1421
+ "records": [
1422
+ {
1423
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
1424
+ "shape": [
1425
+ 17920,
1426
+ 1536
1427
+ ],
1428
+ "dtype": "float16",
1429
+ "format": "f32-to-bf16",
1430
+ "nbytes": 55050240,
1431
+ "byteOffset": 0
1432
+ }
1433
+ ],
1434
+ "md5sum": "0dac5ce31c1f2eaa17a56d0da55f7b6d"
1435
+ },
1436
+ {
1437
+ "dataPath": "params_shard_37.bin",
1438
+ "format": "raw-shard",
1439
+ "nbytes": 27525120,
1440
+ "records": [
1441
+ {
1442
+ "name": "model.layers.15.mlp.down_proj.weight",
1443
+ "shape": [
1444
+ 1536,
1445
+ 8960
1446
+ ],
1447
+ "dtype": "float16",
1448
+ "format": "f32-to-bf16",
1449
+ "nbytes": 27525120,
1450
+ "byteOffset": 0
1451
+ }
1452
+ ],
1453
+ "md5sum": "6b9f7819701341646bc2a266108f95b2"
1454
+ },
1455
+ {
1456
+ "dataPath": "params_shard_38.bin",
1457
+ "format": "raw-shard",
1458
+ "nbytes": 55050240,
1459
+ "records": [
1460
+ {
1461
+ "name": "model.layers.16.mlp.gate_up_proj.weight",
1462
+ "shape": [
1463
+ 17920,
1464
+ 1536
1465
+ ],
1466
+ "dtype": "float16",
1467
+ "format": "f32-to-bf16",
1468
+ "nbytes": 55050240,
1469
+ "byteOffset": 0
1470
+ }
1471
+ ],
1472
+ "md5sum": "04cd76c5b6fc4ebe6baaa8644b6930fd"
1473
+ },
1474
+ {
1475
+ "dataPath": "params_shard_39.bin",
1476
+ "format": "raw-shard",
1477
+ "nbytes": 27525120,
1478
+ "records": [
1479
+ {
1480
+ "name": "model.layers.16.mlp.down_proj.weight",
1481
+ "shape": [
1482
+ 1536,
1483
+ 8960
1484
+ ],
1485
+ "dtype": "float16",
1486
+ "format": "f32-to-bf16",
1487
+ "nbytes": 27525120,
1488
+ "byteOffset": 0
1489
+ }
1490
+ ],
1491
+ "md5sum": "349f0c70d75d8a410f1b236fd7e395b5"
1492
+ },
1493
+ {
1494
+ "dataPath": "params_shard_40.bin",
1495
+ "format": "raw-shard",
1496
+ "nbytes": 55050240,
1497
+ "records": [
1498
+ {
1499
+ "name": "model.layers.17.mlp.gate_up_proj.weight",
1500
+ "shape": [
1501
+ 17920,
1502
+ 1536
1503
+ ],
1504
+ "dtype": "float16",
1505
+ "format": "f32-to-bf16",
1506
+ "nbytes": 55050240,
1507
+ "byteOffset": 0
1508
+ }
1509
+ ],
1510
+ "md5sum": "63960103006f54f63a378784156e1ff6"
1511
+ },
1512
+ {
1513
+ "dataPath": "params_shard_41.bin",
1514
+ "format": "raw-shard",
1515
+ "nbytes": 27525120,
1516
+ "records": [
1517
+ {
1518
+ "name": "model.layers.17.mlp.down_proj.weight",
1519
+ "shape": [
1520
+ 1536,
1521
+ 8960
1522
+ ],
1523
+ "dtype": "float16",
1524
+ "format": "f32-to-bf16",
1525
+ "nbytes": 27525120,
1526
+ "byteOffset": 0
1527
+ }
1528
+ ],
1529
+ "md5sum": "0f07069c62ca1e2d8c3e7ee712fe82b0"
1530
+ },
1531
+ {
1532
+ "dataPath": "params_shard_42.bin",
1533
+ "format": "raw-shard",
1534
+ "nbytes": 33060864,
1535
+ "records": [
1536
+ {
1537
+ "name": "model.layers.15.self_attn.c_attn.weight",
1538
+ "shape": [
1539
+ 2048,
1540
+ 1536
1541
+ ],
1542
+ "dtype": "float16",
1543
+ "format": "f32-to-bf16",
1544
+ "nbytes": 6291456,
1545
+ "byteOffset": 0
1546
+ },
1547
+ {
1548
+ "name": "model.layers.15.self_attn.c_attn.bias",
1549
+ "shape": [
1550
+ 2048
1551
+ ],
1552
+ "dtype": "float16",
1553
+ "format": "f32-to-bf16",
1554
+ "nbytes": 4096,
1555
+ "byteOffset": 6291456
1556
+ },
1557
+ {
1558
+ "name": "model.layers.15.self_attn.o_proj.weight",
1559
+ "shape": [
1560
+ 1536,
1561
+ 1536
1562
+ ],
1563
+ "dtype": "float16",
1564
+ "format": "f32-to-bf16",
1565
+ "nbytes": 4718592,
1566
+ "byteOffset": 6295552
1567
+ },
1568
+ {
1569
+ "name": "model.layers.15.input_layernorm.weight",
1570
+ "shape": [
1571
+ 1536
1572
+ ],
1573
+ "dtype": "float16",
1574
+ "format": "f32-to-bf16",
1575
+ "nbytes": 3072,
1576
+ "byteOffset": 11014144
1577
+ },
1578
+ {
1579
+ "name": "model.layers.15.post_attention_layernorm.weight",
1580
+ "shape": [
1581
+ 1536
1582
+ ],
1583
+ "dtype": "float16",
1584
+ "format": "f32-to-bf16",
1585
+ "nbytes": 3072,
1586
+ "byteOffset": 11017216
1587
+ },
1588
+ {
1589
+ "name": "model.layers.16.self_attn.c_attn.weight",
1590
+ "shape": [
1591
+ 2048,
1592
+ 1536
1593
+ ],
1594
+ "dtype": "float16",
1595
+ "format": "f32-to-bf16",
1596
+ "nbytes": 6291456,
1597
+ "byteOffset": 11020288
1598
+ },
1599
+ {
1600
+ "name": "model.layers.16.self_attn.c_attn.bias",
1601
+ "shape": [
1602
+ 2048
1603
+ ],
1604
+ "dtype": "float16",
1605
+ "format": "f32-to-bf16",
1606
+ "nbytes": 4096,
1607
+ "byteOffset": 17311744
1608
+ },
1609
+ {
1610
+ "name": "model.layers.16.self_attn.o_proj.weight",
1611
+ "shape": [
1612
+ 1536,
1613
+ 1536
1614
+ ],
1615
+ "dtype": "float16",
1616
+ "format": "f32-to-bf16",
1617
+ "nbytes": 4718592,
1618
+ "byteOffset": 17315840
1619
+ },
1620
+ {
1621
+ "name": "model.layers.16.input_layernorm.weight",
1622
+ "shape": [
1623
+ 1536
1624
+ ],
1625
+ "dtype": "float16",
1626
+ "format": "f32-to-bf16",
1627
+ "nbytes": 3072,
1628
+ "byteOffset": 22034432
1629
+ },
1630
+ {
1631
+ "name": "model.layers.16.post_attention_layernorm.weight",
1632
+ "shape": [
1633
+ 1536
1634
+ ],
1635
+ "dtype": "float16",
1636
+ "format": "f32-to-bf16",
1637
+ "nbytes": 3072,
1638
+ "byteOffset": 22037504
1639
+ },
1640
+ {
1641
+ "name": "model.layers.17.self_attn.c_attn.weight",
1642
+ "shape": [
1643
+ 2048,
1644
+ 1536
1645
+ ],
1646
+ "dtype": "float16",
1647
+ "format": "f32-to-bf16",
1648
+ "nbytes": 6291456,
1649
+ "byteOffset": 22040576
1650
+ },
1651
+ {
1652
+ "name": "model.layers.17.self_attn.c_attn.bias",
1653
+ "shape": [
1654
+ 2048
1655
+ ],
1656
+ "dtype": "float16",
1657
+ "format": "f32-to-bf16",
1658
+ "nbytes": 4096,
1659
+ "byteOffset": 28332032
1660
+ },
1661
+ {
1662
+ "name": "model.layers.17.self_attn.o_proj.weight",
1663
+ "shape": [
1664
+ 1536,
1665
+ 1536
1666
+ ],
1667
+ "dtype": "float16",
1668
+ "format": "f32-to-bf16",
1669
+ "nbytes": 4718592,
1670
+ "byteOffset": 28336128
1671
+ },
1672
+ {
1673
+ "name": "model.layers.17.input_layernorm.weight",
1674
+ "shape": [
1675
+ 1536
1676
+ ],
1677
+ "dtype": "float16",
1678
+ "format": "f32-to-bf16",
1679
+ "nbytes": 3072,
1680
+ "byteOffset": 33054720
1681
+ },
1682
+ {
1683
+ "name": "model.layers.17.post_attention_layernorm.weight",
1684
+ "shape": [
1685
+ 1536
1686
+ ],
1687
+ "dtype": "float16",
1688
+ "format": "f32-to-bf16",
1689
+ "nbytes": 3072,
1690
+ "byteOffset": 33057792
1691
+ }
1692
+ ],
1693
+ "md5sum": "cbdd25fd19637b648dff1256b0362d00"
1694
+ },
1695
+ {
1696
+ "dataPath": "params_shard_43.bin",
1697
+ "format": "raw-shard",
1698
+ "nbytes": 55050240,
1699
+ "records": [
1700
+ {
1701
+ "name": "model.layers.18.mlp.gate_up_proj.weight",
1702
+ "shape": [
1703
+ 17920,
1704
+ 1536
1705
+ ],
1706
+ "dtype": "float16",
1707
+ "format": "f32-to-bf16",
1708
+ "nbytes": 55050240,
1709
+ "byteOffset": 0
1710
+ }
1711
+ ],
1712
+ "md5sum": "639c98457bec8c7e83c2c1da8b9410f2"
1713
+ },
1714
+ {
1715
+ "dataPath": "params_shard_44.bin",
1716
+ "format": "raw-shard",
1717
+ "nbytes": 27525120,
1718
+ "records": [
1719
+ {
1720
+ "name": "model.layers.18.mlp.down_proj.weight",
1721
+ "shape": [
1722
+ 1536,
1723
+ 8960
1724
+ ],
1725
+ "dtype": "float16",
1726
+ "format": "f32-to-bf16",
1727
+ "nbytes": 27525120,
1728
+ "byteOffset": 0
1729
+ }
1730
+ ],
1731
+ "md5sum": "c335e79b08a2fe03d63fa514b658d909"
1732
+ },
1733
+ {
1734
+ "dataPath": "params_shard_45.bin",
1735
+ "format": "raw-shard",
1736
+ "nbytes": 55050240,
1737
+ "records": [
1738
+ {
1739
+ "name": "model.layers.19.mlp.gate_up_proj.weight",
1740
+ "shape": [
1741
+ 17920,
1742
+ 1536
1743
+ ],
1744
+ "dtype": "float16",
1745
+ "format": "f32-to-bf16",
1746
+ "nbytes": 55050240,
1747
+ "byteOffset": 0
1748
+ }
1749
+ ],
1750
+ "md5sum": "6e0525006bc32f3ec81492080dc9f7f2"
1751
+ },
1752
+ {
1753
+ "dataPath": "params_shard_46.bin",
1754
+ "format": "raw-shard",
1755
+ "nbytes": 27525120,
1756
+ "records": [
1757
+ {
1758
+ "name": "model.layers.19.mlp.down_proj.weight",
1759
+ "shape": [
1760
+ 1536,
1761
+ 8960
1762
+ ],
1763
+ "dtype": "float16",
1764
+ "format": "f32-to-bf16",
1765
+ "nbytes": 27525120,
1766
+ "byteOffset": 0
1767
+ }
1768
+ ],
1769
+ "md5sum": "d968c5af427bccf73fb824e0be561cd9"
1770
+ },
1771
+ {
1772
+ "dataPath": "params_shard_47.bin",
1773
+ "format": "raw-shard",
1774
+ "nbytes": 55050240,
1775
+ "records": [
1776
+ {
1777
+ "name": "model.layers.20.mlp.gate_up_proj.weight",
1778
+ "shape": [
1779
+ 17920,
1780
+ 1536
1781
+ ],
1782
+ "dtype": "float16",
1783
+ "format": "f32-to-bf16",
1784
+ "nbytes": 55050240,
1785
+ "byteOffset": 0
1786
+ }
1787
+ ],
1788
+ "md5sum": "80e8b32cb11f59a6022acf852eea7ace"
1789
+ },
1790
+ {
1791
+ "dataPath": "params_shard_48.bin",
1792
+ "format": "raw-shard",
1793
+ "nbytes": 27525120,
1794
+ "records": [
1795
+ {
1796
+ "name": "model.layers.20.mlp.down_proj.weight",
1797
+ "shape": [
1798
+ 1536,
1799
+ 8960
1800
+ ],
1801
+ "dtype": "float16",
1802
+ "format": "f32-to-bf16",
1803
+ "nbytes": 27525120,
1804
+ "byteOffset": 0
1805
+ }
1806
+ ],
1807
+ "md5sum": "c075bcffc12a1d712ca76c1471ae444a"
1808
+ },
1809
+ {
1810
+ "dataPath": "params_shard_49.bin",
1811
+ "format": "raw-shard",
1812
+ "nbytes": 33060864,
1813
+ "records": [
1814
+ {
1815
+ "name": "model.layers.18.self_attn.c_attn.weight",
1816
+ "shape": [
1817
+ 2048,
1818
+ 1536
1819
+ ],
1820
+ "dtype": "float16",
1821
+ "format": "f32-to-bf16",
1822
+ "nbytes": 6291456,
1823
+ "byteOffset": 0
1824
+ },
1825
+ {
1826
+ "name": "model.layers.18.self_attn.c_attn.bias",
1827
+ "shape": [
1828
+ 2048
1829
+ ],
1830
+ "dtype": "float16",
1831
+ "format": "f32-to-bf16",
1832
+ "nbytes": 4096,
1833
+ "byteOffset": 6291456
1834
+ },
1835
+ {
1836
+ "name": "model.layers.18.self_attn.o_proj.weight",
1837
+ "shape": [
1838
+ 1536,
1839
+ 1536
1840
+ ],
1841
+ "dtype": "float16",
1842
+ "format": "f32-to-bf16",
1843
+ "nbytes": 4718592,
1844
+ "byteOffset": 6295552
1845
+ },
1846
+ {
1847
+ "name": "model.layers.18.input_layernorm.weight",
1848
+ "shape": [
1849
+ 1536
1850
+ ],
1851
+ "dtype": "float16",
1852
+ "format": "f32-to-bf16",
1853
+ "nbytes": 3072,
1854
+ "byteOffset": 11014144
1855
+ },
1856
+ {
1857
+ "name": "model.layers.18.post_attention_layernorm.weight",
1858
+ "shape": [
1859
+ 1536
1860
+ ],
1861
+ "dtype": "float16",
1862
+ "format": "f32-to-bf16",
1863
+ "nbytes": 3072,
1864
+ "byteOffset": 11017216
1865
+ },
1866
+ {
1867
+ "name": "model.layers.19.self_attn.c_attn.weight",
1868
+ "shape": [
1869
+ 2048,
1870
+ 1536
1871
+ ],
1872
+ "dtype": "float16",
1873
+ "format": "f32-to-bf16",
1874
+ "nbytes": 6291456,
1875
+ "byteOffset": 11020288
1876
+ },
1877
+ {
1878
+ "name": "model.layers.19.self_attn.c_attn.bias",
1879
+ "shape": [
1880
+ 2048
1881
+ ],
1882
+ "dtype": "float16",
1883
+ "format": "f32-to-bf16",
1884
+ "nbytes": 4096,
1885
+ "byteOffset": 17311744
1886
+ },
1887
+ {
1888
+ "name": "model.layers.19.self_attn.o_proj.weight",
1889
+ "shape": [
1890
+ 1536,
1891
+ 1536
1892
+ ],
1893
+ "dtype": "float16",
1894
+ "format": "f32-to-bf16",
1895
+ "nbytes": 4718592,
1896
+ "byteOffset": 17315840
1897
+ },
1898
+ {
1899
+ "name": "model.layers.19.input_layernorm.weight",
1900
+ "shape": [
1901
+ 1536
1902
+ ],
1903
+ "dtype": "float16",
1904
+ "format": "f32-to-bf16",
1905
+ "nbytes": 3072,
1906
+ "byteOffset": 22034432
1907
+ },
1908
+ {
1909
+ "name": "model.layers.19.post_attention_layernorm.weight",
1910
+ "shape": [
1911
+ 1536
1912
+ ],
1913
+ "dtype": "float16",
1914
+ "format": "f32-to-bf16",
1915
+ "nbytes": 3072,
1916
+ "byteOffset": 22037504
1917
+ },
1918
+ {
1919
+ "name": "model.layers.20.self_attn.c_attn.weight",
1920
+ "shape": [
1921
+ 2048,
1922
+ 1536
1923
+ ],
1924
+ "dtype": "float16",
1925
+ "format": "f32-to-bf16",
1926
+ "nbytes": 6291456,
1927
+ "byteOffset": 22040576
1928
+ },
1929
+ {
1930
+ "name": "model.layers.20.self_attn.c_attn.bias",
1931
+ "shape": [
1932
+ 2048
1933
+ ],
1934
+ "dtype": "float16",
1935
+ "format": "f32-to-bf16",
1936
+ "nbytes": 4096,
1937
+ "byteOffset": 28332032
1938
+ },
1939
+ {
1940
+ "name": "model.layers.20.self_attn.o_proj.weight",
1941
+ "shape": [
1942
+ 1536,
1943
+ 1536
1944
+ ],
1945
+ "dtype": "float16",
1946
+ "format": "f32-to-bf16",
1947
+ "nbytes": 4718592,
1948
+ "byteOffset": 28336128
1949
+ },
1950
+ {
1951
+ "name": "model.layers.20.input_layernorm.weight",
1952
+ "shape": [
1953
+ 1536
1954
+ ],
1955
+ "dtype": "float16",
1956
+ "format": "f32-to-bf16",
1957
+ "nbytes": 3072,
1958
+ "byteOffset": 33054720
1959
+ },
1960
+ {
1961
+ "name": "model.layers.20.post_attention_layernorm.weight",
1962
+ "shape": [
1963
+ 1536
1964
+ ],
1965
+ "dtype": "float16",
1966
+ "format": "f32-to-bf16",
1967
+ "nbytes": 3072,
1968
+ "byteOffset": 33057792
1969
+ }
1970
+ ],
1971
+ "md5sum": "73cd2787ae936b6aa7f0c3c9c85b3ff2"
1972
+ },
1973
+ {
1974
+ "dataPath": "params_shard_50.bin",
1975
+ "format": "raw-shard",
1976
+ "nbytes": 55050240,
1977
+ "records": [
1978
+ {
1979
+ "name": "model.layers.21.mlp.gate_up_proj.weight",
1980
+ "shape": [
1981
+ 17920,
1982
+ 1536
1983
+ ],
1984
+ "dtype": "float16",
1985
+ "format": "f32-to-bf16",
1986
+ "nbytes": 55050240,
1987
+ "byteOffset": 0
1988
+ }
1989
+ ],
1990
+ "md5sum": "136a14789a41480fcc08d89e66ca2501"
1991
+ },
1992
+ {
1993
+ "dataPath": "params_shard_51.bin",
1994
+ "format": "raw-shard",
1995
+ "nbytes": 27525120,
1996
+ "records": [
1997
+ {
1998
+ "name": "model.layers.21.mlp.down_proj.weight",
1999
+ "shape": [
2000
+ 1536,
2001
+ 8960
2002
+ ],
2003
+ "dtype": "float16",
2004
+ "format": "f32-to-bf16",
2005
+ "nbytes": 27525120,
2006
+ "byteOffset": 0
2007
+ }
2008
+ ],
2009
+ "md5sum": "a3f54fbb03f00cc32caaa8b0a5cb8394"
2010
+ },
2011
+ {
2012
+ "dataPath": "params_shard_52.bin",
2013
+ "format": "raw-shard",
2014
+ "nbytes": 55050240,
2015
+ "records": [
2016
+ {
2017
+ "name": "model.layers.22.mlp.gate_up_proj.weight",
2018
+ "shape": [
2019
+ 17920,
2020
+ 1536
2021
+ ],
2022
+ "dtype": "float16",
2023
+ "format": "f32-to-bf16",
2024
+ "nbytes": 55050240,
2025
+ "byteOffset": 0
2026
+ }
2027
+ ],
2028
+ "md5sum": "2ffd1ebbe30c22f30f61a3d92c032d1b"
2029
+ },
2030
+ {
2031
+ "dataPath": "params_shard_53.bin",
2032
+ "format": "raw-shard",
2033
+ "nbytes": 27525120,
2034
+ "records": [
2035
+ {
2036
+ "name": "model.layers.22.mlp.down_proj.weight",
2037
+ "shape": [
2038
+ 1536,
2039
+ 8960
2040
+ ],
2041
+ "dtype": "float16",
2042
+ "format": "f32-to-bf16",
2043
+ "nbytes": 27525120,
2044
+ "byteOffset": 0
2045
+ }
2046
+ ],
2047
+ "md5sum": "2a50d409322f1488c49d5287dd21c3b8"
2048
+ },
2049
+ {
2050
+ "dataPath": "params_shard_54.bin",
2051
+ "format": "raw-shard",
2052
+ "nbytes": 55050240,
2053
+ "records": [
2054
+ {
2055
+ "name": "model.layers.23.mlp.gate_up_proj.weight",
2056
+ "shape": [
2057
+ 17920,
2058
+ 1536
2059
+ ],
2060
+ "dtype": "float16",
2061
+ "format": "f32-to-bf16",
2062
+ "nbytes": 55050240,
2063
+ "byteOffset": 0
2064
+ }
2065
+ ],
2066
+ "md5sum": "89a049c13e34f896861d5989e7af37ca"
2067
+ },
2068
+ {
2069
+ "dataPath": "params_shard_55.bin",
2070
+ "format": "raw-shard",
2071
+ "nbytes": 27525120,
2072
+ "records": [
2073
+ {
2074
+ "name": "model.layers.23.mlp.down_proj.weight",
2075
+ "shape": [
2076
+ 1536,
2077
+ 8960
2078
+ ],
2079
+ "dtype": "float16",
2080
+ "format": "f32-to-bf16",
2081
+ "nbytes": 27525120,
2082
+ "byteOffset": 0
2083
+ }
2084
+ ],
2085
+ "md5sum": "60e61d289310f677b122617d45a9c7e4"
2086
+ },
2087
+ {
2088
+ "dataPath": "params_shard_56.bin",
2089
+ "format": "raw-shard",
2090
+ "nbytes": 33060864,
2091
+ "records": [
2092
+ {
2093
+ "name": "model.layers.21.self_attn.c_attn.weight",
2094
+ "shape": [
2095
+ 2048,
2096
+ 1536
2097
+ ],
2098
+ "dtype": "float16",
2099
+ "format": "f32-to-bf16",
2100
+ "nbytes": 6291456,
2101
+ "byteOffset": 0
2102
+ },
2103
+ {
2104
+ "name": "model.layers.21.self_attn.c_attn.bias",
2105
+ "shape": [
2106
+ 2048
2107
+ ],
2108
+ "dtype": "float16",
2109
+ "format": "f32-to-bf16",
2110
+ "nbytes": 4096,
2111
+ "byteOffset": 6291456
2112
+ },
2113
+ {
2114
+ "name": "model.layers.21.self_attn.o_proj.weight",
2115
+ "shape": [
2116
+ 1536,
2117
+ 1536
2118
+ ],
2119
+ "dtype": "float16",
2120
+ "format": "f32-to-bf16",
2121
+ "nbytes": 4718592,
2122
+ "byteOffset": 6295552
2123
+ },
2124
+ {
2125
+ "name": "model.layers.21.input_layernorm.weight",
2126
+ "shape": [
2127
+ 1536
2128
+ ],
2129
+ "dtype": "float16",
2130
+ "format": "f32-to-bf16",
2131
+ "nbytes": 3072,
2132
+ "byteOffset": 11014144
2133
+ },
2134
+ {
2135
+ "name": "model.layers.21.post_attention_layernorm.weight",
2136
+ "shape": [
2137
+ 1536
2138
+ ],
2139
+ "dtype": "float16",
2140
+ "format": "f32-to-bf16",
2141
+ "nbytes": 3072,
2142
+ "byteOffset": 11017216
2143
+ },
2144
+ {
2145
+ "name": "model.layers.22.self_attn.c_attn.weight",
2146
+ "shape": [
2147
+ 2048,
2148
+ 1536
2149
+ ],
2150
+ "dtype": "float16",
2151
+ "format": "f32-to-bf16",
2152
+ "nbytes": 6291456,
2153
+ "byteOffset": 11020288
2154
+ },
2155
+ {
2156
+ "name": "model.layers.22.self_attn.c_attn.bias",
2157
+ "shape": [
2158
+ 2048
2159
+ ],
2160
+ "dtype": "float16",
2161
+ "format": "f32-to-bf16",
2162
+ "nbytes": 4096,
2163
+ "byteOffset": 17311744
2164
+ },
2165
+ {
2166
+ "name": "model.layers.22.self_attn.o_proj.weight",
2167
+ "shape": [
2168
+ 1536,
2169
+ 1536
2170
+ ],
2171
+ "dtype": "float16",
2172
+ "format": "f32-to-bf16",
2173
+ "nbytes": 4718592,
2174
+ "byteOffset": 17315840
2175
+ },
2176
+ {
2177
+ "name": "model.layers.22.input_layernorm.weight",
2178
+ "shape": [
2179
+ 1536
2180
+ ],
2181
+ "dtype": "float16",
2182
+ "format": "f32-to-bf16",
2183
+ "nbytes": 3072,
2184
+ "byteOffset": 22034432
2185
+ },
2186
+ {
2187
+ "name": "model.layers.22.post_attention_layernorm.weight",
2188
+ "shape": [
2189
+ 1536
2190
+ ],
2191
+ "dtype": "float16",
2192
+ "format": "f32-to-bf16",
2193
+ "nbytes": 3072,
2194
+ "byteOffset": 22037504
2195
+ },
2196
+ {
2197
+ "name": "model.layers.23.self_attn.c_attn.weight",
2198
+ "shape": [
2199
+ 2048,
2200
+ 1536
2201
+ ],
2202
+ "dtype": "float16",
2203
+ "format": "f32-to-bf16",
2204
+ "nbytes": 6291456,
2205
+ "byteOffset": 22040576
2206
+ },
2207
+ {
2208
+ "name": "model.layers.23.self_attn.c_attn.bias",
2209
+ "shape": [
2210
+ 2048
2211
+ ],
2212
+ "dtype": "float16",
2213
+ "format": "f32-to-bf16",
2214
+ "nbytes": 4096,
2215
+ "byteOffset": 28332032
2216
+ },
2217
+ {
2218
+ "name": "model.layers.23.self_attn.o_proj.weight",
2219
+ "shape": [
2220
+ 1536,
2221
+ 1536
2222
+ ],
2223
+ "dtype": "float16",
2224
+ "format": "f32-to-bf16",
2225
+ "nbytes": 4718592,
2226
+ "byteOffset": 28336128
2227
+ },
2228
+ {
2229
+ "name": "model.layers.23.input_layernorm.weight",
2230
+ "shape": [
2231
+ 1536
2232
+ ],
2233
+ "dtype": "float16",
2234
+ "format": "f32-to-bf16",
2235
+ "nbytes": 3072,
2236
+ "byteOffset": 33054720
2237
+ },
2238
+ {
2239
+ "name": "model.layers.23.post_attention_layernorm.weight",
2240
+ "shape": [
2241
+ 1536
2242
+ ],
2243
+ "dtype": "float16",
2244
+ "format": "f32-to-bf16",
2245
+ "nbytes": 3072,
2246
+ "byteOffset": 33057792
2247
+ }
2248
+ ],
2249
+ "md5sum": "2859dd48c6a504a6ff5fc777a5d0eb01"
2250
+ },
2251
+ {
2252
+ "dataPath": "params_shard_57.bin",
2253
+ "format": "raw-shard",
2254
+ "nbytes": 55050240,
2255
+ "records": [
2256
+ {
2257
+ "name": "model.layers.24.mlp.gate_up_proj.weight",
2258
+ "shape": [
2259
+ 17920,
2260
+ 1536
2261
+ ],
2262
+ "dtype": "float16",
2263
+ "format": "f32-to-bf16",
2264
+ "nbytes": 55050240,
2265
+ "byteOffset": 0
2266
+ }
2267
+ ],
2268
+ "md5sum": "00c023336d6889bb9d24bcb3762ff819"
2269
+ },
2270
+ {
2271
+ "dataPath": "params_shard_58.bin",
2272
+ "format": "raw-shard",
2273
+ "nbytes": 27525120,
2274
+ "records": [
2275
+ {
2276
+ "name": "model.layers.24.mlp.down_proj.weight",
2277
+ "shape": [
2278
+ 1536,
2279
+ 8960
2280
+ ],
2281
+ "dtype": "float16",
2282
+ "format": "f32-to-bf16",
2283
+ "nbytes": 27525120,
2284
+ "byteOffset": 0
2285
+ }
2286
+ ],
2287
+ "md5sum": "725c3d334464020a85016e629f58eda8"
2288
+ },
2289
+ {
2290
+ "dataPath": "params_shard_59.bin",
2291
+ "format": "raw-shard",
2292
+ "nbytes": 55050240,
2293
+ "records": [
2294
+ {
2295
+ "name": "model.layers.25.mlp.gate_up_proj.weight",
2296
+ "shape": [
2297
+ 17920,
2298
+ 1536
2299
+ ],
2300
+ "dtype": "float16",
2301
+ "format": "f32-to-bf16",
2302
+ "nbytes": 55050240,
2303
+ "byteOffset": 0
2304
+ }
2305
+ ],
2306
+ "md5sum": "09c4056fb69cfcce7f2c9829ba155e28"
2307
+ },
2308
+ {
2309
+ "dataPath": "params_shard_60.bin",
2310
+ "format": "raw-shard",
2311
+ "nbytes": 27525120,
2312
+ "records": [
2313
+ {
2314
+ "name": "model.layers.25.mlp.down_proj.weight",
2315
+ "shape": [
2316
+ 1536,
2317
+ 8960
2318
+ ],
2319
+ "dtype": "float16",
2320
+ "format": "f32-to-bf16",
2321
+ "nbytes": 27525120,
2322
+ "byteOffset": 0
2323
+ }
2324
+ ],
2325
+ "md5sum": "898fa6e97b95ddc00a3a685714dd2056"
2326
+ },
2327
+ {
2328
+ "dataPath": "params_shard_61.bin",
2329
+ "format": "raw-shard",
2330
+ "nbytes": 55050240,
2331
+ "records": [
2332
+ {
2333
+ "name": "model.layers.26.mlp.gate_up_proj.weight",
2334
+ "shape": [
2335
+ 17920,
2336
+ 1536
2337
+ ],
2338
+ "dtype": "float16",
2339
+ "format": "f32-to-bf16",
2340
+ "nbytes": 55050240,
2341
+ "byteOffset": 0
2342
+ }
2343
+ ],
2344
+ "md5sum": "ae5ce4d265237aae0fd576bb6f528dca"
2345
+ },
2346
+ {
2347
+ "dataPath": "params_shard_62.bin",
2348
+ "format": "raw-shard",
2349
+ "nbytes": 27525120,
2350
+ "records": [
2351
+ {
2352
+ "name": "model.layers.26.mlp.down_proj.weight",
2353
+ "shape": [
2354
+ 1536,
2355
+ 8960
2356
+ ],
2357
+ "dtype": "float16",
2358
+ "format": "f32-to-bf16",
2359
+ "nbytes": 27525120,
2360
+ "byteOffset": 0
2361
+ }
2362
+ ],
2363
+ "md5sum": "58375129ef74f23538d522ef09315b51"
2364
+ },
2365
+ {
2366
+ "dataPath": "params_shard_63.bin",
2367
+ "format": "raw-shard",
2368
+ "nbytes": 33060864,
2369
+ "records": [
2370
+ {
2371
+ "name": "model.layers.24.self_attn.c_attn.weight",
2372
+ "shape": [
2373
+ 2048,
2374
+ 1536
2375
+ ],
2376
+ "dtype": "float16",
2377
+ "format": "f32-to-bf16",
2378
+ "nbytes": 6291456,
2379
+ "byteOffset": 0
2380
+ },
2381
+ {
2382
+ "name": "model.layers.24.self_attn.c_attn.bias",
2383
+ "shape": [
2384
+ 2048
2385
+ ],
2386
+ "dtype": "float16",
2387
+ "format": "f32-to-bf16",
2388
+ "nbytes": 4096,
2389
+ "byteOffset": 6291456
2390
+ },
2391
+ {
2392
+ "name": "model.layers.24.self_attn.o_proj.weight",
2393
+ "shape": [
2394
+ 1536,
2395
+ 1536
2396
+ ],
2397
+ "dtype": "float16",
2398
+ "format": "f32-to-bf16",
2399
+ "nbytes": 4718592,
2400
+ "byteOffset": 6295552
2401
+ },
2402
+ {
2403
+ "name": "model.layers.24.input_layernorm.weight",
2404
+ "shape": [
2405
+ 1536
2406
+ ],
2407
+ "dtype": "float16",
2408
+ "format": "f32-to-bf16",
2409
+ "nbytes": 3072,
2410
+ "byteOffset": 11014144
2411
+ },
2412
+ {
2413
+ "name": "model.layers.24.post_attention_layernorm.weight",
2414
+ "shape": [
2415
+ 1536
2416
+ ],
2417
+ "dtype": "float16",
2418
+ "format": "f32-to-bf16",
2419
+ "nbytes": 3072,
2420
+ "byteOffset": 11017216
2421
+ },
2422
+ {
2423
+ "name": "model.layers.25.self_attn.c_attn.weight",
2424
+ "shape": [
2425
+ 2048,
2426
+ 1536
2427
+ ],
2428
+ "dtype": "float16",
2429
+ "format": "f32-to-bf16",
2430
+ "nbytes": 6291456,
2431
+ "byteOffset": 11020288
2432
+ },
2433
+ {
2434
+ "name": "model.layers.25.self_attn.c_attn.bias",
2435
+ "shape": [
2436
+ 2048
2437
+ ],
2438
+ "dtype": "float16",
2439
+ "format": "f32-to-bf16",
2440
+ "nbytes": 4096,
2441
+ "byteOffset": 17311744
2442
+ },
2443
+ {
2444
+ "name": "model.layers.25.self_attn.o_proj.weight",
2445
+ "shape": [
2446
+ 1536,
2447
+ 1536
2448
+ ],
2449
+ "dtype": "float16",
2450
+ "format": "f32-to-bf16",
2451
+ "nbytes": 4718592,
2452
+ "byteOffset": 17315840
2453
+ },
2454
+ {
2455
+ "name": "model.layers.25.input_layernorm.weight",
2456
+ "shape": [
2457
+ 1536
2458
+ ],
2459
+ "dtype": "float16",
2460
+ "format": "f32-to-bf16",
2461
+ "nbytes": 3072,
2462
+ "byteOffset": 22034432
2463
+ },
2464
+ {
2465
+ "name": "model.layers.25.post_attention_layernorm.weight",
2466
+ "shape": [
2467
+ 1536
2468
+ ],
2469
+ "dtype": "float16",
2470
+ "format": "f32-to-bf16",
2471
+ "nbytes": 3072,
2472
+ "byteOffset": 22037504
2473
+ },
2474
+ {
2475
+ "name": "model.layers.26.self_attn.c_attn.weight",
2476
+ "shape": [
2477
+ 2048,
2478
+ 1536
2479
+ ],
2480
+ "dtype": "float16",
2481
+ "format": "f32-to-bf16",
2482
+ "nbytes": 6291456,
2483
+ "byteOffset": 22040576
2484
+ },
2485
+ {
2486
+ "name": "model.layers.26.self_attn.c_attn.bias",
2487
+ "shape": [
2488
+ 2048
2489
+ ],
2490
+ "dtype": "float16",
2491
+ "format": "f32-to-bf16",
2492
+ "nbytes": 4096,
2493
+ "byteOffset": 28332032
2494
+ },
2495
+ {
2496
+ "name": "model.layers.26.self_attn.o_proj.weight",
2497
+ "shape": [
2498
+ 1536,
2499
+ 1536
2500
+ ],
2501
+ "dtype": "float16",
2502
+ "format": "f32-to-bf16",
2503
+ "nbytes": 4718592,
2504
+ "byteOffset": 28336128
2505
+ },
2506
+ {
2507
+ "name": "model.layers.26.input_layernorm.weight",
2508
+ "shape": [
2509
+ 1536
2510
+ ],
2511
+ "dtype": "float16",
2512
+ "format": "f32-to-bf16",
2513
+ "nbytes": 3072,
2514
+ "byteOffset": 33054720
2515
+ },
2516
+ {
2517
+ "name": "model.layers.26.post_attention_layernorm.weight",
2518
+ "shape": [
2519
+ 1536
2520
+ ],
2521
+ "dtype": "float16",
2522
+ "format": "f32-to-bf16",
2523
+ "nbytes": 3072,
2524
+ "byteOffset": 33057792
2525
+ }
2526
+ ],
2527
+ "md5sum": "50113496b65c91d08a408ca51382050c"
2528
+ },
2529
+ {
2530
+ "dataPath": "params_shard_64.bin",
2531
+ "format": "raw-shard",
2532
+ "nbytes": 55050240,
2533
+ "records": [
2534
+ {
2535
+ "name": "model.layers.27.mlp.gate_up_proj.weight",
2536
+ "shape": [
2537
+ 17920,
2538
+ 1536
2539
+ ],
2540
+ "dtype": "float16",
2541
+ "format": "f32-to-bf16",
2542
+ "nbytes": 55050240,
2543
+ "byteOffset": 0
2544
+ }
2545
+ ],
2546
+ "md5sum": "77bba71a9b67ed4377d409949e9ad145"
2547
+ },
2548
+ {
2549
+ "dataPath": "params_shard_65.bin",
2550
+ "format": "raw-shard",
2551
+ "nbytes": 27525120,
2552
+ "records": [
2553
+ {
2554
+ "name": "model.layers.27.mlp.down_proj.weight",
2555
+ "shape": [
2556
+ 1536,
2557
+ 8960
2558
+ ],
2559
+ "dtype": "float16",
2560
+ "format": "f32-to-bf16",
2561
+ "nbytes": 27525120,
2562
+ "byteOffset": 0
2563
+ }
2564
+ ],
2565
+ "md5sum": "0d6ad7eb791649a022f869b1edeb5355"
2566
+ },
2567
+ {
2568
+ "dataPath": "params_shard_66.bin",
2569
+ "format": "raw-shard",
2570
+ "nbytes": 11023360,
2571
+ "records": [
2572
+ {
2573
+ "name": "model.layers.27.self_attn.c_attn.weight",
2574
+ "shape": [
2575
+ 2048,
2576
+ 1536
2577
+ ],
2578
+ "dtype": "float16",
2579
+ "format": "f32-to-bf16",
2580
+ "nbytes": 6291456,
2581
+ "byteOffset": 0
2582
+ },
2583
+ {
2584
+ "name": "model.layers.27.self_attn.c_attn.bias",
2585
+ "shape": [
2586
+ 2048
2587
+ ],
2588
+ "dtype": "float16",
2589
+ "format": "f32-to-bf16",
2590
+ "nbytes": 4096,
2591
+ "byteOffset": 6291456
2592
+ },
2593
+ {
2594
+ "name": "model.layers.27.self_attn.o_proj.weight",
2595
+ "shape": [
2596
+ 1536,
2597
+ 1536
2598
+ ],
2599
+ "dtype": "float16",
2600
+ "format": "f32-to-bf16",
2601
+ "nbytes": 4718592,
2602
+ "byteOffset": 6295552
2603
+ },
2604
+ {
2605
+ "name": "model.layers.27.input_layernorm.weight",
2606
+ "shape": [
2607
+ 1536
2608
+ ],
2609
+ "dtype": "float16",
2610
+ "format": "f32-to-bf16",
2611
+ "nbytes": 3072,
2612
+ "byteOffset": 11014144
2613
+ },
2614
+ {
2615
+ "name": "model.layers.27.post_attention_layernorm.weight",
2616
+ "shape": [
2617
+ 1536
2618
+ ],
2619
+ "dtype": "float16",
2620
+ "format": "f32-to-bf16",
2621
+ "nbytes": 3072,
2622
+ "byteOffset": 11017216
2623
+ },
2624
+ {
2625
+ "name": "model.norm.weight",
2626
+ "shape": [
2627
+ 1536
2628
+ ],
2629
+ "dtype": "float16",
2630
+ "format": "f32-to-bf16",
2631
+ "nbytes": 3072,
2632
+ "byteOffset": 11020288
2633
+ }
2634
+ ],
2635
+ "md5sum": "29c619ad6174a67fe28726e26cc3974d"
2636
+ }
2637
+ ]
2638
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94273a5d0232f316bd22dff6d3c74e5d3a3ec943550d50c17e986248bfad3a40
3
+ size 466747392
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f48893f4c00c5aebfa08fd4670be6a7feb1cfd3bcee56f1ae8247bbc8bd049a
3
+ size 55050240
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e1150a9235cae04dedd70dceb6d04b68063ba898adbccc3a6eae532e167d813
3
+ size 55050240
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:847367775f9a7ed97d56b9b4aa085e9d21e6119c81b4f64ada061e5ec04def19
3
+ size 27525120
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee74e3d0b230249cc282287f91b65113e4e6c63f1c90af6a7d9c6416f615b466
3
+ size 55050240
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:580e39e24a773bc9a2ad49ec4705f6ee63043369f0cc608fed39fbedbcff5cf0
3
+ size 27525120
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fd13328e1345ea0fdf8c1307bd555ba86a80954f3af9c537f40769226a03b10
3
+ size 33060864
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c28b7cf43e1e75142720bed8c0b81d382385bb75c707ae19f157b534f9f6c98
3
+ size 55050240
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6ba31cfd7078dd6670de840a71f45b0213a95ab9db7d0dce90909380bc77344
3
+ size 27525120
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28795da2c98c13ff4969befc676b2b1498e546f511081964512dfa4198cef7f9
3
+ size 55050240
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13d0710846a52f8b29163de4dccdbc2564cb7326e24a791d2071944433a48455
3
+ size 27525120
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80b08736da97358973209674ec7d1338f141a5a784c4829c651ffe80bafcb4b5
3
+ size 55050240
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:320c7e9478a4b42ede149b8ddb7e551137f28de3ae409bddc5346161aaee37fe
3
+ size 27525120
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:276b7878180dd1bd37eb33e7ef4adffb76fb147a85789a23ba06a2bd496c41ce
3
+ size 27525120
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a21140ae18945af8b4d14f0e8a44607ee9e65002040505c566f01ceb5ec648
3
+ size 33060864
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff99ab52581293e3b1f5d04ae469da981480335c5473dfbec12f9c608efa8978
3
+ size 55050240
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8ebcb3fc9e9b3d34044d91b663adb90c0c64ac626b16fd272d9f98c7697ceaf
3
+ size 27525120
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a4c22ba0ecfc4ebe4efb3ac4965d430663abff3e94c74813c3fd9978ae9f84d
3
+ size 55050240
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d5f3b525cd3a9a2a809e107998308684809fed8e48296f195022ef26300b70e
3
+ size 27525120
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c317068124b7add21e049e1b83bff85ce5215426bb184a0f6239b2c273b3224
3
+ size 55050240
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:797709528e8159253b3766d1bb500a6bf176b78f1af95b32a42f12473bac4265
3
+ size 27525120
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4d0342ca260d8f1b939669678d3172ac78e2099f0ee6ee79ce12acb10fed6a4
3
+ size 33060864
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0d6106261d8f4c5538e122de63e72def7ab8ef0726fa03486e7d1a71e0b187b
3
+ size 55050240
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61f4164679f238e92f28b009c00309d86f47481271ad9d5a51289804abd4582c
3
+ size 55050240
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a5673680d5cafef38bd4782ab8c3904718f2b1c78d3ed51a6c49fc9b04866fa
3
+ size 27525120
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e2dbeacdd8554b3b11f3d40e03f1668a89a82d506e6221da8e24f7500998371
3
+ size 55050240
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5bc4fcccb8f3c3ee62ef7f9cbe52b3302f6c8d2d0a1a2475b69e25d83dca62e
3
+ size 27525120
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dc920714790561ed7d0f417c9b9fb622d03a7c7000240cd6bef37cf2ea71bfc
3
+ size 55050240
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:371479f149dad1d147116a1b92076448d7fa34e122c410eb37ea5764b4d05e32
3
+ size 27525120
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ad4fdb9d60933c373bfd9cc5f65a0b2a1582e7556a6fa47705379593bec87d
3
+ size 33060864
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3419c2e859b925fd2b1828ce49b8a2a45fd80b747b5f0ed34585c39d7dac6a3
3
+ size 55050240
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ccdfb5e4d32660dd7998683ad4b4b064b5cbdf57f410fdcf838695dba3ede17
3
+ size 27525120
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350bef7aad28ee53bc44eb9bd166d3537f828173639fabf97094e5f27f113ffe
3
+ size 55050240
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b150b489c765c4cc6dbdb4ab2fda019304695c60aa60c8801f831c7974bc02db
3
+ size 27525120
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d721b338259ef82c81f041ae383819256838b602e51005fc735b3e103c3ac8a9
3
+ size 27525120
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8c0ca3aa80c3e599bd4d92fdf0c70e933be98d10b03ec0d8763d2f21909e285
3
+ size 55050240
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9c67c94742047daa9d5c4ed31b6fbed6d1768cba8665826e49aaa267d3badf5
3
+ size 27525120
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8e44cd509e0318a0608bdd2af75a8ef35029e389ecd09f7f89ffd9877091f77
3
+ size 33060864
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9755e83da9c89b54d4a2345252ee44d1c3388bfe9f67a265e4b12c3558cf2757
3
+ size 55050240
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ff4ac3e08407d6b9dc2ccf54152f034a248789fc79ba734a66bf27936748795
3
+ size 27525120
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a02fc140d272f78a5c78ff8b11298843d2f37fa1fccfd20138120e1bb4ae212d
3
+ size 55050240
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b04a632d1f27795b16af5a571fb3dc97b123ea3feb2788a6699547b22800ff1a
3
+ size 27525120
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b20bd2fca44db6684afc54d8c8c74841152c483fcd214be510c6b70d6d6a5d04
3
+ size 55050240
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04cc735e97f44125b7aef33f149a4fbfb93061cbccd344173fa32ab4ed319bd1
3
+ size 27525120
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da9ce3a0353e0c1969e66338e1c5265b24aabb05412fc9fdfc5b4b48d212c586
3
+ size 33060864
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12926276bb36201240a01dd532cfa635bf7ae9d1127d50bb16347776efa3867b
3
+ size 55050240
params_shard_50.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fe35eaa0c219453bcfcae4175f9fe8e25983afd7d82fb676c6335f835ba39d1
3
+ size 55050240