Kieran2828 commited on
Commit
97954d6
·
verified ·
1 Parent(s): 658c2fe

Upload 4 files

Browse files
ndarray-cache.json ADDED
@@ -0,0 +1,1691 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 137,
4
+ "ParamBytes": 394450304.0,
5
+ "BitsPerParam": 4.501226222315055
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 18435776,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.q_weight",
15
+ "shape": [
16
+ 32003,
17
+ 128
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 16385536,
22
+ "byteOffset": 0
23
+ },
24
+ {
25
+ "name": "model.embed_tokens.q_scale",
26
+ "shape": [
27
+ 32003,
28
+ 32
29
+ ],
30
+ "dtype": "float16",
31
+ "format": "f32-to-bf16",
32
+ "nbytes": 2048192,
33
+ "byteOffset": 16385536
34
+ },
35
+ {
36
+ "name": "model.norm.weight",
37
+ "shape": [
38
+ 1024
39
+ ],
40
+ "dtype": "float16",
41
+ "format": "f32-to-bf16",
42
+ "nbytes": 2048,
43
+ "byteOffset": 18433728
44
+ }
45
+ ],
46
+ "md5sum": "39262692eadce1dc2cc71f107dde7dba"
47
+ },
48
+ {
49
+ "dataPath": "params_shard_1.bin",
50
+ "format": "raw-shard",
51
+ "nbytes": 16777216,
52
+ "records": [
53
+ {
54
+ "name": "model.layers.0.moe.e1_e3.q_weight",
55
+ "shape": [
56
+ 4,
57
+ 8192,
58
+ 128
59
+ ],
60
+ "dtype": "uint32",
61
+ "format": "f32-to-bf16",
62
+ "nbytes": 16777216,
63
+ "byteOffset": 0
64
+ }
65
+ ],
66
+ "md5sum": "4b4790e347b682a2d1625d4ca8cf2fdc"
67
+ },
68
+ {
69
+ "dataPath": "params_shard_2.bin",
70
+ "format": "raw-shard",
71
+ "nbytes": 16777216,
72
+ "records": [
73
+ {
74
+ "name": "model.layers.1.moe.e1_e3.q_weight",
75
+ "shape": [
76
+ 4,
77
+ 8192,
78
+ 128
79
+ ],
80
+ "dtype": "uint32",
81
+ "format": "f32-to-bf16",
82
+ "nbytes": 16777216,
83
+ "byteOffset": 0
84
+ }
85
+ ],
86
+ "md5sum": "96cd48c89abfb63f5e47ac29c2dea29f"
87
+ },
88
+ {
89
+ "dataPath": "params_shard_3.bin",
90
+ "format": "raw-shard",
91
+ "nbytes": 16777216,
92
+ "records": [
93
+ {
94
+ "name": "model.layers.2.moe.e1_e3.q_weight",
95
+ "shape": [
96
+ 4,
97
+ 8192,
98
+ 128
99
+ ],
100
+ "dtype": "uint32",
101
+ "format": "f32-to-bf16",
102
+ "nbytes": 16777216,
103
+ "byteOffset": 0
104
+ }
105
+ ],
106
+ "md5sum": "2ebe7fb32b921287504c204504a3a3bf"
107
+ },
108
+ {
109
+ "dataPath": "params_shard_4.bin",
110
+ "format": "raw-shard",
111
+ "nbytes": 16777216,
112
+ "records": [
113
+ {
114
+ "name": "model.layers.3.moe.e1_e3.q_weight",
115
+ "shape": [
116
+ 4,
117
+ 8192,
118
+ 128
119
+ ],
120
+ "dtype": "uint32",
121
+ "format": "f32-to-bf16",
122
+ "nbytes": 16777216,
123
+ "byteOffset": 0
124
+ }
125
+ ],
126
+ "md5sum": "040d60d3b1c140992c37ae6f47157639"
127
+ },
128
+ {
129
+ "dataPath": "params_shard_5.bin",
130
+ "format": "raw-shard",
131
+ "nbytes": 16777216,
132
+ "records": [
133
+ {
134
+ "name": "model.layers.4.moe.e1_e3.q_weight",
135
+ "shape": [
136
+ 4,
137
+ 8192,
138
+ 128
139
+ ],
140
+ "dtype": "uint32",
141
+ "format": "f32-to-bf16",
142
+ "nbytes": 16777216,
143
+ "byteOffset": 0
144
+ }
145
+ ],
146
+ "md5sum": "78d5dc519482da4b6fa5a6fdbf3075fc"
147
+ },
148
+ {
149
+ "dataPath": "params_shard_6.bin",
150
+ "format": "raw-shard",
151
+ "nbytes": 16777216,
152
+ "records": [
153
+ {
154
+ "name": "model.layers.5.moe.e1_e3.q_weight",
155
+ "shape": [
156
+ 4,
157
+ 8192,
158
+ 128
159
+ ],
160
+ "dtype": "uint32",
161
+ "format": "f32-to-bf16",
162
+ "nbytes": 16777216,
163
+ "byteOffset": 0
164
+ }
165
+ ],
166
+ "md5sum": "ddf0d5b05b33b228c73a67b12bdf5181"
167
+ },
168
+ {
169
+ "dataPath": "params_shard_7.bin",
170
+ "format": "raw-shard",
171
+ "nbytes": 16777216,
172
+ "records": [
173
+ {
174
+ "name": "model.layers.6.moe.e1_e3.q_weight",
175
+ "shape": [
176
+ 4,
177
+ 8192,
178
+ 128
179
+ ],
180
+ "dtype": "uint32",
181
+ "format": "f32-to-bf16",
182
+ "nbytes": 16777216,
183
+ "byteOffset": 0
184
+ }
185
+ ],
186
+ "md5sum": "6393f0828fe8a7be924c6a58b122b655"
187
+ },
188
+ {
189
+ "dataPath": "params_shard_8.bin",
190
+ "format": "raw-shard",
191
+ "nbytes": 16777216,
192
+ "records": [
193
+ {
194
+ "name": "model.layers.7.moe.e1_e3.q_weight",
195
+ "shape": [
196
+ 4,
197
+ 8192,
198
+ 128
199
+ ],
200
+ "dtype": "uint32",
201
+ "format": "f32-to-bf16",
202
+ "nbytes": 16777216,
203
+ "byteOffset": 0
204
+ }
205
+ ],
206
+ "md5sum": "747277f3a82adb4957c54b7fbcc2265e"
207
+ },
208
+ {
209
+ "dataPath": "params_shard_9.bin",
210
+ "format": "raw-shard",
211
+ "nbytes": 33138368,
212
+ "records": [
213
+ {
214
+ "name": "lm_head.q_weight",
215
+ "shape": [
216
+ 32003,
217
+ 128
218
+ ],
219
+ "dtype": "uint32",
220
+ "format": "f32-to-bf16",
221
+ "nbytes": 16385536,
222
+ "byteOffset": 0
223
+ },
224
+ {
225
+ "name": "lm_head.q_scale",
226
+ "shape": [
227
+ 32003,
228
+ 32
229
+ ],
230
+ "dtype": "float16",
231
+ "format": "f32-to-bf16",
232
+ "nbytes": 2048192,
233
+ "byteOffset": 16385536
234
+ },
235
+ {
236
+ "name": "model.layers.0.input_layernorm.weight",
237
+ "shape": [
238
+ 1024
239
+ ],
240
+ "dtype": "float16",
241
+ "format": "f32-to-bf16",
242
+ "nbytes": 2048,
243
+ "byteOffset": 18433728
244
+ },
245
+ {
246
+ "name": "model.layers.1.input_layernorm.weight",
247
+ "shape": [
248
+ 1024
249
+ ],
250
+ "dtype": "float16",
251
+ "format": "f32-to-bf16",
252
+ "nbytes": 2048,
253
+ "byteOffset": 18435776
254
+ },
255
+ {
256
+ "name": "model.layers.2.input_layernorm.weight",
257
+ "shape": [
258
+ 1024
259
+ ],
260
+ "dtype": "float16",
261
+ "format": "f32-to-bf16",
262
+ "nbytes": 2048,
263
+ "byteOffset": 18437824
264
+ },
265
+ {
266
+ "name": "model.layers.3.input_layernorm.weight",
267
+ "shape": [
268
+ 1024
269
+ ],
270
+ "dtype": "float16",
271
+ "format": "f32-to-bf16",
272
+ "nbytes": 2048,
273
+ "byteOffset": 18439872
274
+ },
275
+ {
276
+ "name": "model.layers.4.input_layernorm.weight",
277
+ "shape": [
278
+ 1024
279
+ ],
280
+ "dtype": "float16",
281
+ "format": "f32-to-bf16",
282
+ "nbytes": 2048,
283
+ "byteOffset": 18441920
284
+ },
285
+ {
286
+ "name": "model.layers.5.input_layernorm.weight",
287
+ "shape": [
288
+ 1024
289
+ ],
290
+ "dtype": "float16",
291
+ "format": "f32-to-bf16",
292
+ "nbytes": 2048,
293
+ "byteOffset": 18443968
294
+ },
295
+ {
296
+ "name": "model.layers.6.input_layernorm.weight",
297
+ "shape": [
298
+ 1024
299
+ ],
300
+ "dtype": "float16",
301
+ "format": "f32-to-bf16",
302
+ "nbytes": 2048,
303
+ "byteOffset": 18446016
304
+ },
305
+ {
306
+ "name": "model.layers.7.input_layernorm.weight",
307
+ "shape": [
308
+ 1024
309
+ ],
310
+ "dtype": "float16",
311
+ "format": "f32-to-bf16",
312
+ "nbytes": 2048,
313
+ "byteOffset": 18448064
314
+ },
315
+ {
316
+ "name": "model.layers.8.input_layernorm.weight",
317
+ "shape": [
318
+ 1024
319
+ ],
320
+ "dtype": "float16",
321
+ "format": "f32-to-bf16",
322
+ "nbytes": 2048,
323
+ "byteOffset": 18450112
324
+ },
325
+ {
326
+ "name": "model.layers.9.input_layernorm.weight",
327
+ "shape": [
328
+ 1024
329
+ ],
330
+ "dtype": "float16",
331
+ "format": "f32-to-bf16",
332
+ "nbytes": 2048,
333
+ "byteOffset": 18452160
334
+ },
335
+ {
336
+ "name": "model.layers.10.input_layernorm.weight",
337
+ "shape": [
338
+ 1024
339
+ ],
340
+ "dtype": "float16",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 2048,
343
+ "byteOffset": 18454208
344
+ },
345
+ {
346
+ "name": "model.layers.11.input_layernorm.weight",
347
+ "shape": [
348
+ 1024
349
+ ],
350
+ "dtype": "float16",
351
+ "format": "f32-to-bf16",
352
+ "nbytes": 2048,
353
+ "byteOffset": 18456256
354
+ },
355
+ {
356
+ "name": "model.layers.0.moe.e1_e3.q_scale",
357
+ "shape": [
358
+ 4,
359
+ 8192,
360
+ 32
361
+ ],
362
+ "dtype": "float16",
363
+ "format": "f32-to-bf16",
364
+ "nbytes": 2097152,
365
+ "byteOffset": 18458304
366
+ },
367
+ {
368
+ "name": "model.layers.1.moe.e1_e3.q_scale",
369
+ "shape": [
370
+ 4,
371
+ 8192,
372
+ 32
373
+ ],
374
+ "dtype": "float16",
375
+ "format": "f32-to-bf16",
376
+ "nbytes": 2097152,
377
+ "byteOffset": 20555456
378
+ },
379
+ {
380
+ "name": "model.layers.2.moe.e1_e3.q_scale",
381
+ "shape": [
382
+ 4,
383
+ 8192,
384
+ 32
385
+ ],
386
+ "dtype": "float16",
387
+ "format": "f32-to-bf16",
388
+ "nbytes": 2097152,
389
+ "byteOffset": 22652608
390
+ },
391
+ {
392
+ "name": "model.layers.3.moe.e1_e3.q_scale",
393
+ "shape": [
394
+ 4,
395
+ 8192,
396
+ 32
397
+ ],
398
+ "dtype": "float16",
399
+ "format": "f32-to-bf16",
400
+ "nbytes": 2097152,
401
+ "byteOffset": 24749760
402
+ },
403
+ {
404
+ "name": "model.layers.4.moe.e1_e3.q_scale",
405
+ "shape": [
406
+ 4,
407
+ 8192,
408
+ 32
409
+ ],
410
+ "dtype": "float16",
411
+ "format": "f32-to-bf16",
412
+ "nbytes": 2097152,
413
+ "byteOffset": 26846912
414
+ },
415
+ {
416
+ "name": "model.layers.5.moe.e1_e3.q_scale",
417
+ "shape": [
418
+ 4,
419
+ 8192,
420
+ 32
421
+ ],
422
+ "dtype": "float16",
423
+ "format": "f32-to-bf16",
424
+ "nbytes": 2097152,
425
+ "byteOffset": 28944064
426
+ },
427
+ {
428
+ "name": "model.layers.6.moe.e1_e3.q_scale",
429
+ "shape": [
430
+ 4,
431
+ 8192,
432
+ 32
433
+ ],
434
+ "dtype": "float16",
435
+ "format": "f32-to-bf16",
436
+ "nbytes": 2097152,
437
+ "byteOffset": 31041216
438
+ }
439
+ ],
440
+ "md5sum": "15fc560eff0beef7363843096cd3f83f"
441
+ },
442
+ {
443
+ "dataPath": "params_shard_10.bin",
444
+ "format": "raw-shard",
445
+ "nbytes": 16777216,
446
+ "records": [
447
+ {
448
+ "name": "model.layers.9.moe.e1_e3.q_weight",
449
+ "shape": [
450
+ 4,
451
+ 8192,
452
+ 128
453
+ ],
454
+ "dtype": "uint32",
455
+ "format": "f32-to-bf16",
456
+ "nbytes": 16777216,
457
+ "byteOffset": 0
458
+ }
459
+ ],
460
+ "md5sum": "225eec8ab6c3a421d5f563f229fcf42f"
461
+ },
462
+ {
463
+ "dataPath": "params_shard_11.bin",
464
+ "format": "raw-shard",
465
+ "nbytes": 16777216,
466
+ "records": [
467
+ {
468
+ "name": "model.layers.10.moe.e1_e3.q_weight",
469
+ "shape": [
470
+ 4,
471
+ 8192,
472
+ 128
473
+ ],
474
+ "dtype": "uint32",
475
+ "format": "f32-to-bf16",
476
+ "nbytes": 16777216,
477
+ "byteOffset": 0
478
+ }
479
+ ],
480
+ "md5sum": "0b7d9bfd90e4239969f587c972110172"
481
+ },
482
+ {
483
+ "dataPath": "params_shard_12.bin",
484
+ "format": "raw-shard",
485
+ "nbytes": 16777216,
486
+ "records": [
487
+ {
488
+ "name": "model.layers.11.moe.e1_e3.q_weight",
489
+ "shape": [
490
+ 4,
491
+ 8192,
492
+ 128
493
+ ],
494
+ "dtype": "uint32",
495
+ "format": "f32-to-bf16",
496
+ "nbytes": 16777216,
497
+ "byteOffset": 0
498
+ }
499
+ ],
500
+ "md5sum": "49d56d62a2c5cbeb10306793e6aa6f70"
501
+ },
502
+ {
503
+ "dataPath": "params_shard_13.bin",
504
+ "format": "raw-shard",
505
+ "nbytes": 27262976,
506
+ "records": [
507
+ {
508
+ "name": "model.layers.7.moe.e1_e3.q_scale",
509
+ "shape": [
510
+ 4,
511
+ 8192,
512
+ 32
513
+ ],
514
+ "dtype": "float16",
515
+ "format": "f32-to-bf16",
516
+ "nbytes": 2097152,
517
+ "byteOffset": 0
518
+ },
519
+ {
520
+ "name": "model.layers.8.moe.e1_e3.q_weight",
521
+ "shape": [
522
+ 4,
523
+ 8192,
524
+ 128
525
+ ],
526
+ "dtype": "uint32",
527
+ "format": "f32-to-bf16",
528
+ "nbytes": 16777216,
529
+ "byteOffset": 2097152
530
+ },
531
+ {
532
+ "name": "model.layers.8.moe.e1_e3.q_scale",
533
+ "shape": [
534
+ 4,
535
+ 8192,
536
+ 32
537
+ ],
538
+ "dtype": "float16",
539
+ "format": "f32-to-bf16",
540
+ "nbytes": 2097152,
541
+ "byteOffset": 18874368
542
+ },
543
+ {
544
+ "name": "model.layers.9.moe.e1_e3.q_scale",
545
+ "shape": [
546
+ 4,
547
+ 8192,
548
+ 32
549
+ ],
550
+ "dtype": "float16",
551
+ "format": "f32-to-bf16",
552
+ "nbytes": 2097152,
553
+ "byteOffset": 20971520
554
+ },
555
+ {
556
+ "name": "model.layers.10.moe.e1_e3.q_scale",
557
+ "shape": [
558
+ 4,
559
+ 8192,
560
+ 32
561
+ ],
562
+ "dtype": "float16",
563
+ "format": "f32-to-bf16",
564
+ "nbytes": 2097152,
565
+ "byteOffset": 23068672
566
+ },
567
+ {
568
+ "name": "model.layers.11.moe.e1_e3.q_scale",
569
+ "shape": [
570
+ 4,
571
+ 8192,
572
+ 32
573
+ ],
574
+ "dtype": "float16",
575
+ "format": "f32-to-bf16",
576
+ "nbytes": 2097152,
577
+ "byteOffset": 25165824
578
+ }
579
+ ],
580
+ "md5sum": "655bd70482a3f3d9e74580733bbc15d1"
581
+ },
582
+ {
583
+ "dataPath": "params_shard_14.bin",
584
+ "format": "raw-shard",
585
+ "nbytes": 28311552,
586
+ "records": [
587
+ {
588
+ "name": "model.layers.0.moe.e2.q_weight",
589
+ "shape": [
590
+ 4,
591
+ 1024,
592
+ 512
593
+ ],
594
+ "dtype": "uint32",
595
+ "format": "f32-to-bf16",
596
+ "nbytes": 8388608,
597
+ "byteOffset": 0
598
+ },
599
+ {
600
+ "name": "model.layers.0.moe.e2.q_scale",
601
+ "shape": [
602
+ 4,
603
+ 1024,
604
+ 128
605
+ ],
606
+ "dtype": "float16",
607
+ "format": "f32-to-bf16",
608
+ "nbytes": 1048576,
609
+ "byteOffset": 8388608
610
+ },
611
+ {
612
+ "name": "model.layers.1.moe.e2.q_weight",
613
+ "shape": [
614
+ 4,
615
+ 1024,
616
+ 512
617
+ ],
618
+ "dtype": "uint32",
619
+ "format": "f32-to-bf16",
620
+ "nbytes": 8388608,
621
+ "byteOffset": 9437184
622
+ },
623
+ {
624
+ "name": "model.layers.1.moe.e2.q_scale",
625
+ "shape": [
626
+ 4,
627
+ 1024,
628
+ 128
629
+ ],
630
+ "dtype": "float16",
631
+ "format": "f32-to-bf16",
632
+ "nbytes": 1048576,
633
+ "byteOffset": 17825792
634
+ },
635
+ {
636
+ "name": "model.layers.2.moe.e2.q_weight",
637
+ "shape": [
638
+ 4,
639
+ 1024,
640
+ 512
641
+ ],
642
+ "dtype": "uint32",
643
+ "format": "f32-to-bf16",
644
+ "nbytes": 8388608,
645
+ "byteOffset": 18874368
646
+ },
647
+ {
648
+ "name": "model.layers.2.moe.e2.q_scale",
649
+ "shape": [
650
+ 4,
651
+ 1024,
652
+ 128
653
+ ],
654
+ "dtype": "float16",
655
+ "format": "f32-to-bf16",
656
+ "nbytes": 1048576,
657
+ "byteOffset": 27262976
658
+ }
659
+ ],
660
+ "md5sum": "b8058afbdee107c4dd664bf5677bb64b"
661
+ },
662
+ {
663
+ "dataPath": "params_shard_15.bin",
664
+ "format": "raw-shard",
665
+ "nbytes": 28311552,
666
+ "records": [
667
+ {
668
+ "name": "model.layers.3.moe.e2.q_weight",
669
+ "shape": [
670
+ 4,
671
+ 1024,
672
+ 512
673
+ ],
674
+ "dtype": "uint32",
675
+ "format": "f32-to-bf16",
676
+ "nbytes": 8388608,
677
+ "byteOffset": 0
678
+ },
679
+ {
680
+ "name": "model.layers.3.moe.e2.q_scale",
681
+ "shape": [
682
+ 4,
683
+ 1024,
684
+ 128
685
+ ],
686
+ "dtype": "float16",
687
+ "format": "f32-to-bf16",
688
+ "nbytes": 1048576,
689
+ "byteOffset": 8388608
690
+ },
691
+ {
692
+ "name": "model.layers.4.moe.e2.q_weight",
693
+ "shape": [
694
+ 4,
695
+ 1024,
696
+ 512
697
+ ],
698
+ "dtype": "uint32",
699
+ "format": "f32-to-bf16",
700
+ "nbytes": 8388608,
701
+ "byteOffset": 9437184
702
+ },
703
+ {
704
+ "name": "model.layers.4.moe.e2.q_scale",
705
+ "shape": [
706
+ 4,
707
+ 1024,
708
+ 128
709
+ ],
710
+ "dtype": "float16",
711
+ "format": "f32-to-bf16",
712
+ "nbytes": 1048576,
713
+ "byteOffset": 17825792
714
+ },
715
+ {
716
+ "name": "model.layers.5.moe.e2.q_weight",
717
+ "shape": [
718
+ 4,
719
+ 1024,
720
+ 512
721
+ ],
722
+ "dtype": "uint32",
723
+ "format": "f32-to-bf16",
724
+ "nbytes": 8388608,
725
+ "byteOffset": 18874368
726
+ },
727
+ {
728
+ "name": "model.layers.5.moe.e2.q_scale",
729
+ "shape": [
730
+ 4,
731
+ 1024,
732
+ 128
733
+ ],
734
+ "dtype": "float16",
735
+ "format": "f32-to-bf16",
736
+ "nbytes": 1048576,
737
+ "byteOffset": 27262976
738
+ }
739
+ ],
740
+ "md5sum": "ea81f78fb218262780ff13b5c95662dc"
741
+ },
742
+ {
743
+ "dataPath": "params_shard_16.bin",
744
+ "format": "raw-shard",
745
+ "nbytes": 28311552,
746
+ "records": [
747
+ {
748
+ "name": "model.layers.6.moe.e2.q_weight",
749
+ "shape": [
750
+ 4,
751
+ 1024,
752
+ 512
753
+ ],
754
+ "dtype": "uint32",
755
+ "format": "f32-to-bf16",
756
+ "nbytes": 8388608,
757
+ "byteOffset": 0
758
+ },
759
+ {
760
+ "name": "model.layers.6.moe.e2.q_scale",
761
+ "shape": [
762
+ 4,
763
+ 1024,
764
+ 128
765
+ ],
766
+ "dtype": "float16",
767
+ "format": "f32-to-bf16",
768
+ "nbytes": 1048576,
769
+ "byteOffset": 8388608
770
+ },
771
+ {
772
+ "name": "model.layers.7.moe.e2.q_weight",
773
+ "shape": [
774
+ 4,
775
+ 1024,
776
+ 512
777
+ ],
778
+ "dtype": "uint32",
779
+ "format": "f32-to-bf16",
780
+ "nbytes": 8388608,
781
+ "byteOffset": 9437184
782
+ },
783
+ {
784
+ "name": "model.layers.7.moe.e2.q_scale",
785
+ "shape": [
786
+ 4,
787
+ 1024,
788
+ 128
789
+ ],
790
+ "dtype": "float16",
791
+ "format": "f32-to-bf16",
792
+ "nbytes": 1048576,
793
+ "byteOffset": 17825792
794
+ },
795
+ {
796
+ "name": "model.layers.8.moe.e2.q_weight",
797
+ "shape": [
798
+ 4,
799
+ 1024,
800
+ 512
801
+ ],
802
+ "dtype": "uint32",
803
+ "format": "f32-to-bf16",
804
+ "nbytes": 8388608,
805
+ "byteOffset": 18874368
806
+ },
807
+ {
808
+ "name": "model.layers.8.moe.e2.q_scale",
809
+ "shape": [
810
+ 4,
811
+ 1024,
812
+ 128
813
+ ],
814
+ "dtype": "float16",
815
+ "format": "f32-to-bf16",
816
+ "nbytes": 1048576,
817
+ "byteOffset": 27262976
818
+ }
819
+ ],
820
+ "md5sum": "2ba93f293ac038d9c735a1bcdea65dc3"
821
+ },
822
+ {
823
+ "dataPath": "params_shard_17.bin",
824
+ "format": "raw-shard",
825
+ "nbytes": 33546240,
826
+ "records": [
827
+ {
828
+ "name": "model.layers.9.moe.e2.q_weight",
829
+ "shape": [
830
+ 4,
831
+ 1024,
832
+ 512
833
+ ],
834
+ "dtype": "uint32",
835
+ "format": "f32-to-bf16",
836
+ "nbytes": 8388608,
837
+ "byteOffset": 0
838
+ },
839
+ {
840
+ "name": "model.layers.9.moe.e2.q_scale",
841
+ "shape": [
842
+ 4,
843
+ 1024,
844
+ 128
845
+ ],
846
+ "dtype": "float16",
847
+ "format": "f32-to-bf16",
848
+ "nbytes": 1048576,
849
+ "byteOffset": 8388608
850
+ },
851
+ {
852
+ "name": "model.layers.10.moe.e2.q_weight",
853
+ "shape": [
854
+ 4,
855
+ 1024,
856
+ 512
857
+ ],
858
+ "dtype": "uint32",
859
+ "format": "f32-to-bf16",
860
+ "nbytes": 8388608,
861
+ "byteOffset": 9437184
862
+ },
863
+ {
864
+ "name": "model.layers.10.moe.e2.q_scale",
865
+ "shape": [
866
+ 4,
867
+ 1024,
868
+ 128
869
+ ],
870
+ "dtype": "float16",
871
+ "format": "f32-to-bf16",
872
+ "nbytes": 1048576,
873
+ "byteOffset": 17825792
874
+ },
875
+ {
876
+ "name": "model.layers.11.moe.e2.q_weight",
877
+ "shape": [
878
+ 4,
879
+ 1024,
880
+ 512
881
+ ],
882
+ "dtype": "uint32",
883
+ "format": "f32-to-bf16",
884
+ "nbytes": 8388608,
885
+ "byteOffset": 18874368
886
+ },
887
+ {
888
+ "name": "model.layers.11.moe.e2.q_scale",
889
+ "shape": [
890
+ 4,
891
+ 1024,
892
+ 128
893
+ ],
894
+ "dtype": "float16",
895
+ "format": "f32-to-bf16",
896
+ "nbytes": 1048576,
897
+ "byteOffset": 27262976
898
+ },
899
+ {
900
+ "name": "model.layers.0.post_attention_layernorm.weight",
901
+ "shape": [
902
+ 1024
903
+ ],
904
+ "dtype": "float16",
905
+ "format": "f32-to-bf16",
906
+ "nbytes": 2048,
907
+ "byteOffset": 28311552
908
+ },
909
+ {
910
+ "name": "model.layers.1.post_attention_layernorm.weight",
911
+ "shape": [
912
+ 1024
913
+ ],
914
+ "dtype": "float16",
915
+ "format": "f32-to-bf16",
916
+ "nbytes": 2048,
917
+ "byteOffset": 28313600
918
+ },
919
+ {
920
+ "name": "model.layers.2.post_attention_layernorm.weight",
921
+ "shape": [
922
+ 1024
923
+ ],
924
+ "dtype": "float16",
925
+ "format": "f32-to-bf16",
926
+ "nbytes": 2048,
927
+ "byteOffset": 28315648
928
+ },
929
+ {
930
+ "name": "model.layers.3.post_attention_layernorm.weight",
931
+ "shape": [
932
+ 1024
933
+ ],
934
+ "dtype": "float16",
935
+ "format": "f32-to-bf16",
936
+ "nbytes": 2048,
937
+ "byteOffset": 28317696
938
+ },
939
+ {
940
+ "name": "model.layers.4.post_attention_layernorm.weight",
941
+ "shape": [
942
+ 1024
943
+ ],
944
+ "dtype": "float16",
945
+ "format": "f32-to-bf16",
946
+ "nbytes": 2048,
947
+ "byteOffset": 28319744
948
+ },
949
+ {
950
+ "name": "model.layers.5.post_attention_layernorm.weight",
951
+ "shape": [
952
+ 1024
953
+ ],
954
+ "dtype": "float16",
955
+ "format": "f32-to-bf16",
956
+ "nbytes": 2048,
957
+ "byteOffset": 28321792
958
+ },
959
+ {
960
+ "name": "model.layers.6.post_attention_layernorm.weight",
961
+ "shape": [
962
+ 1024
963
+ ],
964
+ "dtype": "float16",
965
+ "format": "f32-to-bf16",
966
+ "nbytes": 2048,
967
+ "byteOffset": 28323840
968
+ },
969
+ {
970
+ "name": "model.layers.7.post_attention_layernorm.weight",
971
+ "shape": [
972
+ 1024
973
+ ],
974
+ "dtype": "float16",
975
+ "format": "f32-to-bf16",
976
+ "nbytes": 2048,
977
+ "byteOffset": 28325888
978
+ },
979
+ {
980
+ "name": "model.layers.8.post_attention_layernorm.weight",
981
+ "shape": [
982
+ 1024
983
+ ],
984
+ "dtype": "float16",
985
+ "format": "f32-to-bf16",
986
+ "nbytes": 2048,
987
+ "byteOffset": 28327936
988
+ },
989
+ {
990
+ "name": "model.layers.9.post_attention_layernorm.weight",
991
+ "shape": [
992
+ 1024
993
+ ],
994
+ "dtype": "float16",
995
+ "format": "f32-to-bf16",
996
+ "nbytes": 2048,
997
+ "byteOffset": 28329984
998
+ },
999
+ {
1000
+ "name": "model.layers.10.post_attention_layernorm.weight",
1001
+ "shape": [
1002
+ 1024
1003
+ ],
1004
+ "dtype": "float16",
1005
+ "format": "f32-to-bf16",
1006
+ "nbytes": 2048,
1007
+ "byteOffset": 28332032
1008
+ },
1009
+ {
1010
+ "name": "model.layers.11.post_attention_layernorm.weight",
1011
+ "shape": [
1012
+ 1024
1013
+ ],
1014
+ "dtype": "float16",
1015
+ "format": "f32-to-bf16",
1016
+ "nbytes": 2048,
1017
+ "byteOffset": 28334080
1018
+ },
1019
+ {
1020
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
1021
+ "shape": [
1022
+ 1536,
1023
+ 128
1024
+ ],
1025
+ "dtype": "uint32",
1026
+ "format": "f32-to-bf16",
1027
+ "nbytes": 786432,
1028
+ "byteOffset": 28336128
1029
+ },
1030
+ {
1031
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
1032
+ "shape": [
1033
+ 1536,
1034
+ 32
1035
+ ],
1036
+ "dtype": "float16",
1037
+ "format": "f32-to-bf16",
1038
+ "nbytes": 98304,
1039
+ "byteOffset": 29122560
1040
+ },
1041
+ {
1042
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
1043
+ "shape": [
1044
+ 1536,
1045
+ 128
1046
+ ],
1047
+ "dtype": "uint32",
1048
+ "format": "f32-to-bf16",
1049
+ "nbytes": 786432,
1050
+ "byteOffset": 29220864
1051
+ },
1052
+ {
1053
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
1054
+ "shape": [
1055
+ 1536,
1056
+ 32
1057
+ ],
1058
+ "dtype": "float16",
1059
+ "format": "f32-to-bf16",
1060
+ "nbytes": 98304,
1061
+ "byteOffset": 30007296
1062
+ },
1063
+ {
1064
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
1065
+ "shape": [
1066
+ 1536,
1067
+ 128
1068
+ ],
1069
+ "dtype": "uint32",
1070
+ "format": "f32-to-bf16",
1071
+ "nbytes": 786432,
1072
+ "byteOffset": 30105600
1073
+ },
1074
+ {
1075
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
1076
+ "shape": [
1077
+ 1536,
1078
+ 32
1079
+ ],
1080
+ "dtype": "float16",
1081
+ "format": "f32-to-bf16",
1082
+ "nbytes": 98304,
1083
+ "byteOffset": 30892032
1084
+ },
1085
+ {
1086
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
1087
+ "shape": [
1088
+ 1536,
1089
+ 128
1090
+ ],
1091
+ "dtype": "uint32",
1092
+ "format": "f32-to-bf16",
1093
+ "nbytes": 786432,
1094
+ "byteOffset": 30990336
1095
+ },
1096
+ {
1097
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
1098
+ "shape": [
1099
+ 1536,
1100
+ 32
1101
+ ],
1102
+ "dtype": "float16",
1103
+ "format": "f32-to-bf16",
1104
+ "nbytes": 98304,
1105
+ "byteOffset": 31776768
1106
+ },
1107
+ {
1108
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
1109
+ "shape": [
1110
+ 1536,
1111
+ 128
1112
+ ],
1113
+ "dtype": "uint32",
1114
+ "format": "f32-to-bf16",
1115
+ "nbytes": 786432,
1116
+ "byteOffset": 31875072
1117
+ },
1118
+ {
1119
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
1120
+ "shape": [
1121
+ 1536,
1122
+ 32
1123
+ ],
1124
+ "dtype": "float16",
1125
+ "format": "f32-to-bf16",
1126
+ "nbytes": 98304,
1127
+ "byteOffset": 32661504
1128
+ },
1129
+ {
1130
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
1131
+ "shape": [
1132
+ 1536,
1133
+ 128
1134
+ ],
1135
+ "dtype": "uint32",
1136
+ "format": "f32-to-bf16",
1137
+ "nbytes": 786432,
1138
+ "byteOffset": 32759808
1139
+ }
1140
+ ],
1141
+ "md5sum": "6cea3c1ce36c0eeebc1e0cd0c211e553"
1142
+ },
1143
+ {
1144
+ "dataPath": "params_shard_18.bin",
1145
+ "format": "raw-shard",
1146
+ "nbytes": 12582912,
1147
+ "records": [
1148
+ {
1149
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
1150
+ "shape": [
1151
+ 1536,
1152
+ 32
1153
+ ],
1154
+ "dtype": "float16",
1155
+ "format": "f32-to-bf16",
1156
+ "nbytes": 98304,
1157
+ "byteOffset": 0
1158
+ },
1159
+ {
1160
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
1161
+ "shape": [
1162
+ 1536,
1163
+ 128
1164
+ ],
1165
+ "dtype": "uint32",
1166
+ "format": "f32-to-bf16",
1167
+ "nbytes": 786432,
1168
+ "byteOffset": 98304
1169
+ },
1170
+ {
1171
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
1172
+ "shape": [
1173
+ 1536,
1174
+ 32
1175
+ ],
1176
+ "dtype": "float16",
1177
+ "format": "f32-to-bf16",
1178
+ "nbytes": 98304,
1179
+ "byteOffset": 884736
1180
+ },
1181
+ {
1182
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
1183
+ "shape": [
1184
+ 1536,
1185
+ 128
1186
+ ],
1187
+ "dtype": "uint32",
1188
+ "format": "f32-to-bf16",
1189
+ "nbytes": 786432,
1190
+ "byteOffset": 983040
1191
+ },
1192
+ {
1193
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
1194
+ "shape": [
1195
+ 1536,
1196
+ 32
1197
+ ],
1198
+ "dtype": "float16",
1199
+ "format": "f32-to-bf16",
1200
+ "nbytes": 98304,
1201
+ "byteOffset": 1769472
1202
+ },
1203
+ {
1204
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
1205
+ "shape": [
1206
+ 1536,
1207
+ 128
1208
+ ],
1209
+ "dtype": "uint32",
1210
+ "format": "f32-to-bf16",
1211
+ "nbytes": 786432,
1212
+ "byteOffset": 1867776
1213
+ },
1214
+ {
1215
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
1216
+ "shape": [
1217
+ 1536,
1218
+ 32
1219
+ ],
1220
+ "dtype": "float16",
1221
+ "format": "f32-to-bf16",
1222
+ "nbytes": 98304,
1223
+ "byteOffset": 2654208
1224
+ },
1225
+ {
1226
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
1227
+ "shape": [
1228
+ 1536,
1229
+ 128
1230
+ ],
1231
+ "dtype": "uint32",
1232
+ "format": "f32-to-bf16",
1233
+ "nbytes": 786432,
1234
+ "byteOffset": 2752512
1235
+ },
1236
+ {
1237
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
1238
+ "shape": [
1239
+ 1536,
1240
+ 32
1241
+ ],
1242
+ "dtype": "float16",
1243
+ "format": "f32-to-bf16",
1244
+ "nbytes": 98304,
1245
+ "byteOffset": 3538944
1246
+ },
1247
+ {
1248
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
1249
+ "shape": [
1250
+ 1536,
1251
+ 128
1252
+ ],
1253
+ "dtype": "uint32",
1254
+ "format": "f32-to-bf16",
1255
+ "nbytes": 786432,
1256
+ "byteOffset": 3637248
1257
+ },
1258
+ {
1259
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
1260
+ "shape": [
1261
+ 1536,
1262
+ 32
1263
+ ],
1264
+ "dtype": "float16",
1265
+ "format": "f32-to-bf16",
1266
+ "nbytes": 98304,
1267
+ "byteOffset": 4423680
1268
+ },
1269
+ {
1270
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
1271
+ "shape": [
1272
+ 1536,
1273
+ 128
1274
+ ],
1275
+ "dtype": "uint32",
1276
+ "format": "f32-to-bf16",
1277
+ "nbytes": 786432,
1278
+ "byteOffset": 4521984
1279
+ },
1280
+ {
1281
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
1282
+ "shape": [
1283
+ 1536,
1284
+ 32
1285
+ ],
1286
+ "dtype": "float16",
1287
+ "format": "f32-to-bf16",
1288
+ "nbytes": 98304,
1289
+ "byteOffset": 5308416
1290
+ },
1291
+ {
1292
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
1293
+ "shape": [
1294
+ 1024,
1295
+ 128
1296
+ ],
1297
+ "dtype": "uint32",
1298
+ "format": "f32-to-bf16",
1299
+ "nbytes": 524288,
1300
+ "byteOffset": 5406720
1301
+ },
1302
+ {
1303
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
1304
+ "shape": [
1305
+ 1024,
1306
+ 32
1307
+ ],
1308
+ "dtype": "float16",
1309
+ "format": "f32-to-bf16",
1310
+ "nbytes": 65536,
1311
+ "byteOffset": 5931008
1312
+ },
1313
+ {
1314
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
1315
+ "shape": [
1316
+ 1024,
1317
+ 128
1318
+ ],
1319
+ "dtype": "uint32",
1320
+ "format": "f32-to-bf16",
1321
+ "nbytes": 524288,
1322
+ "byteOffset": 5996544
1323
+ },
1324
+ {
1325
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
1326
+ "shape": [
1327
+ 1024,
1328
+ 32
1329
+ ],
1330
+ "dtype": "float16",
1331
+ "format": "f32-to-bf16",
1332
+ "nbytes": 65536,
1333
+ "byteOffset": 6520832
1334
+ },
1335
+ {
1336
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
1337
+ "shape": [
1338
+ 1024,
1339
+ 128
1340
+ ],
1341
+ "dtype": "uint32",
1342
+ "format": "f32-to-bf16",
1343
+ "nbytes": 524288,
1344
+ "byteOffset": 6586368
1345
+ },
1346
+ {
1347
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
1348
+ "shape": [
1349
+ 1024,
1350
+ 32
1351
+ ],
1352
+ "dtype": "float16",
1353
+ "format": "f32-to-bf16",
1354
+ "nbytes": 65536,
1355
+ "byteOffset": 7110656
1356
+ },
1357
+ {
1358
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
1359
+ "shape": [
1360
+ 1024,
1361
+ 128
1362
+ ],
1363
+ "dtype": "uint32",
1364
+ "format": "f32-to-bf16",
1365
+ "nbytes": 524288,
1366
+ "byteOffset": 7176192
1367
+ },
1368
+ {
1369
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
1370
+ "shape": [
1371
+ 1024,
1372
+ 32
1373
+ ],
1374
+ "dtype": "float16",
1375
+ "format": "f32-to-bf16",
1376
+ "nbytes": 65536,
1377
+ "byteOffset": 7700480
1378
+ },
1379
+ {
1380
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
1381
+ "shape": [
1382
+ 1024,
1383
+ 128
1384
+ ],
1385
+ "dtype": "uint32",
1386
+ "format": "f32-to-bf16",
1387
+ "nbytes": 524288,
1388
+ "byteOffset": 7766016
1389
+ },
1390
+ {
1391
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
1392
+ "shape": [
1393
+ 1024,
1394
+ 32
1395
+ ],
1396
+ "dtype": "float16",
1397
+ "format": "f32-to-bf16",
1398
+ "nbytes": 65536,
1399
+ "byteOffset": 8290304
1400
+ },
1401
+ {
1402
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
1403
+ "shape": [
1404
+ 1024,
1405
+ 128
1406
+ ],
1407
+ "dtype": "uint32",
1408
+ "format": "f32-to-bf16",
1409
+ "nbytes": 524288,
1410
+ "byteOffset": 8355840
1411
+ },
1412
+ {
1413
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
1414
+ "shape": [
1415
+ 1024,
1416
+ 32
1417
+ ],
1418
+ "dtype": "float16",
1419
+ "format": "f32-to-bf16",
1420
+ "nbytes": 65536,
1421
+ "byteOffset": 8880128
1422
+ },
1423
+ {
1424
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
1425
+ "shape": [
1426
+ 1024,
1427
+ 128
1428
+ ],
1429
+ "dtype": "uint32",
1430
+ "format": "f32-to-bf16",
1431
+ "nbytes": 524288,
1432
+ "byteOffset": 8945664
1433
+ },
1434
+ {
1435
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
1436
+ "shape": [
1437
+ 1024,
1438
+ 32
1439
+ ],
1440
+ "dtype": "float16",
1441
+ "format": "f32-to-bf16",
1442
+ "nbytes": 65536,
1443
+ "byteOffset": 9469952
1444
+ },
1445
+ {
1446
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
1447
+ "shape": [
1448
+ 1024,
1449
+ 128
1450
+ ],
1451
+ "dtype": "uint32",
1452
+ "format": "f32-to-bf16",
1453
+ "nbytes": 524288,
1454
+ "byteOffset": 9535488
1455
+ },
1456
+ {
1457
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
1458
+ "shape": [
1459
+ 1024,
1460
+ 32
1461
+ ],
1462
+ "dtype": "float16",
1463
+ "format": "f32-to-bf16",
1464
+ "nbytes": 65536,
1465
+ "byteOffset": 10059776
1466
+ },
1467
+ {
1468
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
1469
+ "shape": [
1470
+ 1024,
1471
+ 128
1472
+ ],
1473
+ "dtype": "uint32",
1474
+ "format": "f32-to-bf16",
1475
+ "nbytes": 524288,
1476
+ "byteOffset": 10125312
1477
+ },
1478
+ {
1479
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
1480
+ "shape": [
1481
+ 1024,
1482
+ 32
1483
+ ],
1484
+ "dtype": "float16",
1485
+ "format": "f32-to-bf16",
1486
+ "nbytes": 65536,
1487
+ "byteOffset": 10649600
1488
+ },
1489
+ {
1490
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
1491
+ "shape": [
1492
+ 1024,
1493
+ 128
1494
+ ],
1495
+ "dtype": "uint32",
1496
+ "format": "f32-to-bf16",
1497
+ "nbytes": 524288,
1498
+ "byteOffset": 10715136
1499
+ },
1500
+ {
1501
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
1502
+ "shape": [
1503
+ 1024,
1504
+ 32
1505
+ ],
1506
+ "dtype": "float16",
1507
+ "format": "f32-to-bf16",
1508
+ "nbytes": 65536,
1509
+ "byteOffset": 11239424
1510
+ },
1511
+ {
1512
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
1513
+ "shape": [
1514
+ 1024,
1515
+ 128
1516
+ ],
1517
+ "dtype": "uint32",
1518
+ "format": "f32-to-bf16",
1519
+ "nbytes": 524288,
1520
+ "byteOffset": 11304960
1521
+ },
1522
+ {
1523
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
1524
+ "shape": [
1525
+ 1024,
1526
+ 32
1527
+ ],
1528
+ "dtype": "float16",
1529
+ "format": "f32-to-bf16",
1530
+ "nbytes": 65536,
1531
+ "byteOffset": 11829248
1532
+ },
1533
+ {
1534
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
1535
+ "shape": [
1536
+ 1024,
1537
+ 128
1538
+ ],
1539
+ "dtype": "uint32",
1540
+ "format": "f32-to-bf16",
1541
+ "nbytes": 524288,
1542
+ "byteOffset": 11894784
1543
+ },
1544
+ {
1545
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
1546
+ "shape": [
1547
+ 1024,
1548
+ 32
1549
+ ],
1550
+ "dtype": "float16",
1551
+ "format": "f32-to-bf16",
1552
+ "nbytes": 65536,
1553
+ "byteOffset": 12419072
1554
+ },
1555
+ {
1556
+ "name": "model.layers.0.moe.gate.weight",
1557
+ "shape": [
1558
+ 4,
1559
+ 1024
1560
+ ],
1561
+ "dtype": "float16",
1562
+ "format": "f32-to-bf16",
1563
+ "nbytes": 8192,
1564
+ "byteOffset": 12484608
1565
+ },
1566
+ {
1567
+ "name": "model.layers.1.moe.gate.weight",
1568
+ "shape": [
1569
+ 4,
1570
+ 1024
1571
+ ],
1572
+ "dtype": "float16",
1573
+ "format": "f32-to-bf16",
1574
+ "nbytes": 8192,
1575
+ "byteOffset": 12492800
1576
+ },
1577
+ {
1578
+ "name": "model.layers.2.moe.gate.weight",
1579
+ "shape": [
1580
+ 4,
1581
+ 1024
1582
+ ],
1583
+ "dtype": "float16",
1584
+ "format": "f32-to-bf16",
1585
+ "nbytes": 8192,
1586
+ "byteOffset": 12500992
1587
+ },
1588
+ {
1589
+ "name": "model.layers.3.moe.gate.weight",
1590
+ "shape": [
1591
+ 4,
1592
+ 1024
1593
+ ],
1594
+ "dtype": "float16",
1595
+ "format": "f32-to-bf16",
1596
+ "nbytes": 8192,
1597
+ "byteOffset": 12509184
1598
+ },
1599
+ {
1600
+ "name": "model.layers.4.moe.gate.weight",
1601
+ "shape": [
1602
+ 4,
1603
+ 1024
1604
+ ],
1605
+ "dtype": "float16",
1606
+ "format": "f32-to-bf16",
1607
+ "nbytes": 8192,
1608
+ "byteOffset": 12517376
1609
+ },
1610
+ {
1611
+ "name": "model.layers.5.moe.gate.weight",
1612
+ "shape": [
1613
+ 4,
1614
+ 1024
1615
+ ],
1616
+ "dtype": "float16",
1617
+ "format": "f32-to-bf16",
1618
+ "nbytes": 8192,
1619
+ "byteOffset": 12525568
1620
+ },
1621
+ {
1622
+ "name": "model.layers.6.moe.gate.weight",
1623
+ "shape": [
1624
+ 4,
1625
+ 1024
1626
+ ],
1627
+ "dtype": "float16",
1628
+ "format": "f32-to-bf16",
1629
+ "nbytes": 8192,
1630
+ "byteOffset": 12533760
1631
+ },
1632
+ {
1633
+ "name": "model.layers.7.moe.gate.weight",
1634
+ "shape": [
1635
+ 4,
1636
+ 1024
1637
+ ],
1638
+ "dtype": "float16",
1639
+ "format": "f32-to-bf16",
1640
+ "nbytes": 8192,
1641
+ "byteOffset": 12541952
1642
+ },
1643
+ {
1644
+ "name": "model.layers.8.moe.gate.weight",
1645
+ "shape": [
1646
+ 4,
1647
+ 1024
1648
+ ],
1649
+ "dtype": "float16",
1650
+ "format": "f32-to-bf16",
1651
+ "nbytes": 8192,
1652
+ "byteOffset": 12550144
1653
+ },
1654
+ {
1655
+ "name": "model.layers.9.moe.gate.weight",
1656
+ "shape": [
1657
+ 4,
1658
+ 1024
1659
+ ],
1660
+ "dtype": "float16",
1661
+ "format": "f32-to-bf16",
1662
+ "nbytes": 8192,
1663
+ "byteOffset": 12558336
1664
+ },
1665
+ {
1666
+ "name": "model.layers.10.moe.gate.weight",
1667
+ "shape": [
1668
+ 4,
1669
+ 1024
1670
+ ],
1671
+ "dtype": "float16",
1672
+ "format": "f32-to-bf16",
1673
+ "nbytes": 8192,
1674
+ "byteOffset": 12566528
1675
+ },
1676
+ {
1677
+ "name": "model.layers.11.moe.gate.weight",
1678
+ "shape": [
1679
+ 4,
1680
+ 1024
1681
+ ],
1682
+ "dtype": "float16",
1683
+ "format": "f32-to-bf16",
1684
+ "nbytes": 8192,
1685
+ "byteOffset": 12574720
1686
+ }
1687
+ ],
1688
+ "md5sum": "c81b7f0ed2e3e42b10a9de6ffc778b2e"
1689
+ }
1690
+ ]
1691
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb25219c93d4bcfe618ce8aa2a61b68437856e2e534671b2f740ce60675ee75
3
+ size 18435776
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13cb1b6916286166c8a791cbcfcd2cc9415328ce499cc15d61d70704e450d919
3
+ size 16777216
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dcbcaf25db8192b6b45b2d3b6dc4a0a07cd25109f206c354d27836e1d92fd6f
3
+ size 16777216