diff --git "a/quantization_config.json" "b/quantization_config.json" new file mode 100644--- /dev/null +++ "b/quantization_config.json" @@ -0,0 +1,17505 @@ +{ + "quant_method": "exl3", + "version": "0.0.6", + "bits": 6.0, + "head_bits": 6, + "calibration": { + "rows": 100, + "cols": 2048 + }, + "out_scales": "auto", + "tensor_storage": { + "model.embed_tokens": { + "stored_tensors": { + "model.embed_tokens.weight": { + "shape": [ + 131072, + 8192 + ], + "n_bytes": 2147483648, + "dtype": "torch.float16" + } + } + }, + "model.layers.0.attention_layernorm": { + "stored_tensors": { + "model.layers.0.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.0.self_attn.q_proj": { + "stored_tensors": { + "model.layers.0.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.0.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.0.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.0.self_attn.k_proj": { + "stored_tensors": { + "model.layers.0.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.0.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.0.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.0.self_attn.v_proj": { + "stored_tensors": { + "model.layers.0.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.0.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.0.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.0.self_attn.o_proj": { + "stored_tensors": { + "model.layers.0.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.0.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.0.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.0.self_attn.q_norm": { + "stored_tensors": { + "model.layers.0.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.0.self_attn.k_norm": { + "stored_tensors": { + "model.layers.0.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.0.feedforward_layernorm": { + "stored_tensors": { + "model.layers.0.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.0.mlp.up_proj": { + "stored_tensors": { + "model.layers.0.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.0.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.0.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.0.mlp.down_proj": { + "stored_tensors": { + "model.layers.0.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.0.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.0.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.1.attention_layernorm": { + "stored_tensors": { + "model.layers.1.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.1.self_attn.q_proj": { + "stored_tensors": { + "model.layers.1.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.1.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.1.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.1.self_attn.k_proj": { + "stored_tensors": { + "model.layers.1.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.1.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.1.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.1.self_attn.v_proj": { + "stored_tensors": { + "model.layers.1.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.1.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.1.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.1.self_attn.o_proj": { + "stored_tensors": { + "model.layers.1.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.1.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.1.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.1.self_attn.q_norm": { + "stored_tensors": { + "model.layers.1.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.1.self_attn.k_norm": { + "stored_tensors": { + "model.layers.1.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.1.feedforward_layernorm": { + "stored_tensors": { + "model.layers.1.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.1.mlp.up_proj": { + "stored_tensors": { + "model.layers.1.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.1.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.1.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.1.mlp.down_proj": { + "stored_tensors": { + "model.layers.1.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.1.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.1.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.2.attention_layernorm": { + "stored_tensors": { + "model.layers.2.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.2.self_attn.q_proj": { + "stored_tensors": { + "model.layers.2.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.2.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.2.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.2.self_attn.k_proj": { + "stored_tensors": { + "model.layers.2.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.2.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.2.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.2.self_attn.v_proj": { + "stored_tensors": { + "model.layers.2.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.2.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.2.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.2.self_attn.o_proj": { + "stored_tensors": { + "model.layers.2.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.2.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.2.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.2.self_attn.q_norm": { + "stored_tensors": { + "model.layers.2.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.2.self_attn.k_norm": { + "stored_tensors": { + "model.layers.2.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.2.feedforward_layernorm": { + "stored_tensors": { + "model.layers.2.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.2.mlp.up_proj": { + "stored_tensors": { + "model.layers.2.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.2.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.2.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.2.mlp.down_proj": { + "stored_tensors": { + "model.layers.2.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.2.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.2.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.3.attention_layernorm": { + "stored_tensors": { + "model.layers.3.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.3.self_attn.q_proj": { + "stored_tensors": { + "model.layers.3.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.3.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.3.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.3.self_attn.k_proj": { + "stored_tensors": { + "model.layers.3.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.3.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.3.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.3.self_attn.v_proj": { + "stored_tensors": { + "model.layers.3.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.3.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.3.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.3.self_attn.o_proj": { + "stored_tensors": { + "model.layers.3.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.3.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.3.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.3.self_attn.q_norm": { + "stored_tensors": { + "model.layers.3.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.3.self_attn.k_norm": { + "stored_tensors": { + "model.layers.3.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.3.feedforward_layernorm": { + "stored_tensors": { + "model.layers.3.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.3.mlp.up_proj": { + "stored_tensors": { + "model.layers.3.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.3.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.3.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.3.mlp.down_proj": { + "stored_tensors": { + "model.layers.3.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.3.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.3.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.4.attention_layernorm": { + "stored_tensors": { + "model.layers.4.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.4.self_attn.q_proj": { + "stored_tensors": { + "model.layers.4.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.4.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.4.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.4.self_attn.k_proj": { + "stored_tensors": { + "model.layers.4.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.4.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.4.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.4.self_attn.v_proj": { + "stored_tensors": { + "model.layers.4.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.4.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.4.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.4.self_attn.o_proj": { + "stored_tensors": { + "model.layers.4.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.4.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.4.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.4.self_attn.q_norm": { + "stored_tensors": { + "model.layers.4.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.4.self_attn.k_norm": { + "stored_tensors": { + "model.layers.4.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.4.feedforward_layernorm": { + "stored_tensors": { + "model.layers.4.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.4.mlp.up_proj": { + "stored_tensors": { + "model.layers.4.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.4.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.4.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.4.mlp.down_proj": { + "stored_tensors": { + "model.layers.4.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.4.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.4.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.5.attention_layernorm": { + "stored_tensors": { + "model.layers.5.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.5.self_attn.q_proj": { + "stored_tensors": { + "model.layers.5.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.5.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.5.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.5.self_attn.k_proj": { + "stored_tensors": { + "model.layers.5.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.5.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.5.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.5.self_attn.v_proj": { + "stored_tensors": { + "model.layers.5.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.5.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.5.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.5.self_attn.o_proj": { + "stored_tensors": { + "model.layers.5.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.5.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.5.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.5.self_attn.q_norm": { + "stored_tensors": { + "model.layers.5.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.5.self_attn.k_norm": { + "stored_tensors": { + "model.layers.5.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.5.feedforward_layernorm": { + "stored_tensors": { + "model.layers.5.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.5.mlp.up_proj": { + "stored_tensors": { + "model.layers.5.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.5.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.5.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.5.mlp.down_proj": { + "stored_tensors": { + "model.layers.5.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.5.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.5.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.6.attention_layernorm": { + "stored_tensors": { + "model.layers.6.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.6.self_attn.q_proj": { + "stored_tensors": { + "model.layers.6.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.6.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.6.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.6.self_attn.k_proj": { + "stored_tensors": { + "model.layers.6.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.6.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.6.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.6.self_attn.v_proj": { + "stored_tensors": { + "model.layers.6.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.6.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.6.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.6.self_attn.o_proj": { + "stored_tensors": { + "model.layers.6.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.6.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.6.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.6.self_attn.q_norm": { + "stored_tensors": { + "model.layers.6.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.6.self_attn.k_norm": { + "stored_tensors": { + "model.layers.6.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.6.feedforward_layernorm": { + "stored_tensors": { + "model.layers.6.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.6.mlp.up_proj": { + "stored_tensors": { + "model.layers.6.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.6.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.6.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.6.mlp.down_proj": { + "stored_tensors": { + "model.layers.6.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.6.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.6.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.7.attention_layernorm": { + "stored_tensors": { + "model.layers.7.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.7.self_attn.q_proj": { + "stored_tensors": { + "model.layers.7.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.7.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.7.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.7.self_attn.k_proj": { + "stored_tensors": { + "model.layers.7.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.7.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.7.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.7.self_attn.v_proj": { + "stored_tensors": { + "model.layers.7.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.7.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.7.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.7.self_attn.o_proj": { + "stored_tensors": { + "model.layers.7.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.7.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.7.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.7.self_attn.q_norm": { + "stored_tensors": { + "model.layers.7.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.7.self_attn.k_norm": { + "stored_tensors": { + "model.layers.7.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.7.feedforward_layernorm": { + "stored_tensors": { + "model.layers.7.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.7.mlp.up_proj": { + "stored_tensors": { + "model.layers.7.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.7.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.7.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.7.mlp.down_proj": { + "stored_tensors": { + "model.layers.7.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.7.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.7.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.8.attention_layernorm": { + "stored_tensors": { + "model.layers.8.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.8.self_attn.q_proj": { + "stored_tensors": { + "model.layers.8.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.8.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.8.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.8.self_attn.k_proj": { + "stored_tensors": { + "model.layers.8.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.8.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.8.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.8.self_attn.v_proj": { + "stored_tensors": { + "model.layers.8.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.8.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.8.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.8.self_attn.o_proj": { + "stored_tensors": { + "model.layers.8.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.8.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.8.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.8.self_attn.q_norm": { + "stored_tensors": { + "model.layers.8.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.8.self_attn.k_norm": { + "stored_tensors": { + "model.layers.8.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.8.feedforward_layernorm": { + "stored_tensors": { + "model.layers.8.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.8.mlp.up_proj": { + "stored_tensors": { + "model.layers.8.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.8.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.8.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.8.mlp.down_proj": { + "stored_tensors": { + "model.layers.8.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.8.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.8.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.9.attention_layernorm": { + "stored_tensors": { + "model.layers.9.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.9.self_attn.q_proj": { + "stored_tensors": { + "model.layers.9.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.9.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.9.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.9.self_attn.k_proj": { + "stored_tensors": { + "model.layers.9.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.9.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.9.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.9.self_attn.v_proj": { + "stored_tensors": { + "model.layers.9.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.9.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.9.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.9.self_attn.o_proj": { + "stored_tensors": { + "model.layers.9.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.9.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.9.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.9.self_attn.q_norm": { + "stored_tensors": { + "model.layers.9.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.9.self_attn.k_norm": { + "stored_tensors": { + "model.layers.9.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.9.feedforward_layernorm": { + "stored_tensors": { + "model.layers.9.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.9.mlp.up_proj": { + "stored_tensors": { + "model.layers.9.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.9.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.9.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.9.mlp.down_proj": { + "stored_tensors": { + "model.layers.9.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.9.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.9.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.10.attention_layernorm": { + "stored_tensors": { + "model.layers.10.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.10.self_attn.q_proj": { + "stored_tensors": { + "model.layers.10.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.10.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.10.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.10.self_attn.k_proj": { + "stored_tensors": { + "model.layers.10.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.10.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.10.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.10.self_attn.v_proj": { + "stored_tensors": { + "model.layers.10.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.10.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.10.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.10.self_attn.o_proj": { + "stored_tensors": { + "model.layers.10.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.10.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.10.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.10.self_attn.q_norm": { + "stored_tensors": { + "model.layers.10.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.10.self_attn.k_norm": { + "stored_tensors": { + "model.layers.10.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.10.feedforward_layernorm": { + "stored_tensors": { + "model.layers.10.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.10.mlp.up_proj": { + "stored_tensors": { + "model.layers.10.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.10.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.10.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.10.mlp.down_proj": { + "stored_tensors": { + "model.layers.10.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.10.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.10.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.11.attention_layernorm": { + "stored_tensors": { + "model.layers.11.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.11.self_attn.q_proj": { + "stored_tensors": { + "model.layers.11.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.11.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.11.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.11.self_attn.k_proj": { + "stored_tensors": { + "model.layers.11.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.11.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.11.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.11.self_attn.v_proj": { + "stored_tensors": { + "model.layers.11.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.11.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.11.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.11.self_attn.o_proj": { + "stored_tensors": { + "model.layers.11.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.11.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.11.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.11.self_attn.q_norm": { + "stored_tensors": { + "model.layers.11.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.11.self_attn.k_norm": { + "stored_tensors": { + "model.layers.11.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.11.feedforward_layernorm": { + "stored_tensors": { + "model.layers.11.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.11.mlp.up_proj": { + "stored_tensors": { + "model.layers.11.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.11.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.11.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.11.mlp.down_proj": { + "stored_tensors": { + "model.layers.11.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.11.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.11.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.12.attention_layernorm": { + "stored_tensors": { + "model.layers.12.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.12.self_attn.q_proj": { + "stored_tensors": { + "model.layers.12.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.12.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.12.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.12.self_attn.k_proj": { + "stored_tensors": { + "model.layers.12.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.12.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.12.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.12.self_attn.v_proj": { + "stored_tensors": { + "model.layers.12.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.12.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.12.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.12.self_attn.o_proj": { + "stored_tensors": { + "model.layers.12.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.12.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.12.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.12.self_attn.q_norm": { + "stored_tensors": { + "model.layers.12.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.12.self_attn.k_norm": { + "stored_tensors": { + "model.layers.12.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.12.feedforward_layernorm": { + "stored_tensors": { + "model.layers.12.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.12.mlp.up_proj": { + "stored_tensors": { + "model.layers.12.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.12.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.12.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.12.mlp.down_proj": { + "stored_tensors": { + "model.layers.12.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.12.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.12.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.13.attention_layernorm": { + "stored_tensors": { + "model.layers.13.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.13.self_attn.q_proj": { + "stored_tensors": { + "model.layers.13.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.13.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.13.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.13.self_attn.k_proj": { + "stored_tensors": { + "model.layers.13.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.13.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.13.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.13.self_attn.v_proj": { + "stored_tensors": { + "model.layers.13.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.13.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.13.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.13.self_attn.o_proj": { + "stored_tensors": { + "model.layers.13.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.13.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.13.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.13.self_attn.q_norm": { + "stored_tensors": { + "model.layers.13.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.13.self_attn.k_norm": { + "stored_tensors": { + "model.layers.13.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.13.feedforward_layernorm": { + "stored_tensors": { + "model.layers.13.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.13.mlp.up_proj": { + "stored_tensors": { + "model.layers.13.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.13.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.13.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.13.mlp.down_proj": { + "stored_tensors": { + "model.layers.13.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.13.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.13.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.14.attention_layernorm": { + "stored_tensors": { + "model.layers.14.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.14.self_attn.q_proj": { + "stored_tensors": { + "model.layers.14.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.14.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.14.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.14.self_attn.k_proj": { + "stored_tensors": { + "model.layers.14.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.14.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.14.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.14.self_attn.v_proj": { + "stored_tensors": { + "model.layers.14.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.14.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.14.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.14.self_attn.o_proj": { + "stored_tensors": { + "model.layers.14.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.14.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.14.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.14.self_attn.q_norm": { + "stored_tensors": { + "model.layers.14.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.14.self_attn.k_norm": { + "stored_tensors": { + "model.layers.14.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.14.feedforward_layernorm": { + "stored_tensors": { + "model.layers.14.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.14.mlp.up_proj": { + "stored_tensors": { + "model.layers.14.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.14.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.14.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.14.mlp.down_proj": { + "stored_tensors": { + "model.layers.14.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.14.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.14.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.15.attention_layernorm": { + "stored_tensors": { + "model.layers.15.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.15.self_attn.q_proj": { + "stored_tensors": { + "model.layers.15.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.15.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.15.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.15.self_attn.k_proj": { + "stored_tensors": { + "model.layers.15.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.15.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.15.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.15.self_attn.v_proj": { + "stored_tensors": { + "model.layers.15.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.15.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.15.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.15.self_attn.o_proj": { + "stored_tensors": { + "model.layers.15.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.15.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.15.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.15.self_attn.q_norm": { + "stored_tensors": { + "model.layers.15.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.15.self_attn.k_norm": { + "stored_tensors": { + "model.layers.15.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.15.feedforward_layernorm": { + "stored_tensors": { + "model.layers.15.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.15.mlp.up_proj": { + "stored_tensors": { + "model.layers.15.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.15.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.15.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.15.mlp.down_proj": { + "stored_tensors": { + "model.layers.15.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.15.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.15.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.16.attention_layernorm": { + "stored_tensors": { + "model.layers.16.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.16.self_attn.q_proj": { + "stored_tensors": { + "model.layers.16.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.16.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.16.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.16.self_attn.k_proj": { + "stored_tensors": { + "model.layers.16.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.16.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.16.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.16.self_attn.v_proj": { + "stored_tensors": { + "model.layers.16.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.16.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.16.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.16.self_attn.o_proj": { + "stored_tensors": { + "model.layers.16.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.16.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.16.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.16.self_attn.q_norm": { + "stored_tensors": { + "model.layers.16.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.16.self_attn.k_norm": { + "stored_tensors": { + "model.layers.16.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.16.feedforward_layernorm": { + "stored_tensors": { + "model.layers.16.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.16.mlp.up_proj": { + "stored_tensors": { + "model.layers.16.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.16.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.16.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.16.mlp.down_proj": { + "stored_tensors": { + "model.layers.16.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.16.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.16.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.17.attention_layernorm": { + "stored_tensors": { + "model.layers.17.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.17.self_attn.q_proj": { + "stored_tensors": { + "model.layers.17.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.17.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.17.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.17.self_attn.k_proj": { + "stored_tensors": { + "model.layers.17.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.17.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.17.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.17.self_attn.v_proj": { + "stored_tensors": { + "model.layers.17.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.17.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.17.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.17.self_attn.o_proj": { + "stored_tensors": { + "model.layers.17.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.17.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.17.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.17.self_attn.q_norm": { + "stored_tensors": { + "model.layers.17.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.17.self_attn.k_norm": { + "stored_tensors": { + "model.layers.17.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.17.feedforward_layernorm": { + "stored_tensors": { + "model.layers.17.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.17.mlp.up_proj": { + "stored_tensors": { + "model.layers.17.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.17.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.17.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.17.mlp.down_proj": { + "stored_tensors": { + "model.layers.17.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.17.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.17.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.18.attention_layernorm": { + "stored_tensors": { + "model.layers.18.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.18.self_attn.q_proj": { + "stored_tensors": { + "model.layers.18.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.18.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.18.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.18.self_attn.k_proj": { + "stored_tensors": { + "model.layers.18.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.18.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.18.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.18.self_attn.v_proj": { + "stored_tensors": { + "model.layers.18.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.18.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.18.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.18.self_attn.o_proj": { + "stored_tensors": { + "model.layers.18.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.18.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.18.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.18.self_attn.q_norm": { + "stored_tensors": { + "model.layers.18.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.18.self_attn.k_norm": { + "stored_tensors": { + "model.layers.18.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.18.feedforward_layernorm": { + "stored_tensors": { + "model.layers.18.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.18.mlp.up_proj": { + "stored_tensors": { + "model.layers.18.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.18.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.18.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.18.mlp.down_proj": { + "stored_tensors": { + "model.layers.18.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.18.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.18.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.19.attention_layernorm": { + "stored_tensors": { + "model.layers.19.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.19.self_attn.q_proj": { + "stored_tensors": { + "model.layers.19.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.19.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.19.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.19.self_attn.k_proj": { + "stored_tensors": { + "model.layers.19.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.19.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.19.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.19.self_attn.v_proj": { + "stored_tensors": { + "model.layers.19.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.19.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.19.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.19.self_attn.o_proj": { + "stored_tensors": { + "model.layers.19.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.19.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.19.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.19.self_attn.q_norm": { + "stored_tensors": { + "model.layers.19.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.19.self_attn.k_norm": { + "stored_tensors": { + "model.layers.19.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.19.feedforward_layernorm": { + "stored_tensors": { + "model.layers.19.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.19.mlp.up_proj": { + "stored_tensors": { + "model.layers.19.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.19.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.19.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.19.mlp.down_proj": { + "stored_tensors": { + "model.layers.19.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.19.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.19.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.20.attention_layernorm": { + "stored_tensors": { + "model.layers.20.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.20.self_attn.q_proj": { + "stored_tensors": { + "model.layers.20.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.20.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.20.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.20.self_attn.k_proj": { + "stored_tensors": { + "model.layers.20.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.20.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.20.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.20.self_attn.v_proj": { + "stored_tensors": { + "model.layers.20.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.20.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.20.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.20.self_attn.o_proj": { + "stored_tensors": { + "model.layers.20.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.20.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.20.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.20.self_attn.q_norm": { + "stored_tensors": { + "model.layers.20.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.20.self_attn.k_norm": { + "stored_tensors": { + "model.layers.20.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.20.feedforward_layernorm": { + "stored_tensors": { + "model.layers.20.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.20.mlp.up_proj": { + "stored_tensors": { + "model.layers.20.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.20.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.20.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.20.mlp.down_proj": { + "stored_tensors": { + "model.layers.20.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.20.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.20.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.21.attention_layernorm": { + "stored_tensors": { + "model.layers.21.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.21.self_attn.q_proj": { + "stored_tensors": { + "model.layers.21.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.21.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.21.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.21.self_attn.k_proj": { + "stored_tensors": { + "model.layers.21.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.21.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.21.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.21.self_attn.v_proj": { + "stored_tensors": { + "model.layers.21.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.21.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.21.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.21.self_attn.o_proj": { + "stored_tensors": { + "model.layers.21.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.21.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.21.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.21.self_attn.q_norm": { + "stored_tensors": { + "model.layers.21.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.21.self_attn.k_norm": { + "stored_tensors": { + "model.layers.21.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.21.feedforward_layernorm": { + "stored_tensors": { + "model.layers.21.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.21.mlp.up_proj": { + "stored_tensors": { + "model.layers.21.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.21.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.21.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.21.mlp.down_proj": { + "stored_tensors": { + "model.layers.21.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.21.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.21.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.22.attention_layernorm": { + "stored_tensors": { + "model.layers.22.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.22.self_attn.q_proj": { + "stored_tensors": { + "model.layers.22.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.22.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.22.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.22.self_attn.k_proj": { + "stored_tensors": { + "model.layers.22.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.22.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.22.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.22.self_attn.v_proj": { + "stored_tensors": { + "model.layers.22.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.22.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.22.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.22.self_attn.o_proj": { + "stored_tensors": { + "model.layers.22.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.22.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.22.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.22.self_attn.q_norm": { + "stored_tensors": { + "model.layers.22.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.22.self_attn.k_norm": { + "stored_tensors": { + "model.layers.22.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.22.feedforward_layernorm": { + "stored_tensors": { + "model.layers.22.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.22.mlp.up_proj": { + "stored_tensors": { + "model.layers.22.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.22.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.22.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.22.mlp.down_proj": { + "stored_tensors": { + "model.layers.22.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.22.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.22.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.23.attention_layernorm": { + "stored_tensors": { + "model.layers.23.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.23.self_attn.q_proj": { + "stored_tensors": { + "model.layers.23.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.23.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.23.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.23.self_attn.k_proj": { + "stored_tensors": { + "model.layers.23.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.23.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.23.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.23.self_attn.v_proj": { + "stored_tensors": { + "model.layers.23.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.23.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.23.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.23.self_attn.o_proj": { + "stored_tensors": { + "model.layers.23.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.23.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.23.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.23.self_attn.q_norm": { + "stored_tensors": { + "model.layers.23.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.23.self_attn.k_norm": { + "stored_tensors": { + "model.layers.23.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.23.feedforward_layernorm": { + "stored_tensors": { + "model.layers.23.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.23.mlp.up_proj": { + "stored_tensors": { + "model.layers.23.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.23.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.23.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.23.mlp.down_proj": { + "stored_tensors": { + "model.layers.23.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.23.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.23.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.24.attention_layernorm": { + "stored_tensors": { + "model.layers.24.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.24.self_attn.q_proj": { + "stored_tensors": { + "model.layers.24.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.24.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.24.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.24.self_attn.k_proj": { + "stored_tensors": { + "model.layers.24.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.24.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.24.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.24.self_attn.v_proj": { + "stored_tensors": { + "model.layers.24.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.24.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.24.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.24.self_attn.o_proj": { + "stored_tensors": { + "model.layers.24.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.24.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.24.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.24.self_attn.q_norm": { + "stored_tensors": { + "model.layers.24.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.24.self_attn.k_norm": { + "stored_tensors": { + "model.layers.24.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.24.feedforward_layernorm": { + "stored_tensors": { + "model.layers.24.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.24.mlp.up_proj": { + "stored_tensors": { + "model.layers.24.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.24.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.24.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.24.mlp.down_proj": { + "stored_tensors": { + "model.layers.24.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.24.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.24.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.25.attention_layernorm": { + "stored_tensors": { + "model.layers.25.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.25.self_attn.q_proj": { + "stored_tensors": { + "model.layers.25.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.25.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.25.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.25.self_attn.k_proj": { + "stored_tensors": { + "model.layers.25.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.25.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.25.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.25.self_attn.v_proj": { + "stored_tensors": { + "model.layers.25.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.25.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.25.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.25.self_attn.o_proj": { + "stored_tensors": { + "model.layers.25.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.25.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.25.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.25.self_attn.q_norm": { + "stored_tensors": { + "model.layers.25.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.25.self_attn.k_norm": { + "stored_tensors": { + "model.layers.25.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.25.feedforward_layernorm": { + "stored_tensors": { + "model.layers.25.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.25.mlp.up_proj": { + "stored_tensors": { + "model.layers.25.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.25.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.25.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.25.mlp.down_proj": { + "stored_tensors": { + "model.layers.25.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.25.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.25.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.26.attention_layernorm": { + "stored_tensors": { + "model.layers.26.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.26.self_attn.q_proj": { + "stored_tensors": { + "model.layers.26.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.26.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.26.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.26.self_attn.k_proj": { + "stored_tensors": { + "model.layers.26.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.26.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.26.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.26.self_attn.v_proj": { + "stored_tensors": { + "model.layers.26.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.26.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.26.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.26.self_attn.o_proj": { + "stored_tensors": { + "model.layers.26.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.26.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.26.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.26.self_attn.q_norm": { + "stored_tensors": { + "model.layers.26.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.26.self_attn.k_norm": { + "stored_tensors": { + "model.layers.26.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.26.feedforward_layernorm": { + "stored_tensors": { + "model.layers.26.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.26.mlp.up_proj": { + "stored_tensors": { + "model.layers.26.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.26.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.26.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.26.mlp.down_proj": { + "stored_tensors": { + "model.layers.26.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.26.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.26.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.27.attention_layernorm": { + "stored_tensors": { + "model.layers.27.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.27.self_attn.q_proj": { + "stored_tensors": { + "model.layers.27.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.27.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.27.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.27.self_attn.k_proj": { + "stored_tensors": { + "model.layers.27.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.27.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.27.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.27.self_attn.v_proj": { + "stored_tensors": { + "model.layers.27.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.27.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.27.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.27.self_attn.o_proj": { + "stored_tensors": { + "model.layers.27.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.27.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.27.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.27.self_attn.q_norm": { + "stored_tensors": { + "model.layers.27.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.27.self_attn.k_norm": { + "stored_tensors": { + "model.layers.27.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.27.feedforward_layernorm": { + "stored_tensors": { + "model.layers.27.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.27.mlp.up_proj": { + "stored_tensors": { + "model.layers.27.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.27.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.27.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.27.mlp.down_proj": { + "stored_tensors": { + "model.layers.27.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.27.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.27.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.28.attention_layernorm": { + "stored_tensors": { + "model.layers.28.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.28.self_attn.q_proj": { + "stored_tensors": { + "model.layers.28.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.28.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.28.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.28.self_attn.k_proj": { + "stored_tensors": { + "model.layers.28.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.28.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.28.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.28.self_attn.v_proj": { + "stored_tensors": { + "model.layers.28.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.28.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.28.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.28.self_attn.o_proj": { + "stored_tensors": { + "model.layers.28.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.28.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.28.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.28.self_attn.q_norm": { + "stored_tensors": { + "model.layers.28.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.28.self_attn.k_norm": { + "stored_tensors": { + "model.layers.28.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.28.feedforward_layernorm": { + "stored_tensors": { + "model.layers.28.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.28.mlp.up_proj": { + "stored_tensors": { + "model.layers.28.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.28.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.28.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.28.mlp.down_proj": { + "stored_tensors": { + "model.layers.28.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.28.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.28.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.29.attention_layernorm": { + "stored_tensors": { + "model.layers.29.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.29.self_attn.q_proj": { + "stored_tensors": { + "model.layers.29.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.29.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.29.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.29.self_attn.k_proj": { + "stored_tensors": { + "model.layers.29.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.29.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.29.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.29.self_attn.v_proj": { + "stored_tensors": { + "model.layers.29.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.29.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.29.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.29.self_attn.o_proj": { + "stored_tensors": { + "model.layers.29.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.29.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.29.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.29.self_attn.q_norm": { + "stored_tensors": { + "model.layers.29.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.29.self_attn.k_norm": { + "stored_tensors": { + "model.layers.29.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.29.feedforward_layernorm": { + "stored_tensors": { + "model.layers.29.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.29.mlp.up_proj": { + "stored_tensors": { + "model.layers.29.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.29.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.29.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.29.mlp.down_proj": { + "stored_tensors": { + "model.layers.29.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.29.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.29.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.30.attention_layernorm": { + "stored_tensors": { + "model.layers.30.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.30.self_attn.q_proj": { + "stored_tensors": { + "model.layers.30.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.30.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.30.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.30.self_attn.k_proj": { + "stored_tensors": { + "model.layers.30.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.30.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.30.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.30.self_attn.v_proj": { + "stored_tensors": { + "model.layers.30.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.30.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.30.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.30.self_attn.o_proj": { + "stored_tensors": { + "model.layers.30.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.30.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.30.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.30.self_attn.q_norm": { + "stored_tensors": { + "model.layers.30.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.30.self_attn.k_norm": { + "stored_tensors": { + "model.layers.30.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.30.feedforward_layernorm": { + "stored_tensors": { + "model.layers.30.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.30.mlp.up_proj": { + "stored_tensors": { + "model.layers.30.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.30.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.30.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.30.mlp.down_proj": { + "stored_tensors": { + "model.layers.30.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.30.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.30.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.31.attention_layernorm": { + "stored_tensors": { + "model.layers.31.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.31.self_attn.q_proj": { + "stored_tensors": { + "model.layers.31.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.31.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.31.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.31.self_attn.k_proj": { + "stored_tensors": { + "model.layers.31.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.31.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.31.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.31.self_attn.v_proj": { + "stored_tensors": { + "model.layers.31.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.31.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.31.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.31.self_attn.o_proj": { + "stored_tensors": { + "model.layers.31.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.31.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.31.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.31.self_attn.q_norm": { + "stored_tensors": { + "model.layers.31.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.31.self_attn.k_norm": { + "stored_tensors": { + "model.layers.31.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.31.feedforward_layernorm": { + "stored_tensors": { + "model.layers.31.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.31.mlp.up_proj": { + "stored_tensors": { + "model.layers.31.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.31.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.31.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.31.mlp.down_proj": { + "stored_tensors": { + "model.layers.31.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.31.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.31.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.32.attention_layernorm": { + "stored_tensors": { + "model.layers.32.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.32.self_attn.q_proj": { + "stored_tensors": { + "model.layers.32.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.32.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.32.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.32.self_attn.k_proj": { + "stored_tensors": { + "model.layers.32.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.32.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.32.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.32.self_attn.v_proj": { + "stored_tensors": { + "model.layers.32.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.32.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.32.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.32.self_attn.o_proj": { + "stored_tensors": { + "model.layers.32.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.32.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.32.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.32.self_attn.q_norm": { + "stored_tensors": { + "model.layers.32.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.32.self_attn.k_norm": { + "stored_tensors": { + "model.layers.32.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.32.feedforward_layernorm": { + "stored_tensors": { + "model.layers.32.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.32.mlp.up_proj": { + "stored_tensors": { + "model.layers.32.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.32.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.32.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.32.mlp.down_proj": { + "stored_tensors": { + "model.layers.32.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.32.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.32.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.33.attention_layernorm": { + "stored_tensors": { + "model.layers.33.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.33.self_attn.q_proj": { + "stored_tensors": { + "model.layers.33.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.33.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.33.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.33.self_attn.k_proj": { + "stored_tensors": { + "model.layers.33.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.33.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.33.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.33.self_attn.v_proj": { + "stored_tensors": { + "model.layers.33.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.33.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.33.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.33.self_attn.o_proj": { + "stored_tensors": { + "model.layers.33.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.33.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.33.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.33.self_attn.q_norm": { + "stored_tensors": { + "model.layers.33.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.33.self_attn.k_norm": { + "stored_tensors": { + "model.layers.33.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.33.feedforward_layernorm": { + "stored_tensors": { + "model.layers.33.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.33.mlp.up_proj": { + "stored_tensors": { + "model.layers.33.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.33.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.33.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.33.mlp.down_proj": { + "stored_tensors": { + "model.layers.33.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.33.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.33.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.34.attention_layernorm": { + "stored_tensors": { + "model.layers.34.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.34.self_attn.q_proj": { + "stored_tensors": { + "model.layers.34.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.34.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.34.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.34.self_attn.k_proj": { + "stored_tensors": { + "model.layers.34.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.34.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.34.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.34.self_attn.v_proj": { + "stored_tensors": { + "model.layers.34.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.34.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.34.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.34.self_attn.o_proj": { + "stored_tensors": { + "model.layers.34.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.34.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.34.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.34.self_attn.q_norm": { + "stored_tensors": { + "model.layers.34.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.34.self_attn.k_norm": { + "stored_tensors": { + "model.layers.34.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.34.feedforward_layernorm": { + "stored_tensors": { + "model.layers.34.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.34.mlp.up_proj": { + "stored_tensors": { + "model.layers.34.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.34.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.34.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.34.mlp.down_proj": { + "stored_tensors": { + "model.layers.34.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.34.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.34.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.35.attention_layernorm": { + "stored_tensors": { + "model.layers.35.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.35.self_attn.q_proj": { + "stored_tensors": { + "model.layers.35.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.35.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.35.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.35.self_attn.k_proj": { + "stored_tensors": { + "model.layers.35.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.35.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.35.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.35.self_attn.v_proj": { + "stored_tensors": { + "model.layers.35.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.35.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.35.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.35.self_attn.o_proj": { + "stored_tensors": { + "model.layers.35.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.35.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.35.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.35.self_attn.q_norm": { + "stored_tensors": { + "model.layers.35.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.35.self_attn.k_norm": { + "stored_tensors": { + "model.layers.35.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.35.feedforward_layernorm": { + "stored_tensors": { + "model.layers.35.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.35.mlp.up_proj": { + "stored_tensors": { + "model.layers.35.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.35.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.35.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.35.mlp.down_proj": { + "stored_tensors": { + "model.layers.35.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.35.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.35.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.36.attention_layernorm": { + "stored_tensors": { + "model.layers.36.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.36.self_attn.q_proj": { + "stored_tensors": { + "model.layers.36.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.36.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.36.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.36.self_attn.k_proj": { + "stored_tensors": { + "model.layers.36.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.36.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.36.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.36.self_attn.v_proj": { + "stored_tensors": { + "model.layers.36.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.36.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.36.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.36.self_attn.o_proj": { + "stored_tensors": { + "model.layers.36.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.36.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.36.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.36.self_attn.q_norm": { + "stored_tensors": { + "model.layers.36.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.36.self_attn.k_norm": { + "stored_tensors": { + "model.layers.36.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.36.feedforward_layernorm": { + "stored_tensors": { + "model.layers.36.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.36.mlp.up_proj": { + "stored_tensors": { + "model.layers.36.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.36.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.36.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.36.mlp.down_proj": { + "stored_tensors": { + "model.layers.36.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.36.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.36.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.37.attention_layernorm": { + "stored_tensors": { + "model.layers.37.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.37.self_attn.q_proj": { + "stored_tensors": { + "model.layers.37.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.37.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.37.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.37.self_attn.k_proj": { + "stored_tensors": { + "model.layers.37.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.37.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.37.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.37.self_attn.v_proj": { + "stored_tensors": { + "model.layers.37.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.37.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.37.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.37.self_attn.o_proj": { + "stored_tensors": { + "model.layers.37.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.37.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.37.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.37.self_attn.q_norm": { + "stored_tensors": { + "model.layers.37.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.37.self_attn.k_norm": { + "stored_tensors": { + "model.layers.37.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.37.feedforward_layernorm": { + "stored_tensors": { + "model.layers.37.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.37.mlp.up_proj": { + "stored_tensors": { + "model.layers.37.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.37.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.37.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.37.mlp.down_proj": { + "stored_tensors": { + "model.layers.37.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.37.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.37.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.38.attention_layernorm": { + "stored_tensors": { + "model.layers.38.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.38.self_attn.q_proj": { + "stored_tensors": { + "model.layers.38.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.38.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.38.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.38.self_attn.k_proj": { + "stored_tensors": { + "model.layers.38.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.38.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.38.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.38.self_attn.v_proj": { + "stored_tensors": { + "model.layers.38.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.38.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.38.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.38.self_attn.o_proj": { + "stored_tensors": { + "model.layers.38.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.38.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.38.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.38.self_attn.q_norm": { + "stored_tensors": { + "model.layers.38.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.38.self_attn.k_norm": { + "stored_tensors": { + "model.layers.38.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.38.feedforward_layernorm": { + "stored_tensors": { + "model.layers.38.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.38.mlp.up_proj": { + "stored_tensors": { + "model.layers.38.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.38.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.38.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.38.mlp.down_proj": { + "stored_tensors": { + "model.layers.38.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.38.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.38.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.39.attention_layernorm": { + "stored_tensors": { + "model.layers.39.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.39.self_attn.q_proj": { + "stored_tensors": { + "model.layers.39.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.39.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.39.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.39.self_attn.k_proj": { + "stored_tensors": { + "model.layers.39.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.39.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.39.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.39.self_attn.v_proj": { + "stored_tensors": { + "model.layers.39.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.39.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.39.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.39.self_attn.o_proj": { + "stored_tensors": { + "model.layers.39.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.39.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.39.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.39.self_attn.q_norm": { + "stored_tensors": { + "model.layers.39.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.39.self_attn.k_norm": { + "stored_tensors": { + "model.layers.39.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.39.feedforward_layernorm": { + "stored_tensors": { + "model.layers.39.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.39.mlp.up_proj": { + "stored_tensors": { + "model.layers.39.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.39.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.39.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.39.mlp.down_proj": { + "stored_tensors": { + "model.layers.39.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.39.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.39.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.40.attention_layernorm": { + "stored_tensors": { + "model.layers.40.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.40.self_attn.q_proj": { + "stored_tensors": { + "model.layers.40.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.40.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.40.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.40.self_attn.k_proj": { + "stored_tensors": { + "model.layers.40.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.40.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.40.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.40.self_attn.v_proj": { + "stored_tensors": { + "model.layers.40.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.40.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.40.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.40.self_attn.o_proj": { + "stored_tensors": { + "model.layers.40.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.40.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.40.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.40.self_attn.q_norm": { + "stored_tensors": { + "model.layers.40.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.40.self_attn.k_norm": { + "stored_tensors": { + "model.layers.40.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.40.feedforward_layernorm": { + "stored_tensors": { + "model.layers.40.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.40.mlp.up_proj": { + "stored_tensors": { + "model.layers.40.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.40.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.40.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.40.mlp.down_proj": { + "stored_tensors": { + "model.layers.40.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.40.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.40.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.41.attention_layernorm": { + "stored_tensors": { + "model.layers.41.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.41.self_attn.q_proj": { + "stored_tensors": { + "model.layers.41.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.41.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.41.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.41.self_attn.k_proj": { + "stored_tensors": { + "model.layers.41.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.41.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.41.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.41.self_attn.v_proj": { + "stored_tensors": { + "model.layers.41.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.41.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.41.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.41.self_attn.o_proj": { + "stored_tensors": { + "model.layers.41.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.41.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.41.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.41.self_attn.q_norm": { + "stored_tensors": { + "model.layers.41.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.41.self_attn.k_norm": { + "stored_tensors": { + "model.layers.41.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.41.feedforward_layernorm": { + "stored_tensors": { + "model.layers.41.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.41.mlp.up_proj": { + "stored_tensors": { + "model.layers.41.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.41.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.41.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.41.mlp.down_proj": { + "stored_tensors": { + "model.layers.41.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.41.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.41.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.42.attention_layernorm": { + "stored_tensors": { + "model.layers.42.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.42.self_attn.q_proj": { + "stored_tensors": { + "model.layers.42.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.42.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.42.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.42.self_attn.k_proj": { + "stored_tensors": { + "model.layers.42.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.42.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.42.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.42.self_attn.v_proj": { + "stored_tensors": { + "model.layers.42.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.42.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.42.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.42.self_attn.o_proj": { + "stored_tensors": { + "model.layers.42.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.42.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.42.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.42.self_attn.q_norm": { + "stored_tensors": { + "model.layers.42.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.42.self_attn.k_norm": { + "stored_tensors": { + "model.layers.42.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.42.feedforward_layernorm": { + "stored_tensors": { + "model.layers.42.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.42.mlp.up_proj": { + "stored_tensors": { + "model.layers.42.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.42.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.42.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.42.mlp.down_proj": { + "stored_tensors": { + "model.layers.42.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.42.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.42.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.43.attention_layernorm": { + "stored_tensors": { + "model.layers.43.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.43.self_attn.q_proj": { + "stored_tensors": { + "model.layers.43.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.43.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.43.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.43.self_attn.k_proj": { + "stored_tensors": { + "model.layers.43.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.43.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.43.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.43.self_attn.v_proj": { + "stored_tensors": { + "model.layers.43.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.43.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.43.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.43.self_attn.o_proj": { + "stored_tensors": { + "model.layers.43.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.43.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.43.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.43.self_attn.q_norm": { + "stored_tensors": { + "model.layers.43.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.43.self_attn.k_norm": { + "stored_tensors": { + "model.layers.43.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.43.feedforward_layernorm": { + "stored_tensors": { + "model.layers.43.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.43.mlp.up_proj": { + "stored_tensors": { + "model.layers.43.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.43.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.43.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.43.mlp.down_proj": { + "stored_tensors": { + "model.layers.43.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.43.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.43.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.44.attention_layernorm": { + "stored_tensors": { + "model.layers.44.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.44.self_attn.q_proj": { + "stored_tensors": { + "model.layers.44.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.44.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.44.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.44.self_attn.k_proj": { + "stored_tensors": { + "model.layers.44.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.44.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.44.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.44.self_attn.v_proj": { + "stored_tensors": { + "model.layers.44.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.44.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.44.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.44.self_attn.o_proj": { + "stored_tensors": { + "model.layers.44.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.44.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.44.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.44.self_attn.q_norm": { + "stored_tensors": { + "model.layers.44.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.44.self_attn.k_norm": { + "stored_tensors": { + "model.layers.44.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.44.feedforward_layernorm": { + "stored_tensors": { + "model.layers.44.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.44.mlp.up_proj": { + "stored_tensors": { + "model.layers.44.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.44.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.44.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.44.mlp.down_proj": { + "stored_tensors": { + "model.layers.44.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.44.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.44.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.45.attention_layernorm": { + "stored_tensors": { + "model.layers.45.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.45.self_attn.q_proj": { + "stored_tensors": { + "model.layers.45.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.45.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.45.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.45.self_attn.k_proj": { + "stored_tensors": { + "model.layers.45.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.45.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.45.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.45.self_attn.v_proj": { + "stored_tensors": { + "model.layers.45.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.45.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.45.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.45.self_attn.o_proj": { + "stored_tensors": { + "model.layers.45.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.45.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.45.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.45.self_attn.q_norm": { + "stored_tensors": { + "model.layers.45.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.45.self_attn.k_norm": { + "stored_tensors": { + "model.layers.45.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.45.feedforward_layernorm": { + "stored_tensors": { + "model.layers.45.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.45.mlp.up_proj": { + "stored_tensors": { + "model.layers.45.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.45.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.45.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.45.mlp.down_proj": { + "stored_tensors": { + "model.layers.45.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.45.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.45.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.46.attention_layernorm": { + "stored_tensors": { + "model.layers.46.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.46.self_attn.q_proj": { + "stored_tensors": { + "model.layers.46.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.46.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.46.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.46.self_attn.k_proj": { + "stored_tensors": { + "model.layers.46.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.46.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.46.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.46.self_attn.v_proj": { + "stored_tensors": { + "model.layers.46.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.46.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.46.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.46.self_attn.o_proj": { + "stored_tensors": { + "model.layers.46.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.46.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.46.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.46.self_attn.q_norm": { + "stored_tensors": { + "model.layers.46.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.46.self_attn.k_norm": { + "stored_tensors": { + "model.layers.46.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.46.feedforward_layernorm": { + "stored_tensors": { + "model.layers.46.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.46.mlp.up_proj": { + "stored_tensors": { + "model.layers.46.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.46.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.46.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.46.mlp.down_proj": { + "stored_tensors": { + "model.layers.46.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.46.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.46.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.47.attention_layernorm": { + "stored_tensors": { + "model.layers.47.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.47.self_attn.q_proj": { + "stored_tensors": { + "model.layers.47.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.47.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.47.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.47.self_attn.k_proj": { + "stored_tensors": { + "model.layers.47.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.47.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.47.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.47.self_attn.v_proj": { + "stored_tensors": { + "model.layers.47.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.47.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.47.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.47.self_attn.o_proj": { + "stored_tensors": { + "model.layers.47.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.47.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.47.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.47.self_attn.q_norm": { + "stored_tensors": { + "model.layers.47.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.47.self_attn.k_norm": { + "stored_tensors": { + "model.layers.47.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.47.feedforward_layernorm": { + "stored_tensors": { + "model.layers.47.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.47.mlp.up_proj": { + "stored_tensors": { + "model.layers.47.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.47.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.47.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.47.mlp.down_proj": { + "stored_tensors": { + "model.layers.47.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.47.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.47.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.48.attention_layernorm": { + "stored_tensors": { + "model.layers.48.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.48.self_attn.q_proj": { + "stored_tensors": { + "model.layers.48.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.48.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.48.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.48.self_attn.k_proj": { + "stored_tensors": { + "model.layers.48.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.48.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.48.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.48.self_attn.v_proj": { + "stored_tensors": { + "model.layers.48.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.48.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.48.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.48.self_attn.o_proj": { + "stored_tensors": { + "model.layers.48.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.48.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.48.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.48.self_attn.q_norm": { + "stored_tensors": { + "model.layers.48.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.48.self_attn.k_norm": { + "stored_tensors": { + "model.layers.48.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.48.feedforward_layernorm": { + "stored_tensors": { + "model.layers.48.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.48.mlp.up_proj": { + "stored_tensors": { + "model.layers.48.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.48.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.48.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.48.mlp.down_proj": { + "stored_tensors": { + "model.layers.48.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.48.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.48.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.49.attention_layernorm": { + "stored_tensors": { + "model.layers.49.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.49.self_attn.q_proj": { + "stored_tensors": { + "model.layers.49.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.49.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.49.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.49.self_attn.k_proj": { + "stored_tensors": { + "model.layers.49.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.49.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.49.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.49.self_attn.v_proj": { + "stored_tensors": { + "model.layers.49.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.49.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.49.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.49.self_attn.o_proj": { + "stored_tensors": { + "model.layers.49.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.49.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.49.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.49.self_attn.q_norm": { + "stored_tensors": { + "model.layers.49.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.49.self_attn.k_norm": { + "stored_tensors": { + "model.layers.49.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.49.feedforward_layernorm": { + "stored_tensors": { + "model.layers.49.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.49.mlp.up_proj": { + "stored_tensors": { + "model.layers.49.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.49.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.49.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.49.mlp.down_proj": { + "stored_tensors": { + "model.layers.49.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.49.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.49.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.50.attention_layernorm": { + "stored_tensors": { + "model.layers.50.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.50.self_attn.q_proj": { + "stored_tensors": { + "model.layers.50.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.50.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.50.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.50.self_attn.k_proj": { + "stored_tensors": { + "model.layers.50.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.50.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.50.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.50.self_attn.v_proj": { + "stored_tensors": { + "model.layers.50.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.50.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.50.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.50.self_attn.o_proj": { + "stored_tensors": { + "model.layers.50.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.50.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.50.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.50.self_attn.q_norm": { + "stored_tensors": { + "model.layers.50.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.50.self_attn.k_norm": { + "stored_tensors": { + "model.layers.50.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.50.feedforward_layernorm": { + "stored_tensors": { + "model.layers.50.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.50.mlp.up_proj": { + "stored_tensors": { + "model.layers.50.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.50.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.50.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.50.mlp.down_proj": { + "stored_tensors": { + "model.layers.50.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.50.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.50.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.51.attention_layernorm": { + "stored_tensors": { + "model.layers.51.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.51.self_attn.q_proj": { + "stored_tensors": { + "model.layers.51.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.51.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.51.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.51.self_attn.k_proj": { + "stored_tensors": { + "model.layers.51.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.51.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.51.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.51.self_attn.v_proj": { + "stored_tensors": { + "model.layers.51.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.51.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.51.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.51.self_attn.o_proj": { + "stored_tensors": { + "model.layers.51.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.51.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.51.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.51.self_attn.q_norm": { + "stored_tensors": { + "model.layers.51.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.51.self_attn.k_norm": { + "stored_tensors": { + "model.layers.51.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.51.feedforward_layernorm": { + "stored_tensors": { + "model.layers.51.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.51.mlp.up_proj": { + "stored_tensors": { + "model.layers.51.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.51.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.51.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.51.mlp.down_proj": { + "stored_tensors": { + "model.layers.51.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.51.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.51.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.52.attention_layernorm": { + "stored_tensors": { + "model.layers.52.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.52.self_attn.q_proj": { + "stored_tensors": { + "model.layers.52.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.52.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.52.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.52.self_attn.k_proj": { + "stored_tensors": { + "model.layers.52.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.52.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.52.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.52.self_attn.v_proj": { + "stored_tensors": { + "model.layers.52.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.52.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.52.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.52.self_attn.o_proj": { + "stored_tensors": { + "model.layers.52.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.52.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.52.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.52.self_attn.q_norm": { + "stored_tensors": { + "model.layers.52.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.52.self_attn.k_norm": { + "stored_tensors": { + "model.layers.52.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.52.feedforward_layernorm": { + "stored_tensors": { + "model.layers.52.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.52.mlp.up_proj": { + "stored_tensors": { + "model.layers.52.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.52.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.52.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.52.mlp.down_proj": { + "stored_tensors": { + "model.layers.52.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.52.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.52.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.53.attention_layernorm": { + "stored_tensors": { + "model.layers.53.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.53.self_attn.q_proj": { + "stored_tensors": { + "model.layers.53.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.53.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.53.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.53.self_attn.k_proj": { + "stored_tensors": { + "model.layers.53.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.53.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.53.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.53.self_attn.v_proj": { + "stored_tensors": { + "model.layers.53.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.53.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.53.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.53.self_attn.o_proj": { + "stored_tensors": { + "model.layers.53.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.53.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.53.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.53.self_attn.q_norm": { + "stored_tensors": { + "model.layers.53.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.53.self_attn.k_norm": { + "stored_tensors": { + "model.layers.53.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.53.feedforward_layernorm": { + "stored_tensors": { + "model.layers.53.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.53.mlp.up_proj": { + "stored_tensors": { + "model.layers.53.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.53.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.53.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.53.mlp.down_proj": { + "stored_tensors": { + "model.layers.53.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.53.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.53.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.54.attention_layernorm": { + "stored_tensors": { + "model.layers.54.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.54.self_attn.q_proj": { + "stored_tensors": { + "model.layers.54.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.54.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.54.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.54.self_attn.k_proj": { + "stored_tensors": { + "model.layers.54.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.54.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.54.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.54.self_attn.v_proj": { + "stored_tensors": { + "model.layers.54.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.54.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.54.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.54.self_attn.o_proj": { + "stored_tensors": { + "model.layers.54.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.54.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.54.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.54.self_attn.q_norm": { + "stored_tensors": { + "model.layers.54.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.54.self_attn.k_norm": { + "stored_tensors": { + "model.layers.54.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.54.feedforward_layernorm": { + "stored_tensors": { + "model.layers.54.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.54.mlp.up_proj": { + "stored_tensors": { + "model.layers.54.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.54.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.54.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.54.mlp.down_proj": { + "stored_tensors": { + "model.layers.54.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.54.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.54.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.55.attention_layernorm": { + "stored_tensors": { + "model.layers.55.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.55.self_attn.q_proj": { + "stored_tensors": { + "model.layers.55.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.55.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.55.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.55.self_attn.k_proj": { + "stored_tensors": { + "model.layers.55.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.55.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.55.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.55.self_attn.v_proj": { + "stored_tensors": { + "model.layers.55.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.55.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.55.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.55.self_attn.o_proj": { + "stored_tensors": { + "model.layers.55.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.55.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.55.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.55.self_attn.q_norm": { + "stored_tensors": { + "model.layers.55.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.55.self_attn.k_norm": { + "stored_tensors": { + "model.layers.55.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.55.feedforward_layernorm": { + "stored_tensors": { + "model.layers.55.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.55.mlp.up_proj": { + "stored_tensors": { + "model.layers.55.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.55.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.55.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.55.mlp.down_proj": { + "stored_tensors": { + "model.layers.55.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.55.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.55.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.56.attention_layernorm": { + "stored_tensors": { + "model.layers.56.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.56.self_attn.q_proj": { + "stored_tensors": { + "model.layers.56.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.56.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.56.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.56.self_attn.k_proj": { + "stored_tensors": { + "model.layers.56.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.56.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.56.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.56.self_attn.v_proj": { + "stored_tensors": { + "model.layers.56.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.56.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.56.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.56.self_attn.o_proj": { + "stored_tensors": { + "model.layers.56.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.56.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.56.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.56.self_attn.q_norm": { + "stored_tensors": { + "model.layers.56.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.56.self_attn.k_norm": { + "stored_tensors": { + "model.layers.56.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.56.feedforward_layernorm": { + "stored_tensors": { + "model.layers.56.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.56.mlp.up_proj": { + "stored_tensors": { + "model.layers.56.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.56.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.56.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.56.mlp.down_proj": { + "stored_tensors": { + "model.layers.56.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.56.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.56.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.57.attention_layernorm": { + "stored_tensors": { + "model.layers.57.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.57.self_attn.q_proj": { + "stored_tensors": { + "model.layers.57.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.57.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.57.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.57.self_attn.k_proj": { + "stored_tensors": { + "model.layers.57.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.57.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.57.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.57.self_attn.v_proj": { + "stored_tensors": { + "model.layers.57.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.57.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.57.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.57.self_attn.o_proj": { + "stored_tensors": { + "model.layers.57.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.57.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.57.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.57.self_attn.q_norm": { + "stored_tensors": { + "model.layers.57.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.57.self_attn.k_norm": { + "stored_tensors": { + "model.layers.57.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.57.feedforward_layernorm": { + "stored_tensors": { + "model.layers.57.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.57.mlp.up_proj": { + "stored_tensors": { + "model.layers.57.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.57.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.57.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.57.mlp.down_proj": { + "stored_tensors": { + "model.layers.57.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.57.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.57.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.58.attention_layernorm": { + "stored_tensors": { + "model.layers.58.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.58.self_attn.q_proj": { + "stored_tensors": { + "model.layers.58.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.58.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.58.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.58.self_attn.k_proj": { + "stored_tensors": { + "model.layers.58.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.58.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.58.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.58.self_attn.v_proj": { + "stored_tensors": { + "model.layers.58.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.58.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.58.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.58.self_attn.o_proj": { + "stored_tensors": { + "model.layers.58.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.58.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.58.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.58.self_attn.q_norm": { + "stored_tensors": { + "model.layers.58.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.58.self_attn.k_norm": { + "stored_tensors": { + "model.layers.58.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.58.feedforward_layernorm": { + "stored_tensors": { + "model.layers.58.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.58.mlp.up_proj": { + "stored_tensors": { + "model.layers.58.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.58.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.58.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.58.mlp.down_proj": { + "stored_tensors": { + "model.layers.58.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.58.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.58.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.59.attention_layernorm": { + "stored_tensors": { + "model.layers.59.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.59.self_attn.q_proj": { + "stored_tensors": { + "model.layers.59.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.59.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.59.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.59.self_attn.k_proj": { + "stored_tensors": { + "model.layers.59.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.59.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.59.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.59.self_attn.v_proj": { + "stored_tensors": { + "model.layers.59.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.59.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.59.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.59.self_attn.o_proj": { + "stored_tensors": { + "model.layers.59.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.59.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.59.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.59.self_attn.q_norm": { + "stored_tensors": { + "model.layers.59.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.59.self_attn.k_norm": { + "stored_tensors": { + "model.layers.59.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.59.feedforward_layernorm": { + "stored_tensors": { + "model.layers.59.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.59.mlp.up_proj": { + "stored_tensors": { + "model.layers.59.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.59.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.59.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.59.mlp.down_proj": { + "stored_tensors": { + "model.layers.59.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.59.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.59.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.60.attention_layernorm": { + "stored_tensors": { + "model.layers.60.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.60.self_attn.q_proj": { + "stored_tensors": { + "model.layers.60.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.60.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.60.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.60.self_attn.k_proj": { + "stored_tensors": { + "model.layers.60.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.60.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.60.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.60.self_attn.v_proj": { + "stored_tensors": { + "model.layers.60.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.60.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.60.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.60.self_attn.o_proj": { + "stored_tensors": { + "model.layers.60.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.60.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.60.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.60.self_attn.q_norm": { + "stored_tensors": { + "model.layers.60.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.60.self_attn.k_norm": { + "stored_tensors": { + "model.layers.60.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.60.feedforward_layernorm": { + "stored_tensors": { + "model.layers.60.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.60.mlp.up_proj": { + "stored_tensors": { + "model.layers.60.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.60.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.60.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.60.mlp.down_proj": { + "stored_tensors": { + "model.layers.60.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.60.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.60.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.61.attention_layernorm": { + "stored_tensors": { + "model.layers.61.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.61.self_attn.q_proj": { + "stored_tensors": { + "model.layers.61.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.61.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.61.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.61.self_attn.k_proj": { + "stored_tensors": { + "model.layers.61.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.61.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.61.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.61.self_attn.v_proj": { + "stored_tensors": { + "model.layers.61.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.61.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.61.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.61.self_attn.o_proj": { + "stored_tensors": { + "model.layers.61.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.61.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.61.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.61.self_attn.q_norm": { + "stored_tensors": { + "model.layers.61.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.61.self_attn.k_norm": { + "stored_tensors": { + "model.layers.61.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.61.feedforward_layernorm": { + "stored_tensors": { + "model.layers.61.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.61.mlp.up_proj": { + "stored_tensors": { + "model.layers.61.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.61.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.61.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.61.mlp.down_proj": { + "stored_tensors": { + "model.layers.61.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.61.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.61.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.62.attention_layernorm": { + "stored_tensors": { + "model.layers.62.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.62.self_attn.q_proj": { + "stored_tensors": { + "model.layers.62.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.62.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.62.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.62.self_attn.k_proj": { + "stored_tensors": { + "model.layers.62.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.62.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.62.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.62.self_attn.v_proj": { + "stored_tensors": { + "model.layers.62.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.62.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.62.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.62.self_attn.o_proj": { + "stored_tensors": { + "model.layers.62.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.62.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.62.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.62.self_attn.q_norm": { + "stored_tensors": { + "model.layers.62.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.62.self_attn.k_norm": { + "stored_tensors": { + "model.layers.62.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.62.feedforward_layernorm": { + "stored_tensors": { + "model.layers.62.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.62.mlp.up_proj": { + "stored_tensors": { + "model.layers.62.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.62.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.62.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.62.mlp.down_proj": { + "stored_tensors": { + "model.layers.62.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.62.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.62.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.63.attention_layernorm": { + "stored_tensors": { + "model.layers.63.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.63.self_attn.q_proj": { + "stored_tensors": { + "model.layers.63.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.63.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.63.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.63.self_attn.k_proj": { + "stored_tensors": { + "model.layers.63.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.63.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.63.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.63.self_attn.v_proj": { + "stored_tensors": { + "model.layers.63.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.63.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.63.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.63.self_attn.o_proj": { + "stored_tensors": { + "model.layers.63.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.63.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.63.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.63.self_attn.q_norm": { + "stored_tensors": { + "model.layers.63.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.63.self_attn.k_norm": { + "stored_tensors": { + "model.layers.63.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.63.feedforward_layernorm": { + "stored_tensors": { + "model.layers.63.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.63.mlp.up_proj": { + "stored_tensors": { + "model.layers.63.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.63.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.63.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.63.mlp.down_proj": { + "stored_tensors": { + "model.layers.63.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.63.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.63.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.64.attention_layernorm": { + "stored_tensors": { + "model.layers.64.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.64.self_attn.q_proj": { + "stored_tensors": { + "model.layers.64.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.64.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.64.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.64.self_attn.k_proj": { + "stored_tensors": { + "model.layers.64.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.64.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.64.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.64.self_attn.v_proj": { + "stored_tensors": { + "model.layers.64.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.64.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.64.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.64.self_attn.o_proj": { + "stored_tensors": { + "model.layers.64.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.64.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.64.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.64.self_attn.q_norm": { + "stored_tensors": { + "model.layers.64.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.64.self_attn.k_norm": { + "stored_tensors": { + "model.layers.64.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.64.feedforward_layernorm": { + "stored_tensors": { + "model.layers.64.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.64.mlp.up_proj": { + "stored_tensors": { + "model.layers.64.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.64.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.64.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.64.mlp.down_proj": { + "stored_tensors": { + "model.layers.64.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.64.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.64.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.65.attention_layernorm": { + "stored_tensors": { + "model.layers.65.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.65.self_attn.q_proj": { + "stored_tensors": { + "model.layers.65.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.65.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.65.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.65.self_attn.k_proj": { + "stored_tensors": { + "model.layers.65.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.65.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.65.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.65.self_attn.v_proj": { + "stored_tensors": { + "model.layers.65.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.65.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.65.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.65.self_attn.o_proj": { + "stored_tensors": { + "model.layers.65.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.65.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.65.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.65.self_attn.q_norm": { + "stored_tensors": { + "model.layers.65.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.65.self_attn.k_norm": { + "stored_tensors": { + "model.layers.65.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.65.feedforward_layernorm": { + "stored_tensors": { + "model.layers.65.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.65.mlp.up_proj": { + "stored_tensors": { + "model.layers.65.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.65.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.65.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.65.mlp.down_proj": { + "stored_tensors": { + "model.layers.65.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.65.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.65.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.66.attention_layernorm": { + "stored_tensors": { + "model.layers.66.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.66.self_attn.q_proj": { + "stored_tensors": { + "model.layers.66.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.66.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.66.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.66.self_attn.k_proj": { + "stored_tensors": { + "model.layers.66.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.66.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.66.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.66.self_attn.v_proj": { + "stored_tensors": { + "model.layers.66.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.66.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.66.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.66.self_attn.o_proj": { + "stored_tensors": { + "model.layers.66.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.66.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.66.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.66.self_attn.q_norm": { + "stored_tensors": { + "model.layers.66.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.66.self_attn.k_norm": { + "stored_tensors": { + "model.layers.66.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.66.feedforward_layernorm": { + "stored_tensors": { + "model.layers.66.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.66.mlp.up_proj": { + "stored_tensors": { + "model.layers.66.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.66.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.66.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.66.mlp.down_proj": { + "stored_tensors": { + "model.layers.66.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.66.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.66.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.67.attention_layernorm": { + "stored_tensors": { + "model.layers.67.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.67.self_attn.q_proj": { + "stored_tensors": { + "model.layers.67.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.67.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.67.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.67.self_attn.k_proj": { + "stored_tensors": { + "model.layers.67.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.67.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.67.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.67.self_attn.v_proj": { + "stored_tensors": { + "model.layers.67.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.67.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.67.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.67.self_attn.o_proj": { + "stored_tensors": { + "model.layers.67.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.67.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.67.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.67.self_attn.q_norm": { + "stored_tensors": { + "model.layers.67.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.67.self_attn.k_norm": { + "stored_tensors": { + "model.layers.67.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.67.feedforward_layernorm": { + "stored_tensors": { + "model.layers.67.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.67.mlp.up_proj": { + "stored_tensors": { + "model.layers.67.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.67.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.67.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.67.mlp.down_proj": { + "stored_tensors": { + "model.layers.67.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.67.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.67.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.68.attention_layernorm": { + "stored_tensors": { + "model.layers.68.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.68.self_attn.q_proj": { + "stored_tensors": { + "model.layers.68.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.68.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.68.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.68.self_attn.k_proj": { + "stored_tensors": { + "model.layers.68.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.68.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.68.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.68.self_attn.v_proj": { + "stored_tensors": { + "model.layers.68.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.68.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.68.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.68.self_attn.o_proj": { + "stored_tensors": { + "model.layers.68.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.68.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.68.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.68.self_attn.q_norm": { + "stored_tensors": { + "model.layers.68.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.68.self_attn.k_norm": { + "stored_tensors": { + "model.layers.68.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.68.feedforward_layernorm": { + "stored_tensors": { + "model.layers.68.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.68.mlp.up_proj": { + "stored_tensors": { + "model.layers.68.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.68.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.68.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.68.mlp.down_proj": { + "stored_tensors": { + "model.layers.68.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.68.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.68.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.69.attention_layernorm": { + "stored_tensors": { + "model.layers.69.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.69.self_attn.q_proj": { + "stored_tensors": { + "model.layers.69.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.69.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.69.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.69.self_attn.k_proj": { + "stored_tensors": { + "model.layers.69.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.69.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.69.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.69.self_attn.v_proj": { + "stored_tensors": { + "model.layers.69.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.69.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.69.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.69.self_attn.o_proj": { + "stored_tensors": { + "model.layers.69.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.69.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.69.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.69.self_attn.q_norm": { + "stored_tensors": { + "model.layers.69.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.69.self_attn.k_norm": { + "stored_tensors": { + "model.layers.69.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.69.feedforward_layernorm": { + "stored_tensors": { + "model.layers.69.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.69.mlp.up_proj": { + "stored_tensors": { + "model.layers.69.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.69.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.69.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.69.mlp.down_proj": { + "stored_tensors": { + "model.layers.69.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.69.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.69.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.70.attention_layernorm": { + "stored_tensors": { + "model.layers.70.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.70.self_attn.q_proj": { + "stored_tensors": { + "model.layers.70.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.70.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.70.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.70.self_attn.k_proj": { + "stored_tensors": { + "model.layers.70.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.70.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.70.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.70.self_attn.v_proj": { + "stored_tensors": { + "model.layers.70.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.70.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.70.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.70.self_attn.o_proj": { + "stored_tensors": { + "model.layers.70.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.70.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.70.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.70.self_attn.q_norm": { + "stored_tensors": { + "model.layers.70.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.70.self_attn.k_norm": { + "stored_tensors": { + "model.layers.70.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.70.feedforward_layernorm": { + "stored_tensors": { + "model.layers.70.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.70.mlp.up_proj": { + "stored_tensors": { + "model.layers.70.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.70.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.70.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.70.mlp.down_proj": { + "stored_tensors": { + "model.layers.70.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.70.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.70.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.71.attention_layernorm": { + "stored_tensors": { + "model.layers.71.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.71.self_attn.q_proj": { + "stored_tensors": { + "model.layers.71.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.71.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.71.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.71.self_attn.k_proj": { + "stored_tensors": { + "model.layers.71.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.71.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.71.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.71.self_attn.v_proj": { + "stored_tensors": { + "model.layers.71.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.71.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.71.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.71.self_attn.o_proj": { + "stored_tensors": { + "model.layers.71.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.71.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.71.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.71.self_attn.q_norm": { + "stored_tensors": { + "model.layers.71.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.71.self_attn.k_norm": { + "stored_tensors": { + "model.layers.71.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.71.feedforward_layernorm": { + "stored_tensors": { + "model.layers.71.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.71.mlp.up_proj": { + "stored_tensors": { + "model.layers.71.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.71.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.71.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.71.mlp.down_proj": { + "stored_tensors": { + "model.layers.71.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.71.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.71.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.72.attention_layernorm": { + "stored_tensors": { + "model.layers.72.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.72.self_attn.q_proj": { + "stored_tensors": { + "model.layers.72.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.72.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.72.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.72.self_attn.k_proj": { + "stored_tensors": { + "model.layers.72.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.72.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.72.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.72.self_attn.v_proj": { + "stored_tensors": { + "model.layers.72.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.72.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.72.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.72.self_attn.o_proj": { + "stored_tensors": { + "model.layers.72.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.72.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.72.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.72.self_attn.q_norm": { + "stored_tensors": { + "model.layers.72.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.72.self_attn.k_norm": { + "stored_tensors": { + "model.layers.72.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.72.feedforward_layernorm": { + "stored_tensors": { + "model.layers.72.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.72.mlp.up_proj": { + "stored_tensors": { + "model.layers.72.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.72.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.72.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.72.mlp.down_proj": { + "stored_tensors": { + "model.layers.72.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.72.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.72.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.73.attention_layernorm": { + "stored_tensors": { + "model.layers.73.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.73.self_attn.q_proj": { + "stored_tensors": { + "model.layers.73.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.73.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.73.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.73.self_attn.k_proj": { + "stored_tensors": { + "model.layers.73.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.73.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.73.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.73.self_attn.v_proj": { + "stored_tensors": { + "model.layers.73.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.73.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.73.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.73.self_attn.o_proj": { + "stored_tensors": { + "model.layers.73.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.73.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.73.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.73.self_attn.q_norm": { + "stored_tensors": { + "model.layers.73.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.73.self_attn.k_norm": { + "stored_tensors": { + "model.layers.73.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.73.feedforward_layernorm": { + "stored_tensors": { + "model.layers.73.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.73.mlp.up_proj": { + "stored_tensors": { + "model.layers.73.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.73.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.73.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.73.mlp.down_proj": { + "stored_tensors": { + "model.layers.73.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.73.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.73.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.74.attention_layernorm": { + "stored_tensors": { + "model.layers.74.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.74.self_attn.q_proj": { + "stored_tensors": { + "model.layers.74.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.74.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.74.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.74.self_attn.k_proj": { + "stored_tensors": { + "model.layers.74.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.74.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.74.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.74.self_attn.v_proj": { + "stored_tensors": { + "model.layers.74.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.74.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.74.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.74.self_attn.o_proj": { + "stored_tensors": { + "model.layers.74.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.74.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.74.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.74.self_attn.q_norm": { + "stored_tensors": { + "model.layers.74.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.74.self_attn.k_norm": { + "stored_tensors": { + "model.layers.74.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.74.feedforward_layernorm": { + "stored_tensors": { + "model.layers.74.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.74.mlp.up_proj": { + "stored_tensors": { + "model.layers.74.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.74.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.74.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.74.mlp.down_proj": { + "stored_tensors": { + "model.layers.74.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.74.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.74.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.75.attention_layernorm": { + "stored_tensors": { + "model.layers.75.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.75.self_attn.q_proj": { + "stored_tensors": { + "model.layers.75.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.75.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.75.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.75.self_attn.k_proj": { + "stored_tensors": { + "model.layers.75.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.75.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.75.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.75.self_attn.v_proj": { + "stored_tensors": { + "model.layers.75.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.75.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.75.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.75.self_attn.o_proj": { + "stored_tensors": { + "model.layers.75.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.75.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.75.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.75.self_attn.q_norm": { + "stored_tensors": { + "model.layers.75.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.75.self_attn.k_norm": { + "stored_tensors": { + "model.layers.75.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.75.feedforward_layernorm": { + "stored_tensors": { + "model.layers.75.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.75.mlp.up_proj": { + "stored_tensors": { + "model.layers.75.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.75.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.75.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.75.mlp.down_proj": { + "stored_tensors": { + "model.layers.75.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.75.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.75.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.76.attention_layernorm": { + "stored_tensors": { + "model.layers.76.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.76.self_attn.q_proj": { + "stored_tensors": { + "model.layers.76.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.76.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.76.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.76.self_attn.k_proj": { + "stored_tensors": { + "model.layers.76.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.76.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.76.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.76.self_attn.v_proj": { + "stored_tensors": { + "model.layers.76.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.76.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.76.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.76.self_attn.o_proj": { + "stored_tensors": { + "model.layers.76.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.76.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.76.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.76.self_attn.q_norm": { + "stored_tensors": { + "model.layers.76.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.76.self_attn.k_norm": { + "stored_tensors": { + "model.layers.76.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.76.feedforward_layernorm": { + "stored_tensors": { + "model.layers.76.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.76.mlp.up_proj": { + "stored_tensors": { + "model.layers.76.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.76.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.76.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.76.mlp.down_proj": { + "stored_tensors": { + "model.layers.76.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.76.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.76.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.77.attention_layernorm": { + "stored_tensors": { + "model.layers.77.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.77.self_attn.q_proj": { + "stored_tensors": { + "model.layers.77.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.77.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.77.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.77.self_attn.k_proj": { + "stored_tensors": { + "model.layers.77.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.77.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.77.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.77.self_attn.v_proj": { + "stored_tensors": { + "model.layers.77.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.77.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.77.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.77.self_attn.o_proj": { + "stored_tensors": { + "model.layers.77.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.77.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.77.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.77.self_attn.q_norm": { + "stored_tensors": { + "model.layers.77.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.77.self_attn.k_norm": { + "stored_tensors": { + "model.layers.77.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.77.feedforward_layernorm": { + "stored_tensors": { + "model.layers.77.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.77.mlp.up_proj": { + "stored_tensors": { + "model.layers.77.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.77.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.77.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.77.mlp.down_proj": { + "stored_tensors": { + "model.layers.77.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.77.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.77.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.78.attention_layernorm": { + "stored_tensors": { + "model.layers.78.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.78.self_attn.q_proj": { + "stored_tensors": { + "model.layers.78.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.78.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.78.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.78.self_attn.k_proj": { + "stored_tensors": { + "model.layers.78.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.78.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.78.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.78.self_attn.v_proj": { + "stored_tensors": { + "model.layers.78.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.78.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.78.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.78.self_attn.o_proj": { + "stored_tensors": { + "model.layers.78.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.78.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.78.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.78.self_attn.q_norm": { + "stored_tensors": { + "model.layers.78.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.78.self_attn.k_norm": { + "stored_tensors": { + "model.layers.78.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.78.feedforward_layernorm": { + "stored_tensors": { + "model.layers.78.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.78.mlp.up_proj": { + "stored_tensors": { + "model.layers.78.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.78.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.78.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.78.mlp.down_proj": { + "stored_tensors": { + "model.layers.78.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.78.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.78.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.79.attention_layernorm": { + "stored_tensors": { + "model.layers.79.attention_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.79.self_attn.q_proj": { + "stored_tensors": { + "model.layers.79.self_attn.q_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.79.self_attn.q_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.79.self_attn.q_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.79.self_attn.k_proj": { + "stored_tensors": { + "model.layers.79.self_attn.k_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.79.self_attn.k_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.79.self_attn.k_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.79.self_attn.v_proj": { + "stored_tensors": { + "model.layers.79.self_attn.v_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.79.self_attn.v_proj.svh": { + "shape": [ + 1024 + ], + "n_bytes": 2048, + "dtype": "torch.float16" + }, + "model.layers.79.self_attn.v_proj.trellis": { + "shape": [ + 512, + 64, + 96 + ], + "n_bytes": 6291456, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.79.self_attn.o_proj": { + "stored_tensors": { + "model.layers.79.self_attn.o_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.79.self_attn.o_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.79.self_attn.o_proj.trellis": { + "shape": [ + 512, + 512, + 96 + ], + "n_bytes": 50331648, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.79.self_attn.q_norm": { + "stored_tensors": { + "model.layers.79.self_attn.q_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.79.self_attn.k_norm": { + "stored_tensors": { + "model.layers.79.self_attn.k_norm.weight": { + "shape": [ + 128 + ], + "n_bytes": 256, + "dtype": "torch.float16" + } + } + }, + "model.layers.79.feedforward_layernorm": { + "stored_tensors": { + "model.layers.79.feedforward_layernorm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "model.layers.79.mlp.up_proj": { + "stored_tensors": { + "model.layers.79.mlp.up_proj.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.79.mlp.up_proj.svh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.79.mlp.up_proj.trellis": { + "shape": [ + 512, + 2688, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.layers.79.mlp.down_proj": { + "stored_tensors": { + "model.layers.79.mlp.down_proj.suh": { + "shape": [ + 43008 + ], + "n_bytes": 86016, + "dtype": "torch.float16" + }, + "model.layers.79.mlp.down_proj.svh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "model.layers.79.mlp.down_proj.trellis": { + "shape": [ + 2688, + 512, + 96 + ], + "n_bytes": 264241152, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + }, + "model.norm": { + "stored_tensors": { + "model.norm.weight": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + } + } + }, + "lm_head": { + "stored_tensors": { + "lm_head.suh": { + "shape": [ + 8192 + ], + "n_bytes": 16384, + "dtype": "torch.float16" + }, + "lm_head.svh": { + "shape": [ + 131072 + ], + "n_bytes": 262144, + "dtype": "torch.float16" + }, + "lm_head.trellis": { + "shape": [ + 512, + 8192, + 96 + ], + "n_bytes": 805306368, + "dtype": "torch.int16" + } + }, + "quant_format": "exl3", + "bits_per_weight": 6 + } + } +} \ No newline at end of file