BramVanroy committed on Apr 27, 2024

Commit

78a8636

verified ·

1 Parent(s): 2ed6441

Training in progress, epoch 2, checkpoint

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

checkpoint-356/added_tokens.json +42 -0
checkpoint-356/config.json +34 -0
checkpoint-356/generation_config.json +7 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_10_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_11_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_12_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_13_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_14_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_15_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_8_mp_rank_00_model_states.pt +3 -0
checkpoint-356/global_step356/zero_pp_rank_9_mp_rank_00_model_states.pt +3 -0
checkpoint-356/latest +1 -0
checkpoint-356/merges.txt +0 -0
checkpoint-356/model-00001-of-00002.safetensors +3 -0
checkpoint-356/model-00002-of-00002.safetensors +3 -0
checkpoint-356/model.safetensors.index.json +460 -0
checkpoint-356/rng_state_0.pth +3 -0
checkpoint-356/rng_state_1.pth +3 -0
checkpoint-356/rng_state_10.pth +3 -0
checkpoint-356/rng_state_11.pth +3 -0
checkpoint-356/rng_state_12.pth +3 -0
checkpoint-356/rng_state_13.pth +3 -0
checkpoint-356/rng_state_14.pth +3 -0
checkpoint-356/rng_state_15.pth +3 -0
checkpoint-356/rng_state_2.pth +3 -0
checkpoint-356/rng_state_3.pth +3 -0

checkpoint-356/added_tokens.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "\t\t": 50294,
+  "\t\t\t": 50293,
+  "\t\t\t\t": 50292,
+  "\t\t\t\t\t": 50291,
+  "\t\t\t\t\t\t": 50290,
+  "\t\t\t\t\t\t\t": 50289,
+  "\t\t\t\t\t\t\t\t": 50288,
+  "\t\t\t\t\t\t\t\t\t": 50287,
+  "  ": 50286,
+  "   ": 50285,
+  "    ": 50284,
+  "     ": 50283,
+  "      ": 50282,
+  "       ": 50281,
+  "        ": 50280,
+  "         ": 50279,
+  "          ": 50278,
+  "           ": 50277,
+  "            ": 50276,
+  "             ": 50275,
+  "              ": 50274,
+  "               ": 50273,
+  "                ": 50272,
+  "                 ": 50271,
+  "                  ": 50270,
+  "                   ": 50269,
+  "                    ": 50268,
+  "                     ": 50267,
+  "                      ": 50266,
+  "                       ": 50265,
+  "                        ": 50264,
+  "                         ": 50263,
+  "                          ": 50262,
+  "                           ": 50261,
+  "                            ": 50260,
+  "                             ": 50259,
+  "                              ": 50258,
+  "                               ": 50257,
+  "<|im_end|>": 50296,
+  "<|im_start|>": 50295
+}

checkpoint-356/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "BramVanroy/fietje-2b",
+  "architectures": [
+    "PhiForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "microsoft/phi-2--configuration_phi.PhiConfig",
+    "AutoModelForCausalLM": "microsoft/phi-2--modeling_phi.PhiForCausalLM"
+  },
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.0,
+  "eos_token_id": 50256,
+  "hidden_act": "gelu_new",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 10240,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "phi",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "partial_rotary_factor": 0.4,
+  "qk_layernorm": false,
+  "resid_pdrop": 0.1,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.39.1",
+  "use_cache": false,
+  "vocab_size": 50297
+}

checkpoint-356/generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50295,
+  "eos_token_id": 50296,
+  "pad_token_id": 50296,
+  "transformers_version": "4.39.1"
+}

checkpoint-356/global_step356/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2f619455e89983b92d461d683a28a3891d1f8b72bcfb09bceaa872f6f7e4d8a8
+size 2081299504

checkpoint-356/global_step356/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6001f2cfd4d13afa9fe29a688ad1faa2f5fea5618c2d8e78b21b12b5be4fb7
+size 2081299516

checkpoint-356/global_step356/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb2eff90ab97a97e20973a656130331d7e4e253e7b8649b7abbbc81b9446bcb8
+size 2081299516

checkpoint-356/global_step356/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:33c14a24a6ccf846ff2c94c27a07e53a46a72cd55366c96b4623d694fc27872e
+size 2081299516

checkpoint-356/global_step356/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6949389dd0f2a893d81263c330ea8d38c9e4f9456f4b788fdb91ee5bfc41bcd
+size 2081299516

checkpoint-356/global_step356/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4c32ddb73ae1ea254286f71ed81cfb3a2e762f71a170d3f5e1b71393894892c1
+size 2081299516

checkpoint-356/global_step356/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b424888e71c65fcfe817fe6404864a16e795fe66fec815f01b26c9e8d388d90e
+size 2081299516

checkpoint-356/global_step356/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a8af346482281f2bfa8f8ee8dabb82e96a4ca8f96204e21f5bba5b162f6bdbf
+size 2081299504

checkpoint-356/global_step356/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60052da7cb1bf8626335d44b2b7983d8c236dcdad942f6b08598ee20de808998
+size 2081299504

checkpoint-356/global_step356/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:703c3eed3da862f4cbdcdbda25e31ce8498d50dc577b001f31cff033d01dc16c
+size 2081299504

checkpoint-356/global_step356/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:abaa6eeab66a5a29ed6f3753e3f6d867d76162c82fd074999e3d01c843f644cc
+size 2081299504

checkpoint-356/global_step356/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee438e056c3038818ef538e408979707c8988217d905f7b39e7c1a37bc312b0d
+size 2081299504

checkpoint-356/global_step356/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c227626551de9b20b04b507ebee683c93afb721e18a6466d68cc48ec1001c4c5
+size 2081299504

checkpoint-356/global_step356/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b0bb8f3f4b6229a5ce63909c89da4c93b4811e63792fa6409b1c1afe2b9f26a
+size 2081299504

checkpoint-356/global_step356/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a75834b04c8cabb2ec25c6c86f8b733687a2d900312d81b4be6aa296beb1e27
+size 2081299504

checkpoint-356/global_step356/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0dbaeab98fe2375f990ae84aaa85ea11dc5b45a54e681fbc0644777585bf4887
+size 2081299504

checkpoint-356/global_step356/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5b5f492b0bfa89d6a334c62d470176c5d569bb3d62eae734aebd98ec716f0422
+size 214911

checkpoint-356/global_step356/zero_pp_rank_10_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e65ea4c47ad2231d73a7b12149a38e83d7897896acc47ebde8006085362f8db
+size 215368

checkpoint-356/global_step356/zero_pp_rank_11_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16ab0618670195f402da21c61bf0620d77105acda212df982563e3c1a5d18d30
+size 215368

checkpoint-356/global_step356/zero_pp_rank_12_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b21ad3d682c5059e7e775a19bd55ff5661f5f33ff86184a5144e1304d90ad067
+size 215368

checkpoint-356/global_step356/zero_pp_rank_13_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea762eb33d50a50e903082193d78a71203fac4b19e4b9712856dddecf5d756a0
+size 215368

checkpoint-356/global_step356/zero_pp_rank_14_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92f19ffb6c901daa4ddf8c36e1a54f01881f50ad6bdde05eb4316d3a1d7c7a33
+size 215368

checkpoint-356/global_step356/zero_pp_rank_15_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4c67440262a460707b3e11a7064ce9c25340a7fbc0e206e6345cd9d8ead40eb7
+size 215368

checkpoint-356/global_step356/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c19474d0a1e6f8943b3bea3f1ca847036bef29cbc36525ac8d79661919afb206
+size 214911

checkpoint-356/global_step356/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1b62960c402fd3b539a500ce59a3dd0729d612cda3fbf6c5d5573c320f7ffab
+size 214911

checkpoint-356/global_step356/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:263f545a3fb0e920609b61d0eaf2851499375a46549f5de7087d698222cc855d
+size 214911

checkpoint-356/global_step356/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d1ff5c96c2eac1dfe8411c458256725a7c82bbb2c87bea0f275b7b1126800f7e
+size 214911

checkpoint-356/global_step356/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66a46c064565d45d82009f1635fed7e5c0cb6412a4e7d6a9266566a8e56cdc5e
+size 214911

checkpoint-356/global_step356/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c973ba14d1776b19fe6c7b08133fc11d045e1d088b755e2e0548955869534ee
+size 214911

checkpoint-356/global_step356/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07531259840589cb0561d1eb76a76d304a8e1506783115824c57471b7b20ccd6
+size 214911

checkpoint-356/global_step356/zero_pp_rank_8_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f01eb8538900e019c18aaf7a0b628df0dcdc9b6a69f5e78817be81b7576635f0
+size 214911

checkpoint-356/global_step356/zero_pp_rank_9_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b01eec4f6f99f9f3b3802cb54773f3525b16d0a2e20b5b9e582f3c53c69bef20
+size 214911

checkpoint-356/latest ADDED Viewed

	@@ -0,0 +1 @@


+global_step356

checkpoint-356/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-356/model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5523ffe3857bc7857f9960b306d0e5ac79e3c8f23322017701308cfdbc3db80
+size 4990961488

checkpoint-356/model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:570421aeca3dc077e5183fb4eda8f8dfa4a0364d40e7c581ff2b12f82d4426e4
+size 559207842

checkpoint-356/model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,460 @@

+{
+  "metadata": {
+    "total_size": 5550119154
+  },
+  "weight_map": {
+    "lm_head.bias": "model-00002-of-00002.safetensors",
+    "lm_head.weight": "model-00002-of-00002.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "model.final_layernorm.bias": "model-00002-of-00002.safetensors",
+    "model.final_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.0.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.30.input_layernorm.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.fc1.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.fc1.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.fc2.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.fc2.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.dense.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.dense.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.input_layernorm.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.fc1.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.fc1.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.fc2.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.fc2.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.dense.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.dense.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.4.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors"
+  }
+}

checkpoint-356/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04a66994055cfee1f4f69f094158fce0b87efd623d8739e11c4c38f3bab58a0c
+size 14960

checkpoint-356/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0924e2504c3f172304c9c11c2be529e0a5038784a66c51f80c116664d7c2f93
+size 14960

checkpoint-356/rng_state_10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5bd7c1dd7a8a0f977e0ee498f1b4afb7d1814a5734b2a09b53fb27a96e6353f
+size 14969

checkpoint-356/rng_state_11.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bcba09d55eabbd0d53300b5a60697b37a763d9fcfaa6397be9c9921de2797ee
+size 14969

checkpoint-356/rng_state_12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4945752cdbe6d8b6522a20696f282f8fa6b8ac8574445ea9a6dd1c3e45019e13
+size 14969

checkpoint-356/rng_state_13.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:872214d44e52a26d877df9ac5f92b7330cf03a60921e69c33bfddf17f0ca3d1d
+size 14969

checkpoint-356/rng_state_14.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fbf563d28d76e69bd8e24fd20c8b36f4cdea377d3befa7c8fa26e11396541567
+size 14969

checkpoint-356/rng_state_15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:56347cf2ce3eccf8bd1affc1bd307a99453151b1b5b6b27ebe345d7a16baf630
+size 14969

checkpoint-356/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d47b7c54facc2071865e79c5c749a952bc25cb9c1ca62283af3ba93baea0afba
+size 14960

checkpoint-356/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e32ec85dc9dc21b36aacc0040c75a3e8b61956d2270ae517aa4716c812bee4f
+size 14960