diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bc5f30d6632ac0efdc7be2e9095e9e9579af2e33 --- /dev/null +++ b/README.md @@ -0,0 +1,199 @@ +--- +library_name: transformers +tags: [] +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + +This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated. + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ace2efdf13eef4f53c2d44ea8e9ba06f3e090f52 --- /dev/null +++ b/config.json @@ -0,0 +1,152 @@ +{ + "architectures": [ + "Llama4ForCausalLM" + ], + "attention_bias": false, + "attention_chunk_size": 8192, + "attention_dropout": 0.0, + "attn_scale": 0.1, + "attn_temperature_tuning": 4, + "bos_token_id": 200000, + "cache_implementation": "hybrid", + "eos_token_id": [ + 200001, + 200007, + 200008 + ], + "floor_scale": 8192, + "for_llm_compressor": false, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "interleave_moe_layer_step": 1, + "intermediate_size": 8192, + "intermediate_size_mlp": 16384, + "max_position_embeddings": 262144, + "model_type": "llama4_text", + "moe_layers": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47 + ], + "no_rope_layers": [ + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 0 + ], + "num_attention_heads": 40, + "num_experts_per_tok": 1, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "num_local_experts": 16, + "output_router_logits": false, + "pad_token_id": 200018, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.1", + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 202048 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f6ddf3a05bb736defb3852c5e4a2580f3b77cf33 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "bos_token_id": 200000, + "do_sample": true, + "eos_token_id": [ + 200001, + 200007, + 200008 + ], + "pad_token_id": 200018, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.1" +} diff --git a/model-00001-of-00049.safetensors b/model-00001-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebaf69c43599c3b83c57b2cff9ec8b400d75a48d --- /dev/null +++ b/model-00001-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b44517196e1cc7aad68a9fed29cf244536af055faeb910485297b5f034a492aa +size 4879155936 diff --git a/model-00002-of-00049.safetensors b/model-00002-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1b07fcc6497643aa512dc975410d35fc9d7c1e4 --- /dev/null +++ b/model-00002-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5687511ac909e6d6db4cad27e6707d623ca886231c089418fb04eff9d2d4b7d6 +size 4404205040 diff --git a/model-00003-of-00049.safetensors b/model-00003-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28b5af950c2145adc956982d6c40f681adda9343 --- /dev/null +++ b/model-00003-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665f2e3f645a5a029c88438f003f78cf2897fc964ed261c5a5a84311a1473e57 +size 4404205040 diff --git a/model-00004-of-00049.safetensors b/model-00004-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..758c77d88f0a735de3b3fa7d1a4099c2127cb341 --- /dev/null +++ b/model-00004-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7965d2ae00ca62ef5a12113c0cdceeb7c7ca71d435a54cae02085cffffa61a9 +size 4404205040 diff --git a/model-00005-of-00049.safetensors b/model-00005-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ef685d42f56cf3ec1eafe7379249f9e90b266ff --- /dev/null +++ b/model-00005-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e7ab522ec7735ca46fd0b67dceeb6641264d6d4809f6b80d78a624ee71a1407 +size 4404205040 diff --git a/model-00006-of-00049.safetensors b/model-00006-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb2509c764b567e699ab85f3a6c613456d0b0aff --- /dev/null +++ b/model-00006-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c265ae85a8bf9dbcef10ff100c0ad5594e51f0c9cbaa4cf6d4d639a4da3ad5c4 +size 4404205040 diff --git a/model-00007-of-00049.safetensors b/model-00007-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..560948a03608f08e569998d5a9fec65d12cb4e0e --- /dev/null +++ b/model-00007-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49fad02ed80c2340971a8b9865c1f079f1631b8a6faa1b5cd5b5c021f62c271b +size 4404205040 diff --git a/model-00008-of-00049.safetensors b/model-00008-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5513ba62415487bf76b8b2536d20a9dfc477988 --- /dev/null +++ b/model-00008-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed3ffded0e0fb3ee0325cb87015b686749c46717da45682f1ef424e71f0da4b +size 4404205040 diff --git a/model-00009-of-00049.safetensors b/model-00009-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3aac6d761c2b952244facdef46cbb38740d2371 --- /dev/null +++ b/model-00009-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699ccdc982d2e1a48ecfb5819c3b2dae341b82192a86d39252f6773ad7a91506 +size 4404205040 diff --git a/model-00010-of-00049.safetensors b/model-00010-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..162f5ea499c6996951bf871e89f738b767e1ff14 --- /dev/null +++ b/model-00010-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d127c2bc7dff9fadd45f45a034a70d989d16be3038672318ed671b608baf82c +size 4404205040 diff --git a/model-00011-of-00049.safetensors b/model-00011-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88b5020d7f201907fc87414db9f83849f3640fb7 --- /dev/null +++ b/model-00011-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4d991c556680ebddf078129416ae9ccb3649754420e293f015f6944e519daa +size 4404205040 diff --git a/model-00012-of-00049.safetensors b/model-00012-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e89f54a46cd0eed6f79a907d1e236de299ba26e --- /dev/null +++ b/model-00012-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66888b737cb154904dec902e2547796c0885d8ca2c0e2a9697fbe2f384daba00 +size 4404205048 diff --git a/model-00013-of-00049.safetensors b/model-00013-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5760151f58923cc843eac9b2fc18ff61839c823 --- /dev/null +++ b/model-00013-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a0902bbd84309ee42b9acc800ac4091e13b625a675672059ee902058cf5ac9 +size 4404205048 diff --git a/model-00014-of-00049.safetensors b/model-00014-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08399245c3b1d86dbf236a7aa4a664fdd7e26075 --- /dev/null +++ b/model-00014-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2057ee0c610c68050bc23f542d2ba1abc98575eebe85047396e8ca8ec1db01 +size 4404205048 diff --git a/model-00015-of-00049.safetensors b/model-00015-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6d93d4a89f1675fd768cd12156b70e33e41d6a5 --- /dev/null +++ b/model-00015-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:129d6b6b1b6344fe8c8b3acb806bf9e0c5ee9a8caee1eeeb70dccd1d02743f02 +size 4404205048 diff --git a/model-00016-of-00049.safetensors b/model-00016-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58a14bd32c53e73fd2ee52c4150e70b3ee876797 --- /dev/null +++ b/model-00016-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc88efa2cdbcd3c8153bea15fadc55e7f304e4266a4a10089ccf466e11ef26d8 +size 4404205048 diff --git a/model-00017-of-00049.safetensors b/model-00017-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..040b0df36d43f205f091de0d672d3711526f6c36 --- /dev/null +++ b/model-00017-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d6780eaba25011fd6c579939b2c5a0ab0c82328c7c430beb2db87f8f80b41a +size 4404205048 diff --git a/model-00018-of-00049.safetensors b/model-00018-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ce5f07103f1d117cd8f84c0ba7949cfc54626c3 --- /dev/null +++ b/model-00018-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9bc30336f1188114520d0ab9637b434f417c196aae1e8c46d2bc0cfb0c93b7 +size 4404205048 diff --git a/model-00019-of-00049.safetensors b/model-00019-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1388750f4b2c3e405226ad2249b80660ebab04d1 --- /dev/null +++ b/model-00019-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11e88e09c4066accfcdec90def01a4c95743c1637e47ffe81344107ad1c25aa9 +size 4404205048 diff --git a/model-00020-of-00049.safetensors b/model-00020-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..27e288dbe6f5196c2cebc49bee7b96c4947211d3 --- /dev/null +++ b/model-00020-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:798a3557f58ccbb72e5951e53f877b6115dfe913d3f7a7a228c2bf57c5925c99 +size 4404205048 diff --git a/model-00021-of-00049.safetensors b/model-00021-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f80355806b41f1409b0caa6e714efd8d2ef3269d --- /dev/null +++ b/model-00021-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30294cc6ed865deb82cc4f6249d532c12429d0e018b5434b2db1b25d44a7ee95 +size 4404205048 diff --git a/model-00022-of-00049.safetensors b/model-00022-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e517e516d962a6824567e1eb01cbf62e48bc9069 --- /dev/null +++ b/model-00022-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e527f77286b4e65ea4ebd19bf116a7299f691aaea17d9d86be09a9ad2719ab75 +size 4404205048 diff --git a/model-00023-of-00049.safetensors b/model-00023-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3321346d633f3546f78070c244f4a64319dac33b --- /dev/null +++ b/model-00023-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f0aac0eac9c75b9378cabcaaf32ad08639648d1ac472e2ce75c473025323d6 +size 4404205048 diff --git a/model-00024-of-00049.safetensors b/model-00024-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf27d0694bb703d0f8aa6b8c40739f9dba62b702 --- /dev/null +++ b/model-00024-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31ba15bd94eab3fdd8f861ce2397c551f96d5587c1ef58999a896e5cefb7a8c5 +size 4404205048 diff --git a/model-00025-of-00049.safetensors b/model-00025-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bff87fd04be45e1456999de711fab4456ac6f581 --- /dev/null +++ b/model-00025-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25324211544eaf72745d6c9bedf69de2a2d298f2ffea728a632a6ca1daa61df0 +size 4404205048 diff --git a/model-00026-of-00049.safetensors b/model-00026-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35d18b3aad7ae3b114337e30c2603078d5ba27a7 --- /dev/null +++ b/model-00026-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f01db6e6e786ca5c526d5f8ad0a036650273ff254f9c839525b1262b207739 +size 4404205048 diff --git a/model-00027-of-00049.safetensors b/model-00027-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1ee1a7025a00e3532cdaf82ab6b3a4f1cbb6200 --- /dev/null +++ b/model-00027-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b2c2489cb33a39481db473765d151a02d1f1394fe42fdf239e67cb98fba4e4 +size 4404205048 diff --git a/model-00028-of-00049.safetensors b/model-00028-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66f3c08dfa74e474cef7ffe59a58a438f7c9f92f --- /dev/null +++ b/model-00028-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a0ab930a8cb9bb173ef164c04b2da6c3d01bd250abb93f193454fc68f94b74 +size 4404205048 diff --git a/model-00029-of-00049.safetensors b/model-00029-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..609a50cb01b024d94014eb37efef881fc1978149 --- /dev/null +++ b/model-00029-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b0b105c48ebd9e5acb08e2d2f4aa7e54b3a207e2d64948af406c4c1d1758e59 +size 4404205048 diff --git a/model-00030-of-00049.safetensors b/model-00030-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d54a17394daf03d9b54ac8375a8b2e63d3a800f --- /dev/null +++ b/model-00030-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b691698dce078e381cb49ae4ed4482d3dba3a263e698be06cd91816ad696422 +size 4404205048 diff --git a/model-00031-of-00049.safetensors b/model-00031-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2829625619a03f98c6cafb779046f10a8a4968dc --- /dev/null +++ b/model-00031-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6e2a980b450fd29a4a7986b94a04dccfaacd59f45ba95218fcbd262c7bc7fe +size 4404205048 diff --git a/model-00032-of-00049.safetensors b/model-00032-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0980ba20873878830a4323af0da5bd37d5433eeb --- /dev/null +++ b/model-00032-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ef7e4c5abe8f85fdd856ba38ab30c798c4237f1f3d384825e086d84d080fe69 +size 4404205048 diff --git a/model-00033-of-00049.safetensors b/model-00033-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d0d6311d97cc99a09c6571102119472632392c9 --- /dev/null +++ b/model-00033-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85589d62191034f854077e7dcb1f7e6f223f5c75a6bd7e19abf0479a8e2743ad +size 4404205048 diff --git a/model-00034-of-00049.safetensors b/model-00034-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..502d8e44ccfa859bcf49fcbd902fb434222f7b7e --- /dev/null +++ b/model-00034-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44910083a3c6823a456ef3b7a50ecadc9240a75235db62e59f17e0caa2b3a5ac +size 4404205048 diff --git a/model-00035-of-00049.safetensors b/model-00035-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3238ba171c441d58ff14a0fc4333d7479863492 --- /dev/null +++ b/model-00035-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a0ea0ec4f7ea14fcfe5d11f022b74fed76dc2e232b866f0346c8f1d7c58daa9 +size 4404205048 diff --git a/model-00036-of-00049.safetensors b/model-00036-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cd8b9b49815836503456c732016b2d953355acb --- /dev/null +++ b/model-00036-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bf5338c3f1b4dcc4ea09ae3c04d1dcd0f979d20de38ad16e8e688e719d342f5 +size 4404205048 diff --git a/model-00037-of-00049.safetensors b/model-00037-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..644c666919806bf2175abe60ca69509865fa63fc --- /dev/null +++ b/model-00037-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c1dc1d69042728cc7a4f64f3c0e9dfdd00713cbf459561ec5908bca396ec0de +size 4404205048 diff --git a/model-00038-of-00049.safetensors b/model-00038-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3786184a53a99199fcba43db4f6c72c76f5e5242 --- /dev/null +++ b/model-00038-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3204b3596fec5a7a7e88a4348bff922f563d1690b37dfb4acb211b1c3885aba8 +size 4404205048 diff --git a/model-00039-of-00049.safetensors b/model-00039-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82f82157f64fadaa4f127bb864bc4c55a61f1c77 --- /dev/null +++ b/model-00039-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d48c6ec25f48c2f65ede66ca830d0dd18582298198e085e9b30912137a4794f8 +size 4404205048 diff --git a/model-00040-of-00049.safetensors b/model-00040-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22f69d78d4426669121562c248f9f1d76c0cb328 --- /dev/null +++ b/model-00040-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd5705775999f38b15bb4b4b3ead7f3f4f5aaac9d3b73234515d1b2d2a39f9e +size 4404205048 diff --git a/model-00041-of-00049.safetensors b/model-00041-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6821eb81d64cab9509dca5ee8f6413c597356b63 --- /dev/null +++ b/model-00041-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3557d952e5e8962f662b100eb8e81891198be2bb4dad8f60b6cb7ec00339d5a6 +size 4404205048 diff --git a/model-00042-of-00049.safetensors b/model-00042-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a01d526fcb81a17347fac0aa818c577cbeed830 --- /dev/null +++ b/model-00042-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:834f4c0a0f59416b69ce8bd43bf6bd71c8062450ce8dc12f8add375f64549929 +size 4404205048 diff --git a/model-00043-of-00049.safetensors b/model-00043-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5e7f1a37ed8cf8e68cb645edba985677def244d --- /dev/null +++ b/model-00043-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae18c698991038f8a466eba19426e0589ce61274f43869580db7bd4452d4aa1 +size 4404205048 diff --git a/model-00044-of-00049.safetensors b/model-00044-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7dec47f92be9d9d37f75a50f90364f4822b77095 --- /dev/null +++ b/model-00044-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27ec00433760f3cd655cb9d7eb97e57de7b527f39a99789f88a89c9b2732c03 +size 4404205048 diff --git a/model-00045-of-00049.safetensors b/model-00045-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b49529846c0b228d10fa788ab36c3e1baba3700c --- /dev/null +++ b/model-00045-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4937795dd1f389fb8f08509a571f37b44f33da4b320acce5f7e5da53485276e1 +size 4404205048 diff --git a/model-00046-of-00049.safetensors b/model-00046-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..44f11ae8a4c26d6589210cc86886a3a5797e65ec --- /dev/null +++ b/model-00046-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:907d51bf74aec90ac1247a83573e68eec916a03325270454a2c5d26b976957b7 +size 4404205048 diff --git a/model-00047-of-00049.safetensors b/model-00047-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a969423411a492837a6f1599f3a9f125d6707dd5 --- /dev/null +++ b/model-00047-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:290687e729c253f146eefa800206dca72637fd83459e53217c7f2342a6a3e233 +size 4404205048 diff --git a/model-00048-of-00049.safetensors b/model-00048-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53f28fe24bd2640e9f103759b7f25ddf075244cf --- /dev/null +++ b/model-00048-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54b5606ee30b71c2f065558b4bf373b30e33ae0aae4a6ce3844aa274c942d8a8 +size 4404205048 diff --git a/model-00049-of-00049.safetensors b/model-00049-of-00049.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dabc0dcef8c9e9cb1eb9e1e6c99f1dfba7415a04 --- /dev/null +++ b/model-00049-of-00049.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0d0c71edec6a4b33b69884ade01b9d098eece28b3ab81ee54b57fd2697402c +size 3663002712 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..6817f0f2cf4a2643d3be27852c873d1ae48650bb --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,586 @@ +{ + "metadata": { + "total_size": 215539722240 + }, + "weight_map": { + "lm_head.weight": "model-00049-of-00049.safetensors", + "model.embed_tokens.weight": "model-00001-of-00049.safetensors", + "model.layers.0.feed_forward.experts.down_proj": "model-00002-of-00049.safetensors", + "model.layers.0.feed_forward.experts.gate_up_proj": "model-00001-of-00049.safetensors", + "model.layers.0.feed_forward.router.weight": "model-00002-of-00049.safetensors", + "model.layers.0.feed_forward.shared_expert.down_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.feed_forward.shared_expert.gate_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.feed_forward.shared_expert.up_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00049.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00049.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00049.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00049.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00049.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00049.safetensors", + "model.layers.1.feed_forward.experts.down_proj": "model-00003-of-00049.safetensors", + "model.layers.1.feed_forward.experts.gate_up_proj": "model-00002-of-00049.safetensors", + "model.layers.1.feed_forward.router.weight": "model-00003-of-00049.safetensors", + "model.layers.1.feed_forward.shared_expert.down_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.1.feed_forward.shared_expert.gate_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.1.feed_forward.shared_expert.up_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00049.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00049.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00049.safetensors", + "model.layers.10.feed_forward.experts.down_proj": "model-00012-of-00049.safetensors", + "model.layers.10.feed_forward.experts.gate_up_proj": "model-00011-of-00049.safetensors", + "model.layers.10.feed_forward.router.weight": "model-00012-of-00049.safetensors", + "model.layers.10.feed_forward.shared_expert.down_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.10.feed_forward.shared_expert.gate_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.10.feed_forward.shared_expert.up_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.10.input_layernorm.weight": "model-00012-of-00049.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00012-of-00049.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.11.feed_forward.experts.down_proj": "model-00013-of-00049.safetensors", + "model.layers.11.feed_forward.experts.gate_up_proj": "model-00012-of-00049.safetensors", + "model.layers.11.feed_forward.router.weight": "model-00013-of-00049.safetensors", + "model.layers.11.feed_forward.shared_expert.down_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.11.feed_forward.shared_expert.gate_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.11.feed_forward.shared_expert.up_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.11.input_layernorm.weight": "model-00013-of-00049.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00013-of-00049.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00012-of-00049.safetensors", + "model.layers.12.feed_forward.experts.down_proj": "model-00014-of-00049.safetensors", + "model.layers.12.feed_forward.experts.gate_up_proj": "model-00013-of-00049.safetensors", + "model.layers.12.feed_forward.router.weight": "model-00014-of-00049.safetensors", + "model.layers.12.feed_forward.shared_expert.down_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.12.feed_forward.shared_expert.gate_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.12.feed_forward.shared_expert.up_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.12.input_layernorm.weight": "model-00014-of-00049.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00014-of-00049.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00013-of-00049.safetensors", + "model.layers.13.feed_forward.experts.down_proj": "model-00015-of-00049.safetensors", + "model.layers.13.feed_forward.experts.gate_up_proj": "model-00014-of-00049.safetensors", + "model.layers.13.feed_forward.router.weight": "model-00015-of-00049.safetensors", + "model.layers.13.feed_forward.shared_expert.down_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.13.feed_forward.shared_expert.gate_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.13.feed_forward.shared_expert.up_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.13.input_layernorm.weight": "model-00015-of-00049.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00015-of-00049.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00014-of-00049.safetensors", + "model.layers.14.feed_forward.experts.down_proj": "model-00016-of-00049.safetensors", + "model.layers.14.feed_forward.experts.gate_up_proj": "model-00015-of-00049.safetensors", + "model.layers.14.feed_forward.router.weight": "model-00016-of-00049.safetensors", + "model.layers.14.feed_forward.shared_expert.down_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.14.feed_forward.shared_expert.gate_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.14.feed_forward.shared_expert.up_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.14.input_layernorm.weight": "model-00016-of-00049.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00016-of-00049.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00015-of-00049.safetensors", + "model.layers.15.feed_forward.experts.down_proj": "model-00017-of-00049.safetensors", + "model.layers.15.feed_forward.experts.gate_up_proj": "model-00016-of-00049.safetensors", + "model.layers.15.feed_forward.router.weight": "model-00017-of-00049.safetensors", + "model.layers.15.feed_forward.shared_expert.down_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.15.feed_forward.shared_expert.gate_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.15.feed_forward.shared_expert.up_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.15.input_layernorm.weight": "model-00017-of-00049.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00017-of-00049.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00016-of-00049.safetensors", + "model.layers.16.feed_forward.experts.down_proj": "model-00018-of-00049.safetensors", + "model.layers.16.feed_forward.experts.gate_up_proj": "model-00017-of-00049.safetensors", + "model.layers.16.feed_forward.router.weight": "model-00018-of-00049.safetensors", + "model.layers.16.feed_forward.shared_expert.down_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.16.feed_forward.shared_expert.gate_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.16.feed_forward.shared_expert.up_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.16.input_layernorm.weight": "model-00018-of-00049.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00018-of-00049.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00017-of-00049.safetensors", + "model.layers.17.feed_forward.experts.down_proj": "model-00019-of-00049.safetensors", + "model.layers.17.feed_forward.experts.gate_up_proj": "model-00018-of-00049.safetensors", + "model.layers.17.feed_forward.router.weight": "model-00019-of-00049.safetensors", + "model.layers.17.feed_forward.shared_expert.down_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.17.feed_forward.shared_expert.gate_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.17.feed_forward.shared_expert.up_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.17.input_layernorm.weight": "model-00019-of-00049.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00019-of-00049.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00018-of-00049.safetensors", + "model.layers.18.feed_forward.experts.down_proj": "model-00020-of-00049.safetensors", + "model.layers.18.feed_forward.experts.gate_up_proj": "model-00019-of-00049.safetensors", + "model.layers.18.feed_forward.router.weight": "model-00020-of-00049.safetensors", + "model.layers.18.feed_forward.shared_expert.down_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.18.feed_forward.shared_expert.gate_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.18.feed_forward.shared_expert.up_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.18.input_layernorm.weight": "model-00020-of-00049.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00020-of-00049.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00019-of-00049.safetensors", + "model.layers.19.feed_forward.experts.down_proj": "model-00021-of-00049.safetensors", + "model.layers.19.feed_forward.experts.gate_up_proj": "model-00020-of-00049.safetensors", + "model.layers.19.feed_forward.router.weight": "model-00021-of-00049.safetensors", + "model.layers.19.feed_forward.shared_expert.down_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.19.feed_forward.shared_expert.gate_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.19.feed_forward.shared_expert.up_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.19.input_layernorm.weight": "model-00021-of-00049.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00021-of-00049.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00020-of-00049.safetensors", + "model.layers.2.feed_forward.experts.down_proj": "model-00004-of-00049.safetensors", + "model.layers.2.feed_forward.experts.gate_up_proj": "model-00003-of-00049.safetensors", + "model.layers.2.feed_forward.router.weight": "model-00004-of-00049.safetensors", + "model.layers.2.feed_forward.shared_expert.down_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.2.feed_forward.shared_expert.gate_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.2.feed_forward.shared_expert.up_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.2.input_layernorm.weight": "model-00004-of-00049.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00049.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00049.safetensors", + "model.layers.20.feed_forward.experts.down_proj": "model-00022-of-00049.safetensors", + "model.layers.20.feed_forward.experts.gate_up_proj": "model-00021-of-00049.safetensors", + "model.layers.20.feed_forward.router.weight": "model-00022-of-00049.safetensors", + "model.layers.20.feed_forward.shared_expert.down_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.20.feed_forward.shared_expert.gate_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.20.feed_forward.shared_expert.up_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.20.input_layernorm.weight": "model-00022-of-00049.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00022-of-00049.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00021-of-00049.safetensors", + "model.layers.21.feed_forward.experts.down_proj": "model-00023-of-00049.safetensors", + "model.layers.21.feed_forward.experts.gate_up_proj": "model-00022-of-00049.safetensors", + "model.layers.21.feed_forward.router.weight": "model-00023-of-00049.safetensors", + "model.layers.21.feed_forward.shared_expert.down_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.21.feed_forward.shared_expert.gate_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.21.feed_forward.shared_expert.up_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.21.input_layernorm.weight": "model-00023-of-00049.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00023-of-00049.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00022-of-00049.safetensors", + "model.layers.22.feed_forward.experts.down_proj": "model-00024-of-00049.safetensors", + "model.layers.22.feed_forward.experts.gate_up_proj": "model-00023-of-00049.safetensors", + "model.layers.22.feed_forward.router.weight": "model-00024-of-00049.safetensors", + "model.layers.22.feed_forward.shared_expert.down_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.22.feed_forward.shared_expert.gate_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.22.feed_forward.shared_expert.up_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.22.input_layernorm.weight": "model-00024-of-00049.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00024-of-00049.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00023-of-00049.safetensors", + "model.layers.23.feed_forward.experts.down_proj": "model-00025-of-00049.safetensors", + "model.layers.23.feed_forward.experts.gate_up_proj": "model-00024-of-00049.safetensors", + "model.layers.23.feed_forward.router.weight": "model-00025-of-00049.safetensors", + "model.layers.23.feed_forward.shared_expert.down_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.23.feed_forward.shared_expert.gate_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.23.feed_forward.shared_expert.up_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.23.input_layernorm.weight": "model-00025-of-00049.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00025-of-00049.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00024-of-00049.safetensors", + "model.layers.24.feed_forward.experts.down_proj": "model-00026-of-00049.safetensors", + "model.layers.24.feed_forward.experts.gate_up_proj": "model-00025-of-00049.safetensors", + "model.layers.24.feed_forward.router.weight": "model-00026-of-00049.safetensors", + "model.layers.24.feed_forward.shared_expert.down_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.24.feed_forward.shared_expert.gate_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.24.feed_forward.shared_expert.up_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.24.input_layernorm.weight": "model-00026-of-00049.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00026-of-00049.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00025-of-00049.safetensors", + "model.layers.25.feed_forward.experts.down_proj": "model-00027-of-00049.safetensors", + "model.layers.25.feed_forward.experts.gate_up_proj": "model-00026-of-00049.safetensors", + "model.layers.25.feed_forward.router.weight": "model-00027-of-00049.safetensors", + "model.layers.25.feed_forward.shared_expert.down_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.25.feed_forward.shared_expert.gate_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.25.feed_forward.shared_expert.up_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.25.input_layernorm.weight": "model-00027-of-00049.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00027-of-00049.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00026-of-00049.safetensors", + "model.layers.26.feed_forward.experts.down_proj": "model-00028-of-00049.safetensors", + "model.layers.26.feed_forward.experts.gate_up_proj": "model-00027-of-00049.safetensors", + "model.layers.26.feed_forward.router.weight": "model-00028-of-00049.safetensors", + "model.layers.26.feed_forward.shared_expert.down_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.26.feed_forward.shared_expert.gate_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.26.feed_forward.shared_expert.up_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.26.input_layernorm.weight": "model-00028-of-00049.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00028-of-00049.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00027-of-00049.safetensors", + "model.layers.27.feed_forward.experts.down_proj": "model-00029-of-00049.safetensors", + "model.layers.27.feed_forward.experts.gate_up_proj": "model-00028-of-00049.safetensors", + "model.layers.27.feed_forward.router.weight": "model-00029-of-00049.safetensors", + "model.layers.27.feed_forward.shared_expert.down_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.27.feed_forward.shared_expert.gate_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.27.feed_forward.shared_expert.up_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.27.input_layernorm.weight": "model-00029-of-00049.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00029-of-00049.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00028-of-00049.safetensors", + "model.layers.28.feed_forward.experts.down_proj": "model-00030-of-00049.safetensors", + "model.layers.28.feed_forward.experts.gate_up_proj": "model-00029-of-00049.safetensors", + "model.layers.28.feed_forward.router.weight": "model-00030-of-00049.safetensors", + "model.layers.28.feed_forward.shared_expert.down_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.28.feed_forward.shared_expert.gate_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.28.feed_forward.shared_expert.up_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.28.input_layernorm.weight": "model-00030-of-00049.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00030-of-00049.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00029-of-00049.safetensors", + "model.layers.29.feed_forward.experts.down_proj": "model-00031-of-00049.safetensors", + "model.layers.29.feed_forward.experts.gate_up_proj": "model-00030-of-00049.safetensors", + "model.layers.29.feed_forward.router.weight": "model-00031-of-00049.safetensors", + "model.layers.29.feed_forward.shared_expert.down_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.29.feed_forward.shared_expert.gate_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.29.feed_forward.shared_expert.up_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.29.input_layernorm.weight": "model-00031-of-00049.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00031-of-00049.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00030-of-00049.safetensors", + "model.layers.3.feed_forward.experts.down_proj": "model-00005-of-00049.safetensors", + "model.layers.3.feed_forward.experts.gate_up_proj": "model-00004-of-00049.safetensors", + "model.layers.3.feed_forward.router.weight": "model-00005-of-00049.safetensors", + "model.layers.3.feed_forward.shared_expert.down_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.3.feed_forward.shared_expert.gate_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.3.feed_forward.shared_expert.up_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.3.input_layernorm.weight": "model-00005-of-00049.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00049.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00049.safetensors", + "model.layers.30.feed_forward.experts.down_proj": "model-00032-of-00049.safetensors", + "model.layers.30.feed_forward.experts.gate_up_proj": "model-00031-of-00049.safetensors", + "model.layers.30.feed_forward.router.weight": "model-00032-of-00049.safetensors", + "model.layers.30.feed_forward.shared_expert.down_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.30.feed_forward.shared_expert.gate_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.30.feed_forward.shared_expert.up_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.30.input_layernorm.weight": "model-00032-of-00049.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00032-of-00049.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00031-of-00049.safetensors", + "model.layers.31.feed_forward.experts.down_proj": "model-00033-of-00049.safetensors", + "model.layers.31.feed_forward.experts.gate_up_proj": "model-00032-of-00049.safetensors", + "model.layers.31.feed_forward.router.weight": "model-00033-of-00049.safetensors", + "model.layers.31.feed_forward.shared_expert.down_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.31.feed_forward.shared_expert.gate_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.31.feed_forward.shared_expert.up_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.31.input_layernorm.weight": "model-00033-of-00049.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00033-of-00049.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00032-of-00049.safetensors", + "model.layers.32.feed_forward.experts.down_proj": "model-00034-of-00049.safetensors", + "model.layers.32.feed_forward.experts.gate_up_proj": "model-00033-of-00049.safetensors", + "model.layers.32.feed_forward.router.weight": "model-00034-of-00049.safetensors", + "model.layers.32.feed_forward.shared_expert.down_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.32.feed_forward.shared_expert.gate_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.32.feed_forward.shared_expert.up_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.32.input_layernorm.weight": "model-00034-of-00049.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00034-of-00049.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00033-of-00049.safetensors", + "model.layers.33.feed_forward.experts.down_proj": "model-00035-of-00049.safetensors", + "model.layers.33.feed_forward.experts.gate_up_proj": "model-00034-of-00049.safetensors", + "model.layers.33.feed_forward.router.weight": "model-00035-of-00049.safetensors", + "model.layers.33.feed_forward.shared_expert.down_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.33.feed_forward.shared_expert.gate_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.33.feed_forward.shared_expert.up_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.33.input_layernorm.weight": "model-00035-of-00049.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00035-of-00049.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00034-of-00049.safetensors", + "model.layers.34.feed_forward.experts.down_proj": "model-00036-of-00049.safetensors", + "model.layers.34.feed_forward.experts.gate_up_proj": "model-00035-of-00049.safetensors", + "model.layers.34.feed_forward.router.weight": "model-00036-of-00049.safetensors", + "model.layers.34.feed_forward.shared_expert.down_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.34.feed_forward.shared_expert.gate_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.34.feed_forward.shared_expert.up_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.34.input_layernorm.weight": "model-00036-of-00049.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00036-of-00049.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00035-of-00049.safetensors", + "model.layers.35.feed_forward.experts.down_proj": "model-00037-of-00049.safetensors", + "model.layers.35.feed_forward.experts.gate_up_proj": "model-00036-of-00049.safetensors", + "model.layers.35.feed_forward.router.weight": "model-00037-of-00049.safetensors", + "model.layers.35.feed_forward.shared_expert.down_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.35.feed_forward.shared_expert.gate_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.35.feed_forward.shared_expert.up_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.35.input_layernorm.weight": "model-00037-of-00049.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00037-of-00049.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00036-of-00049.safetensors", + "model.layers.36.feed_forward.experts.down_proj": "model-00038-of-00049.safetensors", + "model.layers.36.feed_forward.experts.gate_up_proj": "model-00037-of-00049.safetensors", + "model.layers.36.feed_forward.router.weight": "model-00038-of-00049.safetensors", + "model.layers.36.feed_forward.shared_expert.down_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.36.feed_forward.shared_expert.gate_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.36.feed_forward.shared_expert.up_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.36.input_layernorm.weight": "model-00038-of-00049.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00038-of-00049.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00037-of-00049.safetensors", + "model.layers.37.feed_forward.experts.down_proj": "model-00039-of-00049.safetensors", + "model.layers.37.feed_forward.experts.gate_up_proj": "model-00038-of-00049.safetensors", + "model.layers.37.feed_forward.router.weight": "model-00039-of-00049.safetensors", + "model.layers.37.feed_forward.shared_expert.down_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.37.feed_forward.shared_expert.gate_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.37.feed_forward.shared_expert.up_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.37.input_layernorm.weight": "model-00039-of-00049.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00039-of-00049.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00038-of-00049.safetensors", + "model.layers.38.feed_forward.experts.down_proj": "model-00040-of-00049.safetensors", + "model.layers.38.feed_forward.experts.gate_up_proj": "model-00039-of-00049.safetensors", + "model.layers.38.feed_forward.router.weight": "model-00040-of-00049.safetensors", + "model.layers.38.feed_forward.shared_expert.down_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.38.feed_forward.shared_expert.gate_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.38.feed_forward.shared_expert.up_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.38.input_layernorm.weight": "model-00040-of-00049.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00040-of-00049.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00039-of-00049.safetensors", + "model.layers.39.feed_forward.experts.down_proj": "model-00041-of-00049.safetensors", + "model.layers.39.feed_forward.experts.gate_up_proj": "model-00040-of-00049.safetensors", + "model.layers.39.feed_forward.router.weight": "model-00041-of-00049.safetensors", + "model.layers.39.feed_forward.shared_expert.down_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.39.feed_forward.shared_expert.gate_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.39.feed_forward.shared_expert.up_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.39.input_layernorm.weight": "model-00041-of-00049.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00041-of-00049.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00040-of-00049.safetensors", + "model.layers.4.feed_forward.experts.down_proj": "model-00006-of-00049.safetensors", + "model.layers.4.feed_forward.experts.gate_up_proj": "model-00005-of-00049.safetensors", + "model.layers.4.feed_forward.router.weight": "model-00006-of-00049.safetensors", + "model.layers.4.feed_forward.shared_expert.down_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.4.feed_forward.shared_expert.gate_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.4.feed_forward.shared_expert.up_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.4.input_layernorm.weight": "model-00006-of-00049.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00006-of-00049.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00049.safetensors", + "model.layers.40.feed_forward.experts.down_proj": "model-00042-of-00049.safetensors", + "model.layers.40.feed_forward.experts.gate_up_proj": "model-00041-of-00049.safetensors", + "model.layers.40.feed_forward.router.weight": "model-00042-of-00049.safetensors", + "model.layers.40.feed_forward.shared_expert.down_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.40.feed_forward.shared_expert.gate_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.40.feed_forward.shared_expert.up_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.40.input_layernorm.weight": "model-00042-of-00049.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00042-of-00049.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00041-of-00049.safetensors", + "model.layers.41.feed_forward.experts.down_proj": "model-00043-of-00049.safetensors", + "model.layers.41.feed_forward.experts.gate_up_proj": "model-00042-of-00049.safetensors", + "model.layers.41.feed_forward.router.weight": "model-00043-of-00049.safetensors", + "model.layers.41.feed_forward.shared_expert.down_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.41.feed_forward.shared_expert.gate_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.41.feed_forward.shared_expert.up_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.41.input_layernorm.weight": "model-00043-of-00049.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00043-of-00049.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00042-of-00049.safetensors", + "model.layers.42.feed_forward.experts.down_proj": "model-00044-of-00049.safetensors", + "model.layers.42.feed_forward.experts.gate_up_proj": "model-00043-of-00049.safetensors", + "model.layers.42.feed_forward.router.weight": "model-00044-of-00049.safetensors", + "model.layers.42.feed_forward.shared_expert.down_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.42.feed_forward.shared_expert.gate_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.42.feed_forward.shared_expert.up_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.42.input_layernorm.weight": "model-00044-of-00049.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00044-of-00049.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00043-of-00049.safetensors", + "model.layers.43.feed_forward.experts.down_proj": "model-00045-of-00049.safetensors", + "model.layers.43.feed_forward.experts.gate_up_proj": "model-00044-of-00049.safetensors", + "model.layers.43.feed_forward.router.weight": "model-00045-of-00049.safetensors", + "model.layers.43.feed_forward.shared_expert.down_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.43.feed_forward.shared_expert.gate_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.43.feed_forward.shared_expert.up_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.43.input_layernorm.weight": "model-00045-of-00049.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00045-of-00049.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00044-of-00049.safetensors", + "model.layers.44.feed_forward.experts.down_proj": "model-00046-of-00049.safetensors", + "model.layers.44.feed_forward.experts.gate_up_proj": "model-00045-of-00049.safetensors", + "model.layers.44.feed_forward.router.weight": "model-00046-of-00049.safetensors", + "model.layers.44.feed_forward.shared_expert.down_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.44.feed_forward.shared_expert.gate_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.44.feed_forward.shared_expert.up_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.44.input_layernorm.weight": "model-00046-of-00049.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00046-of-00049.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00045-of-00049.safetensors", + "model.layers.45.feed_forward.experts.down_proj": "model-00047-of-00049.safetensors", + "model.layers.45.feed_forward.experts.gate_up_proj": "model-00046-of-00049.safetensors", + "model.layers.45.feed_forward.router.weight": "model-00047-of-00049.safetensors", + "model.layers.45.feed_forward.shared_expert.down_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.45.feed_forward.shared_expert.gate_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.45.feed_forward.shared_expert.up_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.45.input_layernorm.weight": "model-00047-of-00049.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00047-of-00049.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00046-of-00049.safetensors", + "model.layers.46.feed_forward.experts.down_proj": "model-00048-of-00049.safetensors", + "model.layers.46.feed_forward.experts.gate_up_proj": "model-00047-of-00049.safetensors", + "model.layers.46.feed_forward.router.weight": "model-00048-of-00049.safetensors", + "model.layers.46.feed_forward.shared_expert.down_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.46.feed_forward.shared_expert.gate_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.46.feed_forward.shared_expert.up_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.46.input_layernorm.weight": "model-00048-of-00049.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00048-of-00049.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00047-of-00049.safetensors", + "model.layers.47.feed_forward.experts.down_proj": "model-00049-of-00049.safetensors", + "model.layers.47.feed_forward.experts.gate_up_proj": "model-00048-of-00049.safetensors", + "model.layers.47.feed_forward.router.weight": "model-00049-of-00049.safetensors", + "model.layers.47.feed_forward.shared_expert.down_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.47.feed_forward.shared_expert.gate_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.47.feed_forward.shared_expert.up_proj.weight": "model-00049-of-00049.safetensors", + "model.layers.47.input_layernorm.weight": "model-00049-of-00049.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00049-of-00049.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00048-of-00049.safetensors", + "model.layers.5.feed_forward.experts.down_proj": "model-00007-of-00049.safetensors", + "model.layers.5.feed_forward.experts.gate_up_proj": "model-00006-of-00049.safetensors", + "model.layers.5.feed_forward.router.weight": "model-00007-of-00049.safetensors", + "model.layers.5.feed_forward.shared_expert.down_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.5.feed_forward.shared_expert.gate_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.5.feed_forward.shared_expert.up_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.5.input_layernorm.weight": "model-00007-of-00049.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00007-of-00049.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00049.safetensors", + "model.layers.6.feed_forward.experts.down_proj": "model-00008-of-00049.safetensors", + "model.layers.6.feed_forward.experts.gate_up_proj": "model-00007-of-00049.safetensors", + "model.layers.6.feed_forward.router.weight": "model-00008-of-00049.safetensors", + "model.layers.6.feed_forward.shared_expert.down_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.6.feed_forward.shared_expert.gate_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.6.feed_forward.shared_expert.up_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.6.input_layernorm.weight": "model-00008-of-00049.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00008-of-00049.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00007-of-00049.safetensors", + "model.layers.7.feed_forward.experts.down_proj": "model-00009-of-00049.safetensors", + "model.layers.7.feed_forward.experts.gate_up_proj": "model-00008-of-00049.safetensors", + "model.layers.7.feed_forward.router.weight": "model-00009-of-00049.safetensors", + "model.layers.7.feed_forward.shared_expert.down_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.7.feed_forward.shared_expert.gate_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.7.feed_forward.shared_expert.up_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.7.input_layernorm.weight": "model-00009-of-00049.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00009-of-00049.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00008-of-00049.safetensors", + "model.layers.8.feed_forward.experts.down_proj": "model-00010-of-00049.safetensors", + "model.layers.8.feed_forward.experts.gate_up_proj": "model-00009-of-00049.safetensors", + "model.layers.8.feed_forward.router.weight": "model-00010-of-00049.safetensors", + "model.layers.8.feed_forward.shared_expert.down_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.8.feed_forward.shared_expert.gate_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.8.feed_forward.shared_expert.up_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.8.input_layernorm.weight": "model-00010-of-00049.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00010-of-00049.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00009-of-00049.safetensors", + "model.layers.9.feed_forward.experts.down_proj": "model-00011-of-00049.safetensors", + "model.layers.9.feed_forward.experts.gate_up_proj": "model-00010-of-00049.safetensors", + "model.layers.9.feed_forward.router.weight": "model-00011-of-00049.safetensors", + "model.layers.9.feed_forward.shared_expert.down_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.9.feed_forward.shared_expert.gate_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.9.feed_forward.shared_expert.up_proj.weight": "model-00011-of-00049.safetensors", + "model.layers.9.input_layernorm.weight": "model-00011-of-00049.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00011-of-00049.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00010-of-00049.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00010-of-00049.safetensors", + "model.norm.weight": "model-00049-of-00049.safetensors" + } +}