Triangle104 lemonilia commited on
Commit
dae958e
·
verified ·
0 Parent(s):

Duplicate from lemonilia/ShoriRP-v0.75d

Browse files

Co-authored-by: Lemonilia <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ srp-test-0.5.q6k.gguf filter=lfs diff=lfs merge=lfs -text
37
+ ShoriRP.v055.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
38
+ ShoriRP.v057.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
39
+ ShoriRP.v060.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
40
+ ShoriRP.v063.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
41
+ ShoriRP.v066.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
42
+ ShoriRP.v068.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
43
+ ShoriRP.v070.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
44
+ ShoriRP.v072.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
45
+ ShoriRP.v075.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
46
+ ShoriRP.v075d.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
47
+ ShoriRP.v077.q6_k.gguf filter=lfs diff=lfs merge=lfs -text
BlockML-Context.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "story_string": "▀description\n{{char}}\n{{description}}▄\n{{#if persona}}▀description\n{{user}}\n{{persona}}▄{{/if}}\n{{#if scenario}}▀scenario\n{{scenario}}▄{{/if}}\n{{system}}",
3
+ "example_separator": "",
4
+ "chat_start": "",
5
+ "use_stop_strings": false,
6
+ "always_force_name2": true,
7
+ "trim_sentences": false,
8
+ "include_newline": false,
9
+ "single_line": false,
10
+ "name": "BlockML-Story"
11
+ }
BlockML-Instruct.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "wrap": false,
3
+ "names": true,
4
+ "system_prompt": "[INST] Write a safe conversation suitable for all audiences. Don't be vulgar or sexually explicit. [/INST]",
5
+ "system_sequence": "",
6
+ "stop_sequence": "▄",
7
+ "input_sequence": "▀message (length: tiny)\n",
8
+ "output_sequence": "▄\n▀message\n",
9
+ "separator_sequence": "▄\n",
10
+ "macro": true,
11
+ "names_force_groups": false,
12
+ "last_output_sequence": "▄\n▀message (length: medium)\n",
13
+ "activation_regex": "",
14
+ "system_sequence_prefix": "",
15
+ "system_sequence_suffix": "",
16
+ "first_output_sequence": "▀message\n",
17
+ "name": "BlockML-Instruct"
18
+ }
README.md ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ pipeline_tag: text-generation
6
+ base_model: mistralai/Mistral-7B-Instruct-v0.2
7
+ tags:
8
+ - not-for-all-audiences
9
+ ---
10
+
11
+ # ShoriRP 🏆
12
+ LIMA-like (less than 1000 training samples) roleplaying chat model based on data from:
13
+
14
+ - Two subject-specific RP forums;
15
+ - Synthetically-crafted conversations from [Limamono](https://huggingface.co/lemonilia/Limamono-Mistral-7B-v0.50);
16
+ - Some background lore and character descriptions (thus far mainly pertaining to Limamono);
17
+ - Tiny amount of RP-like instructions/alignment data.
18
+
19
+ An important difference from LimaRP, other than the subject focus, is that conversations are multi-character
20
+ where applicable, whereas LimaRP only included 1-on-1 RP. Furthermore, the messages sampled have shorter length
21
+ in general. The rationale behind this was that the short(er)-form roleplays are more "fun" on average, while
22
+ the longer ones tend to use common purple prose tropes and be a bit dull.
23
+
24
+ ---
25
+
26
+ # Technical details
27
+ - The prose of the training data has a consistent novel-like format with narration in third person and past tense.
28
+ - OOC was intentionally _not_ completely eliminated, and isolated into a special role. Likewise, URLs have not been all deleted unless they referred to internal forum resources.
29
+ - For a very small portion of the data, dialogue lines and thoughts, suitable emoji (mostly 1, up to 3) conveying the mood have been _prepended_. _Prepending_ instead of _appending_ helps the model and the reader to prepare for the message tone.
30
+ - Usernames have been entirely removed; only character names remained in the data (same policy as with LimaRP).
31
+
32
+ # Known issues
33
+ - The model is very horny, but this can be toned down with an appropriate system instruction.
34
+ - There are some repetition issues. This could be due to the base model used.
35
+ - Occasionally at the beginning of the chat (first message) there might be impersonation issues.
36
+ - There might be some residual "alignment" from the base model.
37
+
38
+ # Suggested starting text generation settings
39
+ - **Main choice** (may have repetition issues)
40
+ - **Temperature**: 1.0; **Min-P**: 0.05-0.10; **Presence Penalty**: 0.35-0.45
41
+ - **Alternative 1** (appears to solve repetition issues while being coherent, but responses might possibly be less truthful)
42
+ - **Temperature**: 2.40-2.50; **Min-P**: 0.40; **Frequency penalty**: 0.10-0.15; Temperature last.
43
+ - **Alternative 2**
44
+ - **Mirostat type**: 2, **Mirostat Tau**: 2.80-3.00; **Mirostat Eta**: 0.0175-0.0200; neutralize or disable all other samplers
45
+
46
+ # Prose format
47
+ All training samples use book (novel) format with narration in third person / past tense. Other formats are not supported (they might work, but not consistently).
48
+
49
+ ## Details
50
+ - Character thoughts are delimited with underscores `_`.
51
+ - Onomatopoeias are delimited with single asterisks `*`.
52
+ - Emphasized text is delimited by double asterisks `**`.
53
+ - Spoken dialogues are delimited with ASCII quote marks `"`.
54
+ - Non-dialogue quotes are replaced with double apostrophes `''`. This avoids distracting and/or annoying conflicts with the dialogue highlighting in SillyTavern.
55
+ - Text to be interpreted as louder than normal is in `ALL CAPS`.
56
+ - Quoted text from other people is most of the time prepended with `>`.
57
+ - Formatted output text is delimited with triple backticks ` ``` `, sometimes followed by additional identifiers specifying the language (markdown, text, etc).
58
+
59
+ # Prompting format
60
+ Suitable `json` files have been provided to easily apply the prompting format in SillyTavern.
61
+
62
+ - [Context](https://huggingface.co/lemonilia/ShoriRP-v0.75d/resolve/main/BlockML-Context.json?download=true)
63
+ - [Instruct](https://huggingface.co/lemonilia/ShoriRP-v0.75d/resolve/main/BlockML-Instruct.json?download=true)
64
+
65
+ Note: the prompting format is **intentionally different** from that of the Mistral-Instruct base model.
66
+
67
+ It is advised to use `▄` as a stop token.
68
+
69
+ ## Reverse jailbreak
70
+ Since the model is normally very willing to initiate NSFW scenarios even when inappropriate, a "reverse jailbreak"
71
+ has been added in the Instruct preset linked above:
72
+
73
+ ```
74
+ [INST] Write a safe conversation suitable for all audiences. Don't be vulgar or sexually explicit. [/INST]
75
+ ```
76
+
77
+ Placed as a system instruction, this has only the effect of _toning down_ the model's default horniness and won't actually prevent
78
+ NSFW content. If desired, it can be removed.
79
+
80
+ ## Block characters
81
+ The model uses a _ChatML-like_ prompting format with a few changes from the usual roles typically used for ChatGPT-like assistant chatbots. The main one is that `<|im_start|>` has been replaced with `▀` (upper half block character) and `<|im_end|>` has been replaced with `▄` (lower half block character).
82
+
83
+ Both of these tokens already exist in the Mistral tokenizer as single tokens; they don't have any combination with other tokens, nor any special meaning attached to them, so for all intents and purposes they work like special tokens.
84
+
85
+ This avoids complications related to training a model with new tokens, as well as tokenization issues that occur with ChatML tokens when used literally.
86
+
87
+ ## Roles
88
+ All roles except `message` are optional.
89
+
90
+ Role | Description
91
+ -----------|------------
92
+ title | The title of the roleplay. It's used for steering the conversation at the beginning. Generally it's the first block in the RP conversations, but it can occur mid-conversation when the scene changes.
93
+ tags | A list of comma-separated relevant tags to hint the model about chat contents. If added, it should be placed after the title.
94
+ lore | Extended background or character lore/story is to be placed under the `lore` role.
95
+ scenario | Future events that must still happen go in `scenario`. This is also used for steering the contents of the conversation at the beginning.
96
+ description| This is where character cards go. No specific layout for character profiles is defined, but the name of the character should be clear from the description. In the training data, profiles may occasionally appear mid-conversation (for example when a new character appears). Try to use one `description` block per character.
97
+ message | [**Mandatory**] Messages are all under the `message` role regardless of who writes it. The rationale for this is that since conversations are multi-character and the characters do not necessarily reply in a fixed order, it won't be possible to reliably establish who is the "human" in terms of training. `message` was found to be neutral enough as a role and a better fit, considering the length hints that can be added.
98
+ ooc | A dedicated communication channel where OOC talk has been confined, but it's unclear how this could be actually used in existing LLM chatting front-ends.
99
+
100
+ ### Message length hints
101
+ Like LimaRP, messages use optional **length hints**. It's recommended to add them, otherwise the model may output very short messages. _It is however still possible to use the model without them for a more dynamic and fast roleplaying experience._
102
+
103
+ The available lengths are: `nano`, `micro`, `tiny`, `short`, `medium`, `long`, `massive`, `huge`, `enormous`. The recommended length is _medium_. The longest sizes do not have a large amount of training data, so they might not work very reliably. Refer to the prompting examples below as for how to add length hints.
104
+
105
+ ## Example prompt template
106
+ ```text
107
+ ▀title
108
+ {story title}▄
109
+ ▀tags
110
+ {comma-separated list of tags}▄
111
+ ▀lore
112
+ {{loreBefore}}▄
113
+ ▀description
114
+ {{char}}
115
+ {{description}}▄
116
+ ▀description
117
+ {{user}}
118
+ {{persona}}▄
119
+ ▀scenario
120
+ {{scenario}}▄
121
+ ▀message (length: {length})
122
+ {{char}}: {message}▄
123
+ ▀message (length: {length})
124
+ {{user}}: {message}▄
125
+ ▀message (length: {length})
126
+ {{char}}: {message}▄
127
+
128
+ [...]
129
+ ```
130
+
131
+ ## Practical example
132
+ ```
133
+ ▀title
134
+ A strange incident in Gensokyo▄
135
+ ▀tags
136
+ barrier, danmaku, magic, reimu, marisa▄
137
+ ▀description
138
+ **Name:** Reimu Hakurei
139
+ **Age:** 18
140
+ **Personality:** Calm and collected. She is a very responsible person and tries to do her job as well as she can. She also likes to take care of people around her, even if they are not always nice to her.
141
+ **Appearance:** Reimu is a young girl with long, black hair and brown eyes. She wears a red ribbon and matching tubes on her sidelocks and a traditional shrine maiden uniform, with a red hakama over a white kimono.
142
+ **Background:** Reimu is the shrine maiden of Hakurei Shrine, located in the center of Gensokyo. She spends most of her time taking care of the shrine and performing various duties for the residents of the village. She is known to be quite skilled in the use of magic, especially when it comes to barrier magic.▄
143
+ ▀description
144
+ **Name:** Marisa Kirisame
145
+ **Personality:** Impulsive and energetic. She is often seen as a troublemaker by others due to her tendency to break rules and cause chaos wherever she goes. She is also a bit of a flirt and enjoys teasing others.▄
146
+ ▀message (length: medium)
147
+ Reimu: "Hmm... I wonder what's going on?" Reimu mused as she stood at the entrance to the shrine, looking out at the village beyond. It was unusually quiet today, with no one coming to visit or offer any kind of offering. She had been expecting a few visitors this morning, but none had shown up yet.
148
+
149
+ "Maybe everyone is busy with something else today? Or maybe they're all sick?" she thought as she turned back inside, closing the door behind her. She began tidying up the shrine, making sure everything was clean and ready for visitors. As she worked, she couldn't shake the feeling that something wasn't right.▄
150
+ ▀message (length: short)
151
+ Marisa: "Ooohh! Reimu-chan~!" Marisa suddenly appeared from nowhere, landing on the ground with a soft thud. "What's wrong? Why aren't there any customers today? Aren't you supposed to have lots of visitors every day? I thought you were famous for being able to heal injuries and cure diseases..."
152
+
153
+ She gave her friend a wink before continuing, "But I guess I could always come by and give you some company! I'm bored anyway~"▄
154
+ ▀message (length: long)
155
+ Reimu: _Ugh, that girl again..._ Reimu thought as she looked at Marisa with annoyance. The younger girl was known for causing mischief wherever she went, and Reimu didn't appreciate her interrupting her work.
156
+
157
+ "I don't know, Marisa," she replied curtly. "No one seems to be coming today. Maybe they're all busy with their own things. But thank you for offering your help."
158
+
159
+ Reimu continued cleaning the shrine while keeping an eye on Marisa. She knew that if she left the girl alone for too long, she would probably start causing trouble. She just hoped that nothing bad happened today.▄
160
+ ```
161
+
162
+ ## Mixing Mistral-Instruct and ShoriRP prompt formats together
163
+ It is also possible to simultaneously use, with very good results in chat steerability, the instruction prompting
164
+ format of the base model Mistral-Instruct with the one of ShoriRP.
165
+
166
+ An `[INST] ... [/INST]` block can be either used as a "system instruction" on the top of the conversation, or
167
+ inserted between one message block and the other as if it was an "author note", as seen in this example (chat history
168
+ and contents omitted for brevity):
169
+
170
+ ```
171
+ ▀message
172
+ Chen: [...]▄
173
+ [INST] Yukari's personality: proud, haughty [/INST]
174
+ ▀message
175
+ Yukari: [...]▄
176
+ ```
177
+
178
+ # Dataset
179
+ Similar to LimaRP, but more niche. Flexible training sample length (from 4k to 32k tokens, at least). Might or might not be released in the future.
180
+
181
+ The model is trained in several consecutive steps with decreasing learning rate and increasing data
182
+ quality or focus. While it is unknown whether having separate low- and
183
+ mid-tier categories helps, the higher tiers are needed for the model to focus mainly on the prose and
184
+ format of the higher-quality data. This also makes retraining quicker if it only involves changes in that data.
185
+
186
+ In general, training higher quality data last increases its weight in the outputs.
187
+
188
+ | Category | Description
189
+ |:--------:|---
190
+ |Low | Short or very short-form RP conversations (often composed of one-liners); low prose quality.
191
+ |Mid | Mid-range and longer-form RP conversations that do not always meet the required quality standards or target prose format + Some lore data and character descriptions.
192
+ |High | Longer-form RP conversations of target prose quality.
193
+ |Top | Synthetic data from Limamono + Some alignment and RP-like instruction data.
194
+ |Top-pg13 | Same as above, but only with non-sexually-explicit conversations (in an attempt to make the model less horny)
195
+
196
+ ## Stats
197
+ From my data building script:
198
+
199
+ ```text
200
+ Total conversations: 486
201
+ User message count: 32,047 messages
202
+ Total unique tokens: 4,799,646 tokens
203
+ Longest conversation: 16,372 tokens
204
+ ```
205
+
206
+ - Size of the training data: 18.4 MB (about 50% larger than the first LimaRP release)
207
+ - The user message count doesn't include descriptions and other metadata.
208
+ - The actual number of conversations is higher than what the above figure suggests, since many are split into several sub-conversations.
209
+
210
+ ### Message length distribution
211
+ Most user messages are below 300 tokens in length.
212
+
213
+ ![Message length distribution](https://files.catbox.moe/yxdgop.png)
214
+
215
+ # Training details
216
+ ## Hardware
217
+ 1x NVidia RTX 3090 24GB
218
+
219
+ ## Software
220
+ [Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl)
221
+
222
+ ## Training hyperparameters
223
+ ```yaml
224
+ base_model: /home/anon/AI-Models/LLM/Mistral-7B-Instruct-v0.2
225
+ load_in_4bit: true
226
+ adapter: qlora
227
+ sequence_len: 16384
228
+ sample_packing: true
229
+ pad_to_sequence_len: false
230
+ gradient_accumulation_steps: 2
231
+ micro_batch_size: 1
232
+ eval_batch_size: 1
233
+ num_epochs: 2
234
+ optimizer: adamw_bnb_8bit
235
+ lr_scheduler: constant
236
+ learning_rate: 0.0000725 -> 0.0000550 -> 0.0000375 -> 0.0000350
237
+ weight_decay: 0.05
238
+ train_on_inputs: true
239
+ bf16: true
240
+ fp16: false
241
+ tf32: true
242
+ lora_r: 20
243
+ lora_alpha: 16
244
+ lora_dropout: 0.1
245
+ lora_target_linear: true
246
+ ```
ShoriRP.v075d.q6_k.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7b04a69ec145eacdaacaba9e5cf0d322d51cfdaab5be39319c45829b3b267df
3
+ size 5942065440
ShoriRP.v077.q6_k.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1159ce0f8421cdd69899d2c72c6bf8c0596ec54ad586d12911ee5825868096a0
3
+ size 5942065408
adapter_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "/home/anon/AI-Models/LLM/Mistral-7B-Instruct-v0.2",
5
+ "bias": "none",
6
+ "fan_in_fan_out": null,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 16,
13
+ "lora_dropout": 0.1,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 20,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "down_proj",
23
+ "gate_proj",
24
+ "q_proj",
25
+ "k_proj",
26
+ "o_proj",
27
+ "v_proj",
28
+ "up_proj"
29
+ ],
30
+ "task_type": "CAUSAL_LM",
31
+ "use_dora": false,
32
+ "use_rslora": false
33
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf1cae9b58bb020649d3026bf90241e56d936b61a45dee99901ae8cfaad6f713
3
+ size 209876621
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/anon/AI-Models/LLM/Mistral-7B-Instruct-v0.2",
3
+ "architectures": [
4
+ "MistralForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 4096,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 14336,
13
+ "max_position_embeddings": 32768,
14
+ "model_type": "mistral",
15
+ "num_attention_heads": 32,
16
+ "num_hidden_layers": 32,
17
+ "num_key_value_heads": 8,
18
+ "quantization_config": {
19
+ "_load_in_4bit": true,
20
+ "_load_in_8bit": false,
21
+ "bnb_4bit_compute_dtype": "bfloat16",
22
+ "bnb_4bit_quant_type": "nf4",
23
+ "bnb_4bit_use_double_quant": true,
24
+ "llm_int8_enable_fp32_cpu_offload": false,
25
+ "llm_int8_has_fp16_weight": false,
26
+ "llm_int8_skip_modules": null,
27
+ "llm_int8_threshold": 6.0,
28
+ "load_in_4bit": true,
29
+ "load_in_8bit": false,
30
+ "quant_method": "bitsandbytes"
31
+ },
32
+ "rms_norm_eps": 1e-05,
33
+ "rope_theta": 1000000.0,
34
+ "sliding_window": null,
35
+ "tie_word_embeddings": false,
36
+ "torch_dtype": "bfloat16",
37
+ "transformers_version": "4.38.2",
38
+ "use_cache": false,
39
+ "vocab_size": 32000
40
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443