Update README.md
Browse files
README.md
CHANGED
@@ -6,14 +6,15 @@ library_name: transformers
|
|
6 |
tags:
|
7 |
- mergekit
|
8 |
- merge
|
9 |
-
- not-for-all-audiences
|
10 |
- text-generation-inference
|
11 |
-
|
|
|
12 |
language:
|
13 |
- en
|
|
|
14 |
---
|
15 |
|
16 |
-
|
17 |
|
18 |
|
19 |
# [GGUF]
|
@@ -50,30 +51,30 @@ slices:
|
|
50 |
parameters:
|
51 |
t:
|
52 |
- filter: self_attn
|
53 |
-
value: [0.1, 0.3, 0.7, 0.9, 1.0] #
|
54 |
- filter: mlp
|
55 |
-
value: [1.0, 0.7, 0.4, 0.1, 0.0] #
|
56 |
- filter: layer_norm
|
57 |
-
value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, #
|
58 |
-
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, #
|
59 |
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
|
60 |
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
|
61 |
-
- value: 0.9 #
|
62 |
|
63 |
merge_method: slerp # maintain slerp
|
64 |
|
65 |
-
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v #
|
66 |
|
67 |
-
dtype: bfloat16 #
|
68 |
|
69 |
-
#
|
70 |
regularization:
|
71 |
-
- method: l2_norm #
|
72 |
-
scale: 0.005 #
|
73 |
|
74 |
postprocessing:
|
75 |
-
- operation: smoothing #
|
76 |
-
kernel_size: 5 #
|
77 |
-
- operation: normalize #
|
78 |
|
79 |
```
|
|
|
6 |
tags:
|
7 |
- mergekit
|
8 |
- merge
|
|
|
9 |
- text-generation-inference
|
10 |
+
- not-for-all-audiences
|
11 |
+
license: apache-2.0
|
12 |
language:
|
13 |
- en
|
14 |
+
- ko
|
15 |
---
|
16 |
|
17 |
+
<a href="#" target="_blank"><img src="https://huggingface.co/bamec66557/MISCHIEVOUS-12B/resolve/main/00001-321918068.gif"></a>
|
18 |
|
19 |
|
20 |
# [GGUF]
|
|
|
51 |
parameters:
|
52 |
t:
|
53 |
- filter: self_attn
|
54 |
+
value: [0.1, 0.3, 0.7, 0.9, 1.0] # Sharp rise for dramatic change
|
55 |
- filter: mlp
|
56 |
+
value: [1.0, 0.7, 0.4, 0.1, 0.0] # Conversely, a sharp decline
|
57 |
- filter: layer_norm
|
58 |
+
value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, # First 10 layers
|
59 |
+
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, # The remaining 30 layers
|
60 |
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
|
61 |
0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
|
62 |
+
- value: 0.9 # Set the default merge ratio high
|
63 |
|
64 |
merge_method: slerp # maintain slerp
|
65 |
|
66 |
+
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v # Base model
|
67 |
|
68 |
+
dtype: bfloat16 # Data type for fast merges
|
69 |
|
70 |
+
# Additional options
|
71 |
regularization:
|
72 |
+
- method: l2_norm # Stabilise after merging with L2 normalisation
|
73 |
+
scale: 0.005 # Reduce normalisation strength to allow for variation
|
74 |
|
75 |
postprocessing:
|
76 |
+
- operation: smoothing # Smoothing weights after merging
|
77 |
+
kernel_size: 5 # Larger kernel size smooths over a wider range
|
78 |
+
- operation: normalize # Normalise after merge
|
79 |
|
80 |
```
|