bamec66557 committed on
Commit
141bfb2
·
verified ·
1 Parent(s): be34b9a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -16
README.md CHANGED
@@ -6,14 +6,15 @@ library_name: transformers
6
  tags:
7
  - mergekit
8
  - merge
9
- - not-for-all-audiences
10
  - text-generation-inference
11
- pipeline_tag: text-generation
 
12
  language:
13
  - en
 
14
  ---
15
 
16
- ![00001-321918068.gif](https://huggingface.co/bamec66557/MISCHIEVOUS-12B/resolve/main/00001-321918068.gif)
17
 
18
 
19
  # [GGUF]
@@ -50,30 +51,30 @@ slices:
50
  parameters:
51
  t:
52
  - filter: self_attn
53
- value: [0.1, 0.3, 0.7, 0.9, 1.0] # 극적인 변화를 위한 급격한 증가
54
  - filter: mlp
55
- value: [1.0, 0.7, 0.4, 0.1, 0.0] # 반대로 급격히 감소
56
  - filter: layer_norm
57
- value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, # 첫 10개 레이어
58
- 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, # 나머지 30개 레이어
59
  0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
60
  0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
61
- - value: 0.9 # 기본 병합 비율을 높게 설정
62
 
63
  merge_method: slerp # maintain slerp
64
 
65
- base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v # 기본 모델
66
 
67
- dtype: bfloat16 # 빠른 병합을 위한 데이터 타입
68
 
69
- # 추가 옵션
70
  regularization:
71
- - method: l2_norm # L2 정규화로 병합 후 안정화
72
- scale: 0.005 # 정규화 강도를 낮춰 변화 허용
73
 
74
  postprocessing:
75
- - operation: smoothing # 병합 후 가중치 부드럽게 조정
76
- kernel_size: 5 # 커널 크기 증가로 더 넓은 범위 스무딩
77
- - operation: normalize # 병합 후 정규화
78
 
79
  ```
 
6
  tags:
7
  - mergekit
8
  - merge
 
9
  - text-generation-inference
10
+ - not-for-all-audiences
11
+ license: apache-2.0
12
  language:
13
  - en
14
+ - ko
15
  ---
16
 
17
+ <a href="#" target="_blank"><img src="https://huggingface.co/bamec66557/MISCHIEVOUS-12B/resolve/main/00001-321918068.gif"></a>
18
 
19
 
20
  # [GGUF]
 
51
  parameters:
52
  t:
53
  - filter: self_attn
54
+ value: [0.1, 0.3, 0.7, 0.9, 1.0] # Spikes for dramatic change
55
  - filter: mlp
56
+ value: [1.0, 0.7, 0.4, 0.1, 0.0] # Conversely, a sharp decline
57
  - filter: layer_norm
58
+ value: [0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, 0.7, # First 10 layers
59
+ 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, # The remaining 30 layers
60
  0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
61
  0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3]
62
+ - value: 0.9 # Set the default merge ratio to high
63
 
64
  merge_method: slerp # maintain slerp
65
 
66
+ base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.2v # Base model
67
 
68
+ dtype: bfloat16 # Data types for fast merges
69
 
70
+ # Additional options
71
  regularization:
72
+ - method: l2_norm # Stabilise after merging with L2 normalisation
73
+ scale: 0.005 # Reduce normalisation strength to allow for variation
74
 
75
  postprocessing:
76
+ - operation: smoothing # Smoothing weights after merging
77
+ kernel_size: 5 # Smoothing larger ranges with increased kernel size
78
+ - operation: normalize # Normalise after merge
79
 
80
  ```