Upload 2 files
Browse files
- config.yaml +134 -0
- file.txt +217 -0
config.yaml
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
description: Merging MISCHIEVOUS-12B-Mix models with sliced slerp
|
2 |
+
|
3 |
+
# Metadata and Rationale
|
4 |
+
model_description: |
|
5 |
+
This configuration merges two versions of the MISCHIEVOUS-12B-Mix model: 0.4v and 0.3v.
|
6 |
+
0.3v was further fine-tuned on a specific dataset (ADD DATASET NAME HERE if known).
|
7 |
+
The sliced slerp approach allows for layer-specific control over the merging process.
|
8 |
+
|
9 |
+
base_model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
|
10 |
+
dtype: bfloat16
|
11 |
+
merge_method: slerp
|
12 |
+
tokenizer_source: union
|
13 |
+
|
14 |
+
# Slices Configuration (Layer-Specific Merging)
|
15 |
+
slices:
|
16 |
+
- sources:
|
17 |
+
- model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
|
18 |
+
layer_range: [0, 10]
|
19 |
+
- model: bamec66557/MISCHIEVOUS-12B-Mix_0.3v
|
20 |
+
layer_range: [0, 10]
|
21 |
+
parameters:
|
22 |
+
t:
|
23 |
+
- name: self_attn
|
24 |
+
value: [0.8, 0.85, 0.9, 0.95, 1.0]
|
25 |
+
- name: mlp
|
26 |
+
value: [0.9, 0.95, 1.0, 1.05, 1.1]
|
27 |
+
- name: layer_norm
|
28 |
+
value: [0.6, 0.65, 0.7, 0.75, 0.8]
|
29 |
+
- name: embed_tokens
|
30 |
+
value: [1.0]
|
31 |
+
|
32 |
+
- sources:
|
33 |
+
- model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
|
34 |
+
layer_range: [10, 20]
|
35 |
+
- model: bamec66557/MISCHIEVOUS-12B-Mix_0.3v
|
36 |
+
layer_range: [10, 20]
|
37 |
+
parameters:
|
38 |
+
t:
|
39 |
+
- name: self_attn
|
40 |
+
value: [0.7, 0.75, 0.8, 0.85, 0.9]
|
41 |
+
- name: mlp
|
42 |
+
value: [1.0, 0.95, 0.9, 0.85, 0.8]
|
43 |
+
- name: layer_norm
|
44 |
+
value: [0.5, 0.55, 0.6, 0.65, 0.7]
|
45 |
+
- name: embed_tokens
|
46 |
+
value: [1.0]
|
47 |
+
|
48 |
+
- sources:
|
49 |
+
- model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
|
50 |
+
layer_range: [20, 30]
|
51 |
+
- model: bamec66557/MISCHIEVOUS-12B-Mix_0.3v
|
52 |
+
layer_range: [20, 30]
|
53 |
+
parameters:
|
54 |
+
t:
|
55 |
+
- name: self_attn
|
56 |
+
value: [0.6, 0.65, 0.7, 0.75, 0.8]
|
57 |
+
- name: mlp
|
58 |
+
value: [0.8, 0.75, 0.7, 0.65, 0.6]
|
59 |
+
- name: layer_norm
|
60 |
+
value: [0.4, 0.45, 0.5, 0.55, 0.6]
|
61 |
+
- name: embed_tokens
|
62 |
+
value: [1.0]
|
63 |
+
|
64 |
+
- sources:
|
65 |
+
- model: bamec66557/MISCHIEVOUS-12B-Mix_0.4v
|
66 |
+
layer_range: [30, 40]
|
67 |
+
- model: bamec66557/MISCHIEVOUS-12B-Mix_0.3v
|
68 |
+
layer_range: [30, 40]
|
69 |
+
parameters:
|
70 |
+
t:
|
71 |
+
- name: self_attn
|
72 |
+
value: [0.9, 1.0, 1.1, 1.2, 1.3]
|
73 |
+
- name: mlp
|
74 |
+
value: [0.7, 0.65, 0.6, 0.55, 0.5]
|
75 |
+
- name: layer_norm
|
76 |
+
value: [0.7, 0.75, 0.8, 0.85, 0.9]
|
77 |
+
- name: embed_tokens
|
78 |
+
value: [1.0]
|
79 |
+
|
80 |
+
# Regularization (Prevent Overfitting During Merging)
|
81 |
+
regularization:
|
82 |
+
- method: weight_clipping
|
83 |
+
clip_range: [-0.2, 0.2]
|
84 |
+
- method: random_noise
|
85 |
+
scale: 0.015
|
86 |
+
- method: l2_norm
|
87 |
+
scale: 0.01
|
88 |
+
|
89 |
+
# Postprocessing (Enhance Merged Model Quality)
|
90 |
+
postprocessing:
|
91 |
+
- operation: random_noise
|
92 |
+
scale: 0.0025
|
93 |
+
- operation: non_linear_scaling
|
94 |
+
parameters:
|
95 |
+
function: tanh
|
96 |
+
- operation: sharpening
|
97 |
+
intensity: 0.3
|
98 |
+
- operation: gaussian_smoothing
|
99 |
+
sigma: 1.5
|
100 |
+
- operation: smoothing
|
101 |
+
parameters:
|
102 |
+
adaptive: true
|
103 |
+
range: [0.8, 1.2]
|
104 |
+
kernel_size: 5
|
105 |
+
- operation: normalize
|
106 |
+
- operation: dynamic_scaling
|
107 |
+
scale_range: [0.75, 1.25]
|
108 |
+
|
109 |
+
# Evaluation (Crucial for Assessing Merge Quality)
|
110 |
+
evaluation:
|
111 |
+
metrics:
|
112 |
+
- perplexity
|
113 |
+
- accuracy # If applicable (e.g., classification tasks)
|
114 |
+
- bleu # For translation tasks
|
115 |
+
- rouge # For summarization tasks
|
116 |
+
datasets:
|
117 |
+
- wikitext # General language understanding
|
118 |
+
- lambada # Long-range dependency modeling
|
119 |
+
- (ADD RELEVANT TASK-SPECIFIC DATASETS HERE)
|
120 |
+
prompts: # Example prompts — REPLACE WITH YOUR OWN
|
121 |
+
- "The quick brown fox jumps over the lazy dog."
|
122 |
+
- "Translate 'Thank you' to Spanish:"
|
123 |
+
- "Write a short summary of the French Revolution."
|
124 |
+
|
125 |
+
# Logging and Output
|
126 |
+
logging:
|
127 |
+
output_dir: ./merged_models
|
128 |
+
log_level: INFO
|
129 |
+
|
130 |
+
# Optional: Ties Merging (Advanced Technique)
|
131 |
+
# ties:
|
132 |
+
# enabled: true
|
133 |
+
# method: greedy # Or "optimal", "random"
|
134 |
+
# layers: [0, 10, 20, 30] # Example layers for ties merging
|
file.txt
ADDED
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[2024-12-16 20:48:34] [INFO] Merge configuration saved in /tmp/tmpbiob0b1i/merged/config.yaml
|
2 |
+
[2024-12-16 20:48:34] [INFO] Creating repo MISCHIEVOUS-12B-Mix_0.5v
|
3 |
+
[2024-12-16 20:48:35] [INFO] Repo created: https://huggingface.co/bamec66557/MISCHIEVOUS-12B-Mix_0.5v
|
4 |
+
[2024-12-16 20:48:35] [INFO] Running mergekit-yaml config.yaml merge --copy-tokenizer --cuda --low-cpu-memory --allow-crimes --lora-merge-cache /tmp/tmpbiob0b1i/.lora_cache
|
5 |
+
[2024-12-16 20:48:38] [INFO]
|
6 |
+
[2024-12-16 20:48:38] [INFO]
|
7 |
+
[2024-12-16 20:49:01] [INFO] Warmup loader cache: 0%| | 0/2 [00:00<?, ?it/s][A
|
8 |
+
[2024-12-16 20:49:01] [INFO]
|
9 |
+
[2024-12-16 20:49:32] [INFO] Warmup loader cache: 50%|█████     | 1/2 [00:22<00:22, 22.91s/it]
|
10 |
+
[2024-12-16 20:49:32] [INFO]
|
11 |
+
[2024-12-16 20:49:32] [INFO] Warmup loader cache: 100%|██████████| 2/2 [00:54<00:00, 27.76s/it]
|
12 |
+
[2024-12-16 20:49:32] [INFO] Warmup loader cache: 100%|██████████| 2/2 [00:54<00:00, 27.03s/it]
|
13 |
+
[2024-12-16 20:49:36] [INFO]
|
14 |
+
[2024-12-16 20:49:36] [INFO]
|
15 |
+
[2024-12-16 20:49:39] [INFO] Executing graph: 0%| | 0/1820 [00:00<?, ?it/s][A
|
16 |
+
[2024-12-16 20:49:39] [INFO]
|
17 |
+
[2024-12-16 20:49:39] [INFO] Building tokenizer permutations: 0%| | 0/2 [00:00<?, ?it/s][A
|
18 |
+
[2024-12-16 20:49:39] [INFO] Building tokenizer permutations: 100%|██████████| 2/2 [00:00<00:00, 6.91it/s]
|
19 |
+
[2024-12-16 20:49:39] [INFO]
|
20 |
+
[2024-12-16 20:49:39] [INFO]
|
21 |
+
[2024-12-16 20:49:49] [INFO] Executing graph: 0%| | 2/1820 [00:03<55:11, 1.82s/it][A
|
22 |
+
[2024-12-16 20:49:49] [INFO]
|
23 |
+
[2024-12-16 20:50:00] [INFO] Executing graph: 0%| | 6/1820 [00:13<1:10:05, 2.32s/it][A
|
24 |
+
[2024-12-16 20:50:00] [INFO]
|
25 |
+
[2024-12-16 20:50:10] [INFO] Executing graph: 0%| | 7/1820 [00:24<2:03:15, 4.08s/it][A
|
26 |
+
[2024-12-16 20:50:10] [INFO]
|
27 |
+
[2024-12-16 20:50:20] [INFO] Executing graph: 1%| | 12/1820 [00:34<1:24:27, 2.80s/it][A
|
28 |
+
[2024-12-16 20:50:20] [INFO]
|
29 |
+
[2024-12-16 20:50:21] [INFO] Executing graph: 1%| | 13/1820 [00:44<1:58:33, 3.94s/it][A
|
30 |
+
[2024-12-16 20:50:21] [INFO]
|
31 |
+
[2024-12-16 20:50:23] [INFO] Executing graph: 1%|β | 23/1820 [00:45<40:35, 1.36s/it] [A
|
32 |
+
[2024-12-16 20:50:23] [INFO]
|
33 |
+
[2024-12-16 20:50:24] [INFO] Executing graph: 2%|β | 28/1820 [00:47<29:35, 1.01it/s][A
|
34 |
+
[2024-12-16 20:50:24] [INFO]
|
35 |
+
[2024-12-16 20:50:26] [INFO] Executing graph: 2%|β | 33/1820 [00:48<22:21, 1.33it/s][A
|
36 |
+
[2024-12-16 20:50:26] [INFO]
|
37 |
+
[2024-12-16 20:50:27] [INFO] Executing graph: 4%|β | 68/1820 [00:50<06:29, 4.50it/s][A
|
38 |
+
[2024-12-16 20:50:27] [INFO]
|
39 |
+
[2024-12-16 20:50:28] [INFO] Executing graph: 4%|β | 77/1820 [00:51<05:55, 4.90it/s][A
|
40 |
+
[2024-12-16 20:50:28] [INFO]
|
41 |
+
[2024-12-16 20:50:30] [INFO] Executing graph: 5%|β | 86/1820 [00:52<05:20, 5.40it/s][A
|
42 |
+
[2024-12-16 20:50:30] [INFO]
|
43 |
+
[2024-12-16 20:50:32] [INFO] Executing graph: 6%|β | 113/1820 [00:54<03:39, 7.77it/s][A
|
44 |
+
[2024-12-16 20:50:32] [INFO]
|
45 |
+
[2024-12-16 20:50:34] [INFO] Executing graph: 7%|β | 123/1820 [00:56<04:15, 6.64it/s][A
|
46 |
+
[2024-12-16 20:50:34] [INFO]
|
47 |
+
[2024-12-16 20:50:37] [INFO] Executing graph: 9%|β | 158/1820 [00:58<02:50, 9.75it/s][A
|
48 |
+
[2024-12-16 20:50:37] [INFO]
|
49 |
+
[2024-12-16 20:50:38] [INFO] Executing graph: 9%|β | 169/1820 [01:01<03:23, 8.12it/s][A
|
50 |
+
[2024-12-16 20:50:38] [INFO]
|
51 |
+
[2024-12-16 20:50:49] [INFO] Executing graph: 11%|β | 201/1820 [01:02<02:16, 11.84it/s][A
|
52 |
+
[2024-12-16 20:50:49] [INFO]
|
53 |
+
[2024-12-16 20:50:50] [INFO] Executing graph: 11%|β | 201/1820 [01:13<02:16, 11.84it/s][A
|
54 |
+
[2024-12-16 20:50:50] [INFO]
|
55 |
+
[2024-12-16 20:50:52] [INFO] Executing graph: 12%|ββ | 213/1820 [01:14<06:49, 3.93it/s][A
|
56 |
+
[2024-12-16 20:50:52] [INFO]
|
57 |
+
[2024-12-16 20:50:54] [INFO] Executing graph: 14%|ββ | 248/1820 [01:16<04:22, 5.98it/s][A
|
58 |
+
[2024-12-16 20:50:54] [INFO]
|
59 |
+
[2024-12-16 20:50:56] [INFO] Executing graph: 15%|ββ | 264/1820 [01:18<04:13, 6.15it/s][A
|
60 |
+
[2024-12-16 20:50:56] [INFO]
|
61 |
+
[2024-12-16 20:50:58] [INFO] Executing graph: 16%|ββ | 293/1820 [01:20<03:15, 7.80it/s][A
|
62 |
+
[2024-12-16 20:50:58] [INFO]
|
63 |
+
[2024-12-16 20:51:00] [INFO] Executing graph: 17%|ββ | 307/1820 [01:22<03:25, 7.35it/s][A
|
64 |
+
[2024-12-16 20:51:00] [INFO]
|
65 |
+
[2024-12-16 20:51:03] [INFO] Executing graph: 19%|ββ | 338/1820 [01:24<02:39, 9.27it/s][A
|
66 |
+
[2024-12-16 20:51:03] [INFO]
|
67 |
+
[2024-12-16 20:51:05] [INFO] Executing graph: 19%|ββ | 351/1820 [01:27<02:58, 8.23it/s][A
|
68 |
+
[2024-12-16 20:51:05] [INFO]
|
69 |
+
[2024-12-16 20:51:07] [INFO] Executing graph: 21%|ββ | 383/1820 [01:28<02:20, 10.26it/s][A
|
70 |
+
[2024-12-16 20:51:07] [INFO]
|
71 |
+
[2024-12-16 20:51:09] [INFO] Executing graph: 22%|βββ | 396/1820 [01:31<02:40, 8.86it/s][A
|
72 |
+
[2024-12-16 20:51:09] [INFO]
|
73 |
+
[2024-12-16 20:51:11] [INFO] Executing graph: 24%|βββ | 428/1820 [01:33<02:08, 10.84it/s][A
|
74 |
+
[2024-12-16 20:51:11] [INFO]
|
75 |
+
[2024-12-16 20:51:13] [INFO] Executing graph: 24%|βββ | 441/1820 [01:35<02:29, 9.21it/s][A
|
76 |
+
[2024-12-16 20:51:13] [INFO]
|
77 |
+
[2024-12-16 20:51:15] [INFO] Executing graph: 26%|βββ | 473/1820 [01:37<02:00, 11.15it/s][A
|
78 |
+
[2024-12-16 20:51:15] [INFO]
|
79 |
+
[2024-12-16 20:51:17] [INFO] Executing graph: 27%|βββ | 485/1820 [01:39<02:24, 9.26it/s][A
|
80 |
+
[2024-12-16 20:51:17] [INFO]
|
81 |
+
[2024-12-16 20:51:20] [INFO] Executing graph: 28%|βββ | 518/1820 [01:41<01:54, 11.36it/s][A
|
82 |
+
[2024-12-16 20:51:20] [INFO]
|
83 |
+
[2024-12-16 20:51:22] [INFO] Executing graph: 29%|βββ | 531/1820 [01:44<02:15, 9.52it/s][A
|
84 |
+
[2024-12-16 20:51:22] [INFO]
|
85 |
+
[2024-12-16 20:51:24] [INFO] Executing graph: 31%|βββ | 563/1820 [01:46<01:50, 11.42it/s][A
|
86 |
+
[2024-12-16 20:51:24] [INFO]
|
87 |
+
[2024-12-16 20:51:25] [INFO] Executing graph: 32%|ββββ | 575/1820 [01:48<02:12, 9.43it/s][A
|
88 |
+
[2024-12-16 20:51:25] [INFO]
|
89 |
+
[2024-12-16 20:51:36] [INFO] Executing graph: 33%|ββββ | 606/1820 [01:49<01:34, 12.81it/s][A
|
90 |
+
[2024-12-16 20:51:36] [INFO]
|
91 |
+
[2024-12-16 20:51:38] [INFO] Executing graph: 34%|ββββ | 622/1820 [02:00<04:24, 4.54it/s][A
|
92 |
+
[2024-12-16 20:51:38] [INFO]
|
93 |
+
[2024-12-16 20:51:41] [INFO] Executing graph: 36%|ββββ | 653/1820 [02:02<03:04, 6.34it/s][A
|
94 |
+
[2024-12-16 20:51:41] [INFO]
|
95 |
+
[2024-12-16 20:51:42] [INFO] Executing graph: 37%|ββββ | 670/1820 [02:04<02:55, 6.54it/s][A
|
96 |
+
[2024-12-16 20:51:42] [INFO]
|
97 |
+
[2024-12-16 20:51:45] [INFO] Executing graph: 38%|ββββ | 698/1820 [02:06<02:16, 8.20it/s][A
|
98 |
+
[2024-12-16 20:51:45] [INFO]
|
99 |
+
[2024-12-16 20:51:47] [INFO] Executing graph: 39%|ββββ | 713/1820 [02:09<02:22, 7.74it/s][A
|
100 |
+
[2024-12-16 20:51:47] [INFO]
|
101 |
+
[2024-12-16 20:51:49] [INFO] Executing graph: 41%|ββββ | 743/1820 [02:10<01:51, 9.66it/s][A
|
102 |
+
[2024-12-16 20:51:49] [INFO]
|
103 |
+
[2024-12-16 20:51:51] [INFO] Executing graph: 42%|βββββ | 757/1820 [02:13<02:03, 8.61it/s][A
|
104 |
+
[2024-12-16 20:51:51] [INFO]
|
105 |
+
[2024-12-16 20:51:53] [INFO] Executing graph: 43%|βββββ | 788/1820 [02:15<01:37, 10.57it/s][A
|
106 |
+
[2024-12-16 20:51:53] [INFO]
|
107 |
+
[2024-12-16 20:51:55] [INFO] Executing graph: 44%|βββββ | 801/1820 [02:17<01:52, 9.04it/s][A
|
108 |
+
[2024-12-16 20:51:55] [INFO]
|
109 |
+
[2024-12-16 20:51:57] [INFO] Executing graph: 46%|βββββ | 833/1820 [02:19<01:29, 11.00it/s][A
|
110 |
+
[2024-12-16 20:51:57] [INFO]
|
111 |
+
[2024-12-16 20:51:59] [INFO] Executing graph: 46%|βββββ | 846/1820 [02:21<01:44, 9.31it/s][A
|
112 |
+
[2024-12-16 20:51:59] [INFO]
|
113 |
+
[2024-12-16 20:52:02] [INFO] Executing graph: 48%|βββββ | 878/1820 [02:23<01:23, 11.22it/s][A
|
114 |
+
[2024-12-16 20:52:02] [INFO]
|
115 |
+
[2024-12-16 20:52:04] [INFO] Executing graph: 49%|βββββ | 891/1820 [02:26<01:38, 9.44it/s][A
|
116 |
+
[2024-12-16 20:52:04] [INFO]
|
117 |
+
[2024-12-16 20:52:06] [INFO] Executing graph: 51%|βββββ | 923/1820 [02:28<01:19, 11.32it/s][A
|
118 |
+
[2024-12-16 20:52:06] [INFO]
|
119 |
+
[2024-12-16 20:52:08] [INFO] Executing graph: 51%|ββββββ | 935/1820 [02:30<01:34, 9.36it/s][A
|
120 |
+
[2024-12-16 20:52:08] [INFO]
|
121 |
+
[2024-12-16 20:52:10] [INFO] Executing graph: 53%|ββββββ | 968/1820 [02:32<01:14, 11.41it/s][A
|
122 |
+
[2024-12-16 20:52:10] [INFO]
|
123 |
+
[2024-12-16 20:52:11] [INFO] Executing graph: 54%|ββββββ | 980/1820 [02:34<01:29, 9.41it/s][A
|
124 |
+
[2024-12-16 20:52:11] [INFO]
|
125 |
+
[2024-12-16 20:52:23] [INFO] Executing graph: 56%|ββββββ | 1011/1820 [02:35<01:03, 12.76it/s][A
|
126 |
+
[2024-12-16 20:52:23] [INFO]
|
127 |
+
[2024-12-16 20:52:24] [INFO] Executing graph: 56%|ββββββ | 1027/1820 [02:47<02:54, 4.56it/s][A
|
128 |
+
[2024-12-16 20:52:24] [INFO]
|
129 |
+
[2024-12-16 20:52:27] [INFO] Executing graph: 58%|ββββββ | 1058/1820 [02:48<01:59, 6.36it/s][A
|
130 |
+
[2024-12-16 20:52:27] [INFO]
|
131 |
+
[2024-12-16 20:52:29] [INFO] Executing graph: 59%|ββββββ | 1075/1820 [02:51<01:53, 6.55it/s][A
|
132 |
+
[2024-12-16 20:52:29] [INFO]
|
133 |
+
[2024-12-16 20:52:31] [INFO] Executing graph: 61%|ββββββ | 1103/1820 [02:52<01:26, 8.25it/s][A
|
134 |
+
[2024-12-16 20:52:31] [INFO]
|
135 |
+
[2024-12-16 20:52:33] [INFO] Executing graph: 61%|βββββββ | 1118/1820 [02:55<01:30, 7.77it/s][A
|
136 |
+
[2024-12-16 20:52:33] [INFO]
|
137 |
+
[2024-12-16 20:52:35] [INFO] Executing graph: 63%|βββββββ | 1148/1820 [02:57<01:08, 9.75it/s][A
|
138 |
+
[2024-12-16 20:52:35] [INFO]
|
139 |
+
[2024-12-16 20:52:37] [INFO] Executing graph: 64%|βββββββ | 1162/1820 [02:59<01:15, 8.66it/s][A
|
140 |
+
[2024-12-16 20:52:37] [INFO]
|
141 |
+
[2024-12-16 20:52:39] [INFO] Executing graph: 66%|βββββββ | 1193/1820 [03:01<00:59, 10.54it/s][A
|
142 |
+
[2024-12-16 20:52:39] [INFO]
|
143 |
+
[2024-12-16 20:52:41] [INFO] Executing graph: 66%|βββββββ | 1206/1820 [03:03<01:07, 9.03it/s][A
|
144 |
+
[2024-12-16 20:52:41] [INFO]
|
145 |
+
[2024-12-16 20:52:44] [INFO] Executing graph: 68%|βββββββ | 1238/1820 [03:05<00:53, 10.98it/s][A
|
146 |
+
[2024-12-16 20:52:44] [INFO]
|
147 |
+
[2024-12-16 20:52:46] [INFO] Executing graph: 69%|βββββββ | 1251/1820 [03:08<01:01, 9.28it/s][A
|
148 |
+
[2024-12-16 20:52:46] [INFO]
|
149 |
+
[2024-12-16 20:52:48] [INFO] Executing graph: 70%|βββββββ | 1283/1820 [03:10<00:48, 11.17it/s][A
|
150 |
+
[2024-12-16 20:52:48] [INFO]
|
151 |
+
[2024-12-16 20:52:50] [INFO] Executing graph: 71%|βββββββ | 1296/1820 [03:12<00:55, 9.39it/s][A
|
152 |
+
[2024-12-16 20:52:50] [INFO]
|
153 |
+
[2024-12-16 20:52:52] [INFO] Executing graph: 73%|ββββββββ | 1328/1820 [03:14<00:43, 11.27it/s][A
|
154 |
+
[2024-12-16 20:52:52] [INFO]
|
155 |
+
[2024-12-16 20:52:54] [INFO] Executing graph: 74%|ββββββββ | 1340/1820 [03:16<00:51, 9.32it/s][A
|
156 |
+
[2024-12-16 20:52:54] [INFO]
|
157 |
+
[2024-12-16 20:52:57] [INFO] Executing graph: 75%|ββββββββ | 1373/1820 [03:18<00:39, 11.36it/s][A
|
158 |
+
[2024-12-16 20:52:57] [INFO]
|
159 |
+
[2024-12-16 20:52:58] [INFO] Executing graph: 76%|ββββββββ | 1385/1820 [03:20<00:46, 9.38it/s][A
|
160 |
+
[2024-12-16 20:52:58] [INFO]
|
161 |
+
[2024-12-16 20:53:09] [INFO] Executing graph: 78%|ββββββββ | 1416/1820 [03:22<00:31, 12.73it/s][A
|
162 |
+
[2024-12-16 20:53:09] [INFO]
|
163 |
+
[2024-12-16 20:53:11] [INFO] Executing graph: 79%|ββββββββ | 1432/1820 [03:33<01:26, 4.50it/s][A
|
164 |
+
[2024-12-16 20:53:11] [INFO]
|
165 |
+
[2024-12-16 20:53:13] [INFO] Executing graph: 80%|ββββββββ | 1463/1820 [03:35<00:56, 6.30it/s][A
|
166 |
+
[2024-12-16 20:53:13] [INFO]
|
167 |
+
[2024-12-16 20:53:15] [INFO] Executing graph: 81%|βββββββββ | 1480/1820 [03:37<00:52, 6.49it/s][A
|
168 |
+
[2024-12-16 20:53:15] [INFO]
|
169 |
+
[2024-12-16 20:53:17] [INFO] Executing graph: 83%|βββββββββ | 1508/1820 [03:39<00:38, 8.20it/s][A
|
170 |
+
[2024-12-16 20:53:17] [INFO]
|
171 |
+
[2024-12-16 20:53:19] [INFO] Executing graph: 84%|βββββββββ | 1523/1820 [03:41<00:38, 7.74it/s][A
|
172 |
+
[2024-12-16 20:53:19] [INFO]
|
173 |
+
[2024-12-16 20:53:21] [INFO] Executing graph: 85%|βββββββββ | 1553/1820 [03:43<00:27, 9.73it/s][A
|
174 |
+
[2024-12-16 20:53:21] [INFO]
|
175 |
+
[2024-12-16 20:53:23] [INFO] Executing graph: 86%|βββββββββ | 1567/1820 [03:45<00:29, 8.65it/s][A
|
176 |
+
[2024-12-16 20:53:23] [INFO]
|
177 |
+
[2024-12-16 20:53:26] [INFO] Executing graph: 88%|βββββββββ | 1598/1820 [03:47<00:20, 10.78it/s][A
|
178 |
+
[2024-12-16 20:53:26] [INFO]
|
179 |
+
[2024-12-16 20:53:27] [INFO] Executing graph: 89%|βββββββββ | 1612/1820 [03:49<00:22, 9.28it/s][A
|
180 |
+
[2024-12-16 20:53:27] [INFO]
|
181 |
+
[2024-12-16 20:53:30] [INFO] Executing graph: 90%|βββββββββ | 1643/1820 [03:51<00:15, 11.34it/s][A
|
182 |
+
[2024-12-16 20:53:30] [INFO]
|
183 |
+
[2024-12-16 20:53:31] [INFO] Executing graph: 91%|βββββββββ | 1656/1820 [03:54<00:17, 9.48it/s][A
|
184 |
+
[2024-12-16 20:53:31] [INFO]
|
185 |
+
[2024-12-16 20:53:34] [INFO] Executing graph: 93%|ββββββββββ| 1688/1820 [03:55<00:11, 11.69it/s][A
|
186 |
+
[2024-12-16 20:53:34] [INFO]
|
187 |
+
[2024-12-16 20:53:36] [INFO] Executing graph: 93%|ββββββββββ| 1701/1820 [03:58<00:12, 9.69it/s][A
|
188 |
+
[2024-12-16 20:53:36] [INFO]
|
189 |
+
[2024-12-16 20:53:38] [INFO] Executing graph: 95%|ββββββββββ| 1733/1820 [03:59<00:07, 11.85it/s][A
|
190 |
+
[2024-12-16 20:53:38] [INFO]
|
191 |
+
[2024-12-16 20:53:40] [INFO] Executing graph: 96%|ββββββββββ| 1746/1820 [04:02<00:07, 9.78it/s][A
|
192 |
+
[2024-12-16 20:53:40] [INFO]
|
193 |
+
[2024-12-16 20:53:42] [INFO] Executing graph: 98%|ββββββββββ| 1778/1820 [04:04<00:03, 11.94it/s][A
|
194 |
+
[2024-12-16 20:53:42] [INFO]
|
195 |
+
[2024-12-16 20:53:50] [INFO] Executing graph: 98%|ββββββββββ| 1791/1820 [04:06<00:02, 9.83it/s][A
|
196 |
+
[2024-12-16 20:53:50] [INFO]
|
197 |
+
[2024-12-16 20:53:50] [INFO] Executing graph: 100%|██████████| 1820/1820 [04:14<00:00, 5.87it/s]
|
198 |
+
[2024-12-16 20:53:50] [INFO] Executing graph: 100%|██████████| 1820/1820 [04:14<00:00, 7.14it/s]
|
199 |
+
[2024-12-16 20:53:51] [INFO] Command exited successfully
|
200 |
+
[2024-12-16 20:53:51] [INFO] Model merged successfully. Uploading to HF.
|
201 |
+
[2024-12-16 20:53:51] [INFO] Running upload_folder(repo_id=bamec66557/MISCHIEVOUS-12B-Mix_0.5v, folder_path=/tmp/tmpbiob0b1i/merged/merge)
|
202 |
+
[2024-12-16 20:54:48] [INFO]
|
203 |
+
[2024-12-16 20:54:48] [INFO]
|
204 |
+
[2024-12-16 20:55:15] [INFO] 0%| | 0/6 [00:00<?, ?it/s][A
|
205 |
+
[2024-12-16 20:55:15] [INFO]
|
206 |
+
[2024-12-16 20:55:34] [INFO] 17%|#6 | 1/6 [00:20<01:40, 20.19s/it][A
|
207 |
+
[2024-12-16 20:55:34] [INFO]
|
208 |
+
[2024-12-16 20:55:55] [INFO] 33%|###3 | 2/6 [00:41<01:22, 20.59s/it][A
|
209 |
+
[2024-12-16 20:55:55] [INFO]
|
210 |
+
[2024-12-16 20:56:17] [INFO] 50%|##### | 3/6 [01:01<01:01, 20.62s/it][A
|
211 |
+
[2024-12-16 20:56:17] [INFO]
|
212 |
+
[2024-12-16 20:56:32] [INFO] 67%|######6 | 4/6 [01:22<00:41, 20.74s/it][A
|
213 |
+
[2024-12-16 20:56:32] [INFO]
|
214 |
+
[2024-12-16 20:56:32] [INFO] 83%|########3 | 5/6 [01:43<00:20, 20.96s/it][A
|
215 |
+
[2024-12-16 20:56:32] [INFO] 100%|##########| 6/6 [01:44<00:00, 17.39s/it]
|
216 |
+
[2024-12-16 20:56:33] [INFO] Process completed successfully
|
217 |
+
[2024-12-16 20:56:33] [INFO] Model successfully uploaded to HF: bamec66557/MISCHIEVOUS-12B-Mix_0.5v
|