# Qwen2.5-14B-Brocav2 / mergekit_config.yml
# Source: CultriX (uploaded via huggingface_hub, commit 0203561, verified)
# mergekit configuration: della_linear merge of seven Qwen2.5-14B variants
# onto the CultriX/Qwen2.5-14B-Wernickev3 base, with per-task adaptive weights.
merge_method: della_linear
base_model: CultriX/Qwen2.5-14B-Wernickev3
dtype: bfloat16
parameters:
  epsilon: 0.03  # Refines sharper parameter scaling.
  lambda: 1.1  # Balances blending while emphasizing significant contributions.
  normalize: true  # Ensures stable parameter integration across models.
adaptive_merge_parameters:
  task_weights:
    tinyArc: 1.5  # Logical reasoning boost.
    tinyHellaswag: 1.3  # Contextual and multi-step reasoning.
    tinyMMLU: 1.2  # Domain-specific knowledge retention.
    tinyTruthfulQA: 1.6  # Enhanced factual QA tasks.
    tinyTruthfulQA_mc1: 1.4
    tinyWinogrande: 1.5  # Reasoning for multi-turn tasks.
    IFEval: 1.6  # Instruction-following.
    BBH: 1.5  # Complex reasoning improvement.
    MATH: 1.7  # Mathematical reasoning focus.
    GPQA: 1.6  # Graduate-level QA emphasis.
    MUSR: 1.6  # Advanced multi-step reasoning.
    MMLU-PRO: 1.5  # Multitask domain performance.
  smoothing_factor: 0.15  # Balance model contributions.
gradient_clipping: 0.85  # Ensures no single model overly dominates.
models:
  - model: CultriX/Qwen2.5-14B-Wernickev3
    parameters:
      weight: 0.2  # Core multitask foundation.
      density: 0.7
  - model: CultriX/Qwenfinity-2.5-14B
    parameters:
      weight: 0.18  # Broad multitask capabilities.
      density: 0.65
  - model: CultriX/Qwen2.5-14B-Broca
    parameters:
      weight: 0.15  # Logical reasoning and multitask adaptability.
      density: 0.6
  - model: djuna/Q2.5-Veltha-14B-0.5
    parameters:
      weight: 0.15  # Specialized for MUSR, IFEval, and BBH.
      density: 0.6
  - model: qingy2019/Qwen2.5-Math-14B-Instruct
    parameters:
      weight: 0.12  # Mathematical reasoning contributor.
      density: 0.6
  - model: CultriX/SeQwence-14Bv1
    parameters:
      weight: 0.12  # Broad multitask contributor.
      density: 0.6
  - model: sometimesanotion/Qwen2.5-14B-Vimarckoso
    parameters:
      weight: 0.08  # Specialist for MUSR and advanced reasoning tasks.
      density: 0.5
tokenizer_source: CultriX/Qwen2.5-14B-Wernickev3