---
base_model:
  - Undi95/PsyMedRP-v1-20B
  - Undi95/MXLewd-L2-20B
library_name: transformers
tags:
  - mergekit
  - merge
---
# More buffer for the start and end layers (20 non-duplicated as opposed to 10)

# In testing......

This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).

## Merge Details
### Merge Method

This model was merged using the passthrough merge method.

### Models Merged

The following models were included in the merge:
* [Undi95/PsyMedRP-v1-20B](https://huggingface.co/Undi95/PsyMedRP-v1-20B)
* [Undi95/MXLewd-L2-20B](https://huggingface.co/Undi95/MXLewd-L2-20B)

### Configuration

The following YAML configuration was used to produce this model:

```yaml
# --- DUAL MODEL MERGE SETUP ---
# More buffer for the start and end layers (20 non-duplicated as opposed to 10).
# Merging two faves: Undi95/MXLewd-L2-20B and Undi95/PsyMedRP-v1-20B.
#
# Layout of `slices` below:
#   1. layer_range [0, 20] taken once from BASE_MODEL;
#   2. single-layer slices [20, 21] .. [42, 43], each taken first from MODEL1
#      and then from MODEL2, with the matching *_FILTER_ENV merged in;
#   3. layer_range [42, 62] taken once from BASE_MODEL.

# The models we are going to use.
# `const_tag` is only a carrier key for the anchors; the aliases further down
# refer to the anchor names (BASE_MODEL, MODEL1, ...), not to the key itself.
# BASE_MODEL and MODEL2 are anchored to the same model.
# NOTE(review): the key is repeated, and duplicate mapping keys are not valid
# per the YAML spec - this relies on the loader tolerating them; confirm.
const_tag: &BASE_MODEL Undi95/PsyMedRP-v1-20B
const_tag: &MODEL1 Undi95/MXLewd-L2-20B
const_tag: &MODEL2 Undi95/PsyMedRP-v1-20B

# The amount to scale the contribution to the residual stream
# (to hopefully reduce overshoot).
# NOTE(review): the value is 0.71 while the inline note says 0.7 - confirm
# which one is intended.
const_tag: &RESIDUAL_SCALE_FACTOR 0.71  # back to 0.7

# Parameter block merged (via `<<`) into every MODEL1 source below.
# The entry filtered on `down_proj` uses RESIDUAL_SCALE_FACTOR; the unfiltered
# entry is 1.0 (presumably the value for everything else - confirm against
# the mergekit parameter documentation).
model1-filter-env: &MODEL1_FILTER_ENV
  parameters:
    scale:
      - filter: down_proj
        value: *RESIDUAL_SCALE_FACTOR
      - value: 1.0

# Same content as MODEL1_FILTER_ENV; merged into every MODEL2 source below.
model2-filter-env: &MODEL2_FILTER_ENV
  parameters:
    scale:
      - filter: down_proj
        value: *RESIDUAL_SCALE_FACTOR
      - value: 1.0

slices:

  # The first 20 layers are not duplicated.
  - sources:
      - model: *BASE_MODEL
        layer_range: [0, 20]

  # Interleaved section: for each layer_range from [20, 21] up to [42, 43],
  # one single-layer slice from MODEL1 followed by the same range from MODEL2.
  - sources:
      - model: *MODEL1
        layer_range: [20, 21]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [20, 21]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [21, 22]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [21, 22]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [22, 23]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [22, 23]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [23, 24]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [23, 24]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [24, 25]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [24, 25]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [25, 26]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [25, 26]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [26, 27]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [26, 27]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [27, 28]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [27, 28]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [28, 29]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [28, 29]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [29, 30]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [29, 30]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [30, 31]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [30, 31]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [31, 32]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [31, 32]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [32, 33]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [32, 33]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [33, 34]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [33, 34]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [34, 35]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [34, 35]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [35, 36]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [35, 36]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [36, 37]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [36, 37]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [37, 38]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [37, 38]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [38, 39]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [38, 39]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [39, 40]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [39, 40]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [40, 41]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [40, 41]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [41, 42]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [41, 42]
        <<: *MODEL2_FILTER_ENV

  - sources:
      - model: *MODEL1
        layer_range: [42, 43]
        <<: *MODEL1_FILTER_ENV
  - sources:
      - model: *MODEL2
        layer_range: [42, 43]
        <<: *MODEL2_FILTER_ENV

  # The last 20 layers are not duplicated.
  # NOTE(review): this range starts at 42, which the [42, 43] slices above
  # also cover. Assuming layer_range is half-open [start, end) - as the
  # "20 layers" comments imply - layer 42 appears in both sections; confirm
  # that this is intended.
  - sources:
      - model: *BASE_MODEL
        layer_range: [42, 62]

merge_method: passthrough
dtype: bfloat16
```