# Quantization recipe: a single stage ("default_stage") applying one AWQModifier.
# NOTE(review): the nesting below was reconstructed from a flattened copy of this
# file (indentation lost, stray "|" characters on every line). Keys, values and
# regex strings are unchanged; confirm the hierarchy against the schema expected
# by the tool that consumes this recipe.
default_stage:
  default_modifiers:
    AWQModifier:
      config_groups:
        group_0:
          targets: [Linear]
          # Weight settings for this group: 4-bit, symmetric, integer,
          # group strategy with group_size 32, static (dynamic: false).
          weights:
            num_bits: 4
            type: int
            symmetric: true
            group_size: 32
            strategy: group
            block_structure: null
            dynamic: false
            actorder: null
            observer: minmax
            observer_kwargs: {}
          # No activation settings are configured for this group.
          input_activations: null
          output_activations: null
      targets: [Linear]
      # Entries prefixed with "re:" are regular-expression patterns.
      # NOTE(review): the "." in 're:.*mlp.gate$' is unescaped and therefore
      # matches any character -- presumably intentional; verify.
      ignore: [lm_head, 're:.*mlp.gate$']
      # Each mapping pairs one smooth_layer pattern with its balance_layers patterns.
      mappings:
        - smooth_layer: re:.*input_layernorm$
          balance_layers: ['re:.*q_proj$', 're:.*k_proj$', 're:.*v_proj$']
        - smooth_layer: re:.*v_proj$
          balance_layers: ['re:.*o_proj$']
        - smooth_layer: re:.*post_attention_layernorm$
          balance_layers: ['re:.*mlp\..*gate_proj$', 're:.*mlp\..*up_proj$']
        - smooth_layer: re:.*up_proj$
          balance_layers: ['re:.*down_proj$']
      duo_scaling: true
|