Commit: Upload folder using huggingface_hub
Browse files
Files changed:
- README.md: +3 −16
- modeling_wisent_qwen.py: +6 −8
README.md
CHANGED
|
@@ -11,23 +11,10 @@ tags:
|
|
| 11 |
- wisent
|
| 12 |
library_name: transformers
|
| 13 |
datasets:
|
| 14 |
-
-
|
| 15 |
metrics:
|
| 16 |
- pass@1
|
| 17 |
base_model: Qwen/Qwen2.5-Coder-7B-Instruct
|
| 18 |
-
model-index:
|
| 19 |
-
- name: wisent-ai/qwen2.5-coder-7b-wisent-caa
|
| 20 |
-
results:
|
| 21 |
-
- task:
|
| 22 |
-
type: code-generation
|
| 23 |
-
name: Code Generation
|
| 24 |
-
dataset:
|
| 25 |
-
type: mbpp
|
| 26 |
-
name: MBPP Plus
|
| 27 |
-
metrics:
|
| 28 |
-
- type: pass@1
|
| 29 |
-
value: 0.67
|
| 30 |
-
name: Pass@1
|
| 31 |
---
|
| 32 |
|
| 33 |
# Wisent-Qwen2.5-Coder-7B-Instruct with CAA Steering
|
|
@@ -164,7 +151,7 @@ The CAA parameters were optimized using:
|
|
| 164 |
- **Framework**: Optuna with TPE sampler
|
| 165 |
- **Search Space**: Layers 15-28, α ∈ [0.1, 5.0]
|
| 166 |
- **Objective**: Maximize accuracy on MBPP Plus validation set
|
| 167 |
-
- **
|
| 168 |
|
| 169 |
## Model Architecture
|
| 170 |
|
|
@@ -193,7 +180,7 @@ huggingface_qwen25-7b-coder-caa/
|
|
| 193 |
|
| 194 |
### MBPP Plus Benchmark
|
| 195 |
|
| 196 |
-
The model should be
|
| 197 |
|
| 198 |
### Running Evaluation
|
| 199 |
|
|
|
|
| 11 |
- wisent
|
| 12 |
library_name: transformers
|
| 13 |
datasets:
|
| 14 |
+
- evalplus/mbppplus
|
| 15 |
metrics:
|
| 16 |
- pass@1
|
| 17 |
base_model: Qwen/Qwen2.5-Coder-7B-Instruct
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
---
|
| 19 |
|
| 20 |
# Wisent-Qwen2.5-Coder-7B-Instruct with CAA Steering
|
|
|
|
| 151 |
- **Framework**: Optuna with TPE sampler
|
| 152 |
- **Search Space**: Layers 15-28, α ∈ [0.1, 5.0]
|
| 153 |
- **Objective**: Maximize accuracy on MBPP Plus validation set
|
| 154 |
+
- **Validation Results**: Optimized for improved performance on MBPP Plus tasks
|
| 155 |
|
| 156 |
## Model Architecture
|
| 157 |
|
|
|
|
| 180 |
|
| 181 |
### MBPP Plus Benchmark
|
| 182 |
|
| 183 |
+
The model has been optimized using Optuna on MBPP Plus tasks. For reliable performance metrics, evaluation should be conducted on the complete MBPP Plus dataset (378 problems) using the [evalplus/mbppplus](https://huggingface.co/datasets/evalplus/mbppplus) dataset.
|
| 184 |
|
| 185 |
### Running Evaluation
|
| 186 |
|
modeling_wisent_qwen.py
CHANGED
|
@@ -5,12 +5,11 @@ This model automatically applies CAA steering during generation without requirin
|
|
| 5 |
The steering parameters are optimized using Optuna and stored in the model configuration.
|
| 6 |
"""
|
| 7 |
|
| 8 |
-
from typing import Optional, Tuple, Union
|
|
|
|
| 9 |
import torch
|
| 10 |
-
import
|
| 11 |
-
from transformers import Qwen2ForCausalLM, Qwen2Config
|
| 12 |
from transformers.modeling_outputs import CausalLMOutputWithPast
|
| 13 |
-
from transformers.cache_utils import Cache
|
| 14 |
|
| 15 |
|
| 16 |
class WisentQwen2Config(Qwen2Config):
|
|
@@ -150,8 +149,7 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
|
|
| 150 |
# Return modified output
|
| 151 |
if isinstance(output, tuple):
|
| 152 |
return (hidden_states,) + output[1:]
|
| 153 |
-
|
| 154 |
-
return hidden_states
|
| 155 |
|
| 156 |
def forward(
|
| 157 |
self,
|
|
@@ -254,7 +252,7 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
|
|
| 254 |
|
| 255 |
if not has_weights and local_path.exists() and (local_path / "config.json").exists():
|
| 256 |
# We have config but no weights - load from base model
|
| 257 |
-
print(
|
| 258 |
|
| 259 |
# First, load config from local path
|
| 260 |
from transformers import AutoConfig
|
|
@@ -301,7 +299,7 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
|
|
| 301 |
|
| 302 |
|
| 303 |
# Register the model
|
| 304 |
-
from transformers import
|
| 305 |
|
| 306 |
AutoConfig.register("wisent_qwen2", WisentQwen2Config)
|
| 307 |
AutoModelForCausalLM.register(WisentQwen2Config, WisentQwen2ForCausalLM)
|
|
|
|
| 5 |
The steering parameters are optimized using Optuna and stored in the model configuration.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
from typing import List, Optional, Tuple, Union
|
| 9 |
+
|
| 10 |
import torch
|
| 11 |
+
from transformers import Qwen2Config, Qwen2ForCausalLM
|
|
|
|
| 12 |
from transformers.modeling_outputs import CausalLMOutputWithPast
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
class WisentQwen2Config(Qwen2Config):
|
|
|
|
| 149 |
# Return modified output
|
| 150 |
if isinstance(output, tuple):
|
| 151 |
return (hidden_states,) + output[1:]
|
| 152 |
+
return hidden_states
|
|
|
|
| 153 |
|
| 154 |
def forward(
|
| 155 |
self,
|
|
|
|
| 252 |
|
| 253 |
if not has_weights and local_path.exists() and (local_path / "config.json").exists():
|
| 254 |
# We have config but no weights - load from base model
|
| 255 |
+
print("Loading weights from base model: Qwen/Qwen2.5-Coder-7B-Instruct")
|
| 256 |
|
| 257 |
# First, load config from local path
|
| 258 |
from transformers import AutoConfig
|
|
|
|
| 299 |
|
| 300 |
|
| 301 |
# Register the model
|
| 302 |
+
from transformers import AutoConfig, AutoModelForCausalLM
|
| 303 |
|
| 304 |
AutoConfig.register("wisent_qwen2", WisentQwen2Config)
|
| 305 |
AutoModelForCausalLM.register(WisentQwen2Config, WisentQwen2ForCausalLM)
|