# Qwen3-8B-train-away-lying-lr1e5-temp10-penalize
This model was trained using the Hindsight Oversight pipeline.
## Training Configuration
- Base model: Qwen/Qwen3-8B
- Training strategy: REINFORCE
- Detection strategy: penalize
- Epochs: 1
- Batch size: 16
- Learning rate: 1e-05
- LoRA: Yes
- LoRA r: 16
- LoRA alpha: 32
- LoRA target modules: ['q_proj', 'v_proj', 'k_proj', 'o_proj']
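## Final Evaluation Metrics

Per-batch counts for epoch 0 (batches 0–54). In every batch the `proxy_lying_count` + `proxy_truth_count` pair and the `gt_lying_count` + `gt_truth_count` pair each sum to the batch size of 16, except the final batch (`epoch_0_batch_54`), which contains 11 samples.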
{
"epoch_0_batch_0": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_1": {
"proxy_lying_count": 10,
"proxy_truth_count": 6,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_2": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_3": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_4": {
"proxy_lying_count": 11,
"proxy_truth_count": 5,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_5": {
"proxy_lying_count": 15,
"proxy_truth_count": 1,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_6": {
"proxy_lying_count": 10,
"proxy_truth_count": 6,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_7": {
"proxy_lying_count": 10,
"proxy_truth_count": 6,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_8": {
"proxy_lying_count": 11,
"proxy_truth_count": 5,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_9": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_10": {
"proxy_lying_count": 14,
"proxy_truth_count": 2,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_11": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_12": {
"proxy_lying_count": 10,
"proxy_truth_count": 6,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_13": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_14": {
"proxy_lying_count": 10,
"proxy_truth_count": 6,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_15": {
"proxy_lying_count": 14,
"proxy_truth_count": 2,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_16": {
"proxy_lying_count": 15,
"proxy_truth_count": 1,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_17": {
"proxy_lying_count": 11,
"proxy_truth_count": 5,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_18": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_19": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_20": {
"proxy_lying_count": 11,
"proxy_truth_count": 5,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_21": {
"proxy_lying_count": 12,
"proxy_truth_count": 4,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_22": {
"proxy_lying_count": 8,
"proxy_truth_count": 8,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_23": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_24": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_25": {
"proxy_lying_count": 14,
"proxy_truth_count": 2,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_26": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_27": {
"proxy_lying_count": 14,
"proxy_truth_count": 2,
"gt_lying_count": 3,
"gt_truth_count": 13
},
"epoch_0_batch_28": {
"proxy_lying_count": 12,
"proxy_truth_count": 4,
"gt_lying_count": 3,
"gt_truth_count": 13
},
"epoch_0_batch_29": {
"proxy_lying_count": 9,
"proxy_truth_count": 7,
"gt_lying_count": 3,
"gt_truth_count": 13
},
"epoch_0_batch_30": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_31": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_32": {
"proxy_lying_count": 11,
"proxy_truth_count": 5,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_33": {
"proxy_lying_count": 11,
"proxy_truth_count": 5,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_34": {
"proxy_lying_count": 14,
"proxy_truth_count": 2,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_35": {
"proxy_lying_count": 14,
"proxy_truth_count": 2,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_36": {
"proxy_lying_count": 12,
"proxy_truth_count": 4,
"gt_lying_count": 3,
"gt_truth_count": 13
},
"epoch_0_batch_37": {
"proxy_lying_count": 10,
"proxy_truth_count": 6,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_38": {
"proxy_lying_count": 10,
"proxy_truth_count": 6,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_39": {
"proxy_lying_count": 15,
"proxy_truth_count": 1,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_40": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_41": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_42": {
"proxy_lying_count": 12,
"proxy_truth_count": 4,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_43": {
"proxy_lying_count": 11,
"proxy_truth_count": 5,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_44": {
"proxy_lying_count": 12,
"proxy_truth_count": 4,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_45": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_46": {
"proxy_lying_count": 15,
"proxy_truth_count": 1,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_47": {
"proxy_lying_count": 10,
"proxy_truth_count": 6,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_48": {
"proxy_lying_count": 13,
"proxy_truth_count": 3,
"gt_lying_count": 2,
"gt_truth_count": 14
},
"epoch_0_batch_49": {
"proxy_lying_count": 14,
"proxy_truth_count": 2,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_50": {
"proxy_lying_count": 14,
"proxy_truth_count": 2,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_51": {
"proxy_lying_count": 12,
"proxy_truth_count": 4,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_52": {
"proxy_lying_count": 11,
"proxy_truth_count": 5,
"gt_lying_count": 0,
"gt_truth_count": 16
},
"epoch_0_batch_53": {
"proxy_lying_count": 15,
"proxy_truth_count": 1,
"gt_lying_count": 1,
"gt_truth_count": 15
},
"epoch_0_batch_54": {
"proxy_lying_count": 8,
"proxy_truth_count": 3,
"gt_lying_count": 2,
"gt_truth_count": 9
}
}