Qwen3-8B-train-away-lying-lr1e5-temp10-penalize

This model was trained using the Hindsight Oversight pipeline.

Training Configuration

  • Base model: Qwen/Qwen3-8B
  • Training strategy: REINFORCE
  • Detection strategy: penalize
  • Epochs: 1
  • Batch size: 16
  • Learning rate: 1e-05
  • LoRA: Yes
  • LoRA r: 16
  • LoRA alpha: 32
  • LoRA target modules: ['q_proj', 'v_proj', 'k_proj', 'o_proj']

Final Evaluation Metrics

{
  "epoch_0_batch_0": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_1": {
    "proxy_lying_count": 10,
    "proxy_truth_count": 6,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_2": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_3": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_4": {
    "proxy_lying_count": 11,
    "proxy_truth_count": 5,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_5": {
    "proxy_lying_count": 15,
    "proxy_truth_count": 1,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_6": {
    "proxy_lying_count": 10,
    "proxy_truth_count": 6,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_7": {
    "proxy_lying_count": 10,
    "proxy_truth_count": 6,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_8": {
    "proxy_lying_count": 11,
    "proxy_truth_count": 5,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_9": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_10": {
    "proxy_lying_count": 14,
    "proxy_truth_count": 2,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_11": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_12": {
    "proxy_lying_count": 10,
    "proxy_truth_count": 6,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_13": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_14": {
    "proxy_lying_count": 10,
    "proxy_truth_count": 6,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_15": {
    "proxy_lying_count": 14,
    "proxy_truth_count": 2,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_16": {
    "proxy_lying_count": 15,
    "proxy_truth_count": 1,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_17": {
    "proxy_lying_count": 11,
    "proxy_truth_count": 5,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_18": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_19": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_20": {
    "proxy_lying_count": 11,
    "proxy_truth_count": 5,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_21": {
    "proxy_lying_count": 12,
    "proxy_truth_count": 4,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_22": {
    "proxy_lying_count": 8,
    "proxy_truth_count": 8,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_23": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_24": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_25": {
    "proxy_lying_count": 14,
    "proxy_truth_count": 2,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_26": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_27": {
    "proxy_lying_count": 14,
    "proxy_truth_count": 2,
    "gt_lying_count": 3,
    "gt_truth_count": 13
  },
  "epoch_0_batch_28": {
    "proxy_lying_count": 12,
    "proxy_truth_count": 4,
    "gt_lying_count": 3,
    "gt_truth_count": 13
  },
  "epoch_0_batch_29": {
    "proxy_lying_count": 9,
    "proxy_truth_count": 7,
    "gt_lying_count": 3,
    "gt_truth_count": 13
  },
  "epoch_0_batch_30": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_31": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_32": {
    "proxy_lying_count": 11,
    "proxy_truth_count": 5,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_33": {
    "proxy_lying_count": 11,
    "proxy_truth_count": 5,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_34": {
    "proxy_lying_count": 14,
    "proxy_truth_count": 2,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_35": {
    "proxy_lying_count": 14,
    "proxy_truth_count": 2,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_36": {
    "proxy_lying_count": 12,
    "proxy_truth_count": 4,
    "gt_lying_count": 3,
    "gt_truth_count": 13
  },
  "epoch_0_batch_37": {
    "proxy_lying_count": 10,
    "proxy_truth_count": 6,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_38": {
    "proxy_lying_count": 10,
    "proxy_truth_count": 6,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_39": {
    "proxy_lying_count": 15,
    "proxy_truth_count": 1,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_40": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_41": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_42": {
    "proxy_lying_count": 12,
    "proxy_truth_count": 4,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_43": {
    "proxy_lying_count": 11,
    "proxy_truth_count": 5,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_44": {
    "proxy_lying_count": 12,
    "proxy_truth_count": 4,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_45": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_46": {
    "proxy_lying_count": 15,
    "proxy_truth_count": 1,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_47": {
    "proxy_lying_count": 10,
    "proxy_truth_count": 6,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_48": {
    "proxy_lying_count": 13,
    "proxy_truth_count": 3,
    "gt_lying_count": 2,
    "gt_truth_count": 14
  },
  "epoch_0_batch_49": {
    "proxy_lying_count": 14,
    "proxy_truth_count": 2,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_50": {
    "proxy_lying_count": 14,
    "proxy_truth_count": 2,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_51": {
    "proxy_lying_count": 12,
    "proxy_truth_count": 4,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_52": {
    "proxy_lying_count": 11,
    "proxy_truth_count": 5,
    "gt_lying_count": 0,
    "gt_truth_count": 16
  },
  "epoch_0_batch_53": {
    "proxy_lying_count": 15,
    "proxy_truth_count": 1,
    "gt_lying_count": 1,
    "gt_truth_count": 15
  },
  "epoch_0_batch_54": {
    "proxy_lying_count": 8,
    "proxy_truth_count": 3,
    "gt_lying_count": 2,
    "gt_truth_count": 9
  }
}
Downloads last month

-

Downloads are not tracked for this model. (See the Hugging Face documentation for how to enable download tracking.)
Video Preview
loading

Model tree for arianaazarbal/Qwen3-8B-train-away-lying-lr1e5-temp10-penalize

Base model

Qwen/Qwen3-8B-Base
Finetuned
Qwen/Qwen3-8B
Adapter
(83)
this model