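# Axolotl training config (rm_v2.yaml): full-parameter fine-tune of a
# local Qwen-derived checkpoint on completion-format data.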
base_model: /ephemeral/qwen_base/gate_reduction_middle_outward_init
base_model_ignore_patterns: "*/*"
# optionally set model_type or tokenizer_type explicitly
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
trust_remote_code: true
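# Liger plugin swaps in fused Triton kernels for RoPE, RMSNorm, and SwiGLU
# to cut activation memory; the fused linear cross-entropy kernel stays off.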
plugins:
- axolotl.integrations.liger.LigerPlugin
liger_rope: true
liger_rms_norm: true
liger_swiglu: true
liger_fused_linear_cross_entropy: false
load_in_8bit: false
load_in_4bit: false
strict: false
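# Completion-format JSONL: each record is raw text and the whole sequence
# contributes to the loss (no prompt/response masking).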
datasets:
- path: /ephemeral/boom/verifiers/quest-tools/data_rm_tools
  data_files: "/ephemeral/boom/verifiers/quest-tools/data_rm_tools/sample_heha_20k_axo.jsonl"
  ds_type: json
  type: completion
use_hub: false
shuffle_merged_datasets: true
dataset_prepared_path: ./prepared_v2-12/
val_set_size: 0.005
output_dir: ./output-20k-both-2/
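# Pack multiple short examples into each 6144-token sequence (for both
# train and eval) to minimize padding waste.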
sequence_len: 6144
sample_packing: true
eval_sample_packing: true
gradient_accumulation_steps: 1
micro_batch_size: 4
num_epochs: 2
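# bitsandbytes paged 8-bit AdamW with cosine decay from a low peak LR;
# note max_grad_norm: 0.001 clips far more tightly than the usual 1.0 default.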
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 1e-6
adam_beta1: 0.9
adam_beta2: 0.999
max_grad_norm: 0.001
train_on_inputs: false
group_by_length: false
bf16: auto
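# Recompute activations during backward to trade compute for memory;
# use_reentrant: true keeps the legacy torch.utils.checkpoint path.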
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: true
logging_steps: 1
flash_attention: true
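# LR warms up for 200 steps; evaluate every 50 steps and save
# 5 checkpoints per epoch.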
warmup_steps: 200
eval_steps: 50
saves_per_epoch: 5
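# DeepSpeed ZeRO stage 3 in bf16: shards parameters, gradients, and
# optimizer state across ranks.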
deepspeed: /ephemeral/axolotl/axolotl/deepspeed_configs/zero3_bf16.json
auto_resume_from_checkpoints: false
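# Weights & Biases logging; the empty keys fall back to wandb defaults
# (entity from the logged-in account, no watch, no model artifact upload).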
wandb_project: corruption_model_rm_chunk
wandb_entity:
wandb_watch:
wandb_name: rm-v2-20k-both-2
wandb_log_model:
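# Typical launch (assuming axolotl and an accelerate config are already
# set up; the config path here is illustrative):
#   accelerate launch -m axolotl.cli.train rm_v2.yaml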