---
# rfm_prefprog_v3 / config.yaml
# Uploaded by aliangdw ("Upload RFM model", commit 0bdfddd, verified)
# Dataset and dataloader configuration.
data:
  dataloader_num_workers: 0
  dataloader_pin_memory: false
  dataset_preference_ratio: 0.7
  dataset_type: default
  # Hugging Face dataset IDs used for evaluation; order is parallel to
  # eval_subsets below (one subset list per dataset).
  eval_datasets:
    - abraranwar/libero_rfm
    - ykorkmaz/libero_failure_rfm
    - HenryZhang/metaworld_rewind_rfm_eval
  eval_subset_size: 500
  # List-of-lists: the i-th inner list names the subsets of eval_datasets[i].
  eval_subsets:
    - - libero256_10
    - - libero_10_failure
    - - metaworld_rewind_eval
  force_reprocess: false
  fps: 10
  max_frames: 16
  max_frames_for_preprocessing: 64
  max_trajectories: -1  # -1 presumably means "use all trajectories" — confirm in loader
  model_type: default
  n_wrong_tasks: 5
  num_bins: 10
  num_proc: 1
  preference_ratio: 1.0
  # Mixing weights over preference-sampling strategies (sum to 1.0);
  # strategy order is defined by the consuming code — verify there.
  preference_strategy_ratio:
    - 0.4
    - 0.3
    - 0.3
    - 0.0
  progress_ratio: 0.5
  resized_height: 128
  resized_width: 128
  rewind_lengths: null
  samples_per_trajectory: 1
  seed: 42
  shuffle: true
  # Training dataset IDs; order is parallel to train_subsets below.
  train_datasets:
    - abraranwar/libero_rfm
    - ykorkmaz/libero_failure_rfm
    - HenryZhang/metaworld_rewind_rfm_train
  train_subsets:
    - - libero256_90
    - - libero_90_failure
    - - metaworld_rewind_train
  video_frame_sampling: uniform
debug: false

# Experiment logging (Weights & Biases) and artifact saving.
logging:
  print_trainable_parameters: true
  save_model: true
  save_processor: true
  use_wandb: true
  wandb_entity: clvr
  wandb_project: rfm
  wandb_run_name: rfm

mode: train
# Base model selection and per-component trainability flags.
model:
  base_model_id: Qwen/Qwen2.5-VL-3B-Instruct
  torch_dtype: bfloat16
  train_language_model: false
  train_preference_head: true
  train_progress_head: true
  train_similarity_head: false
  train_value_head: true
  train_vision_encoder: true
  # Required to load Qwen2.5-VL custom modeling code from the Hub.
  trust_remote_code: true
# LoRA / PEFT settings. NOTE: use_peft is false, so these are inert unless
# that flag is flipped — confirm the trainer actually gates on use_peft.
peft:
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  peft_vision_encoder: true
  r: 32  # LoRA rank
  # Attention and MLP projection layers to adapt.
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
  use_peft: false
# Trainer hyperparameters (HF TrainingArguments-style naming).
training:
  beta: 0.1
  bf16: true
  fp16: false
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: true
  do_eval: true
  eval_steps: 100
  evaluation_strategy: steps
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 2.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  max_steps: 5000
  num_gpus: 2
  num_train_epochs: -1  # -1: run by max_steps rather than epoch count
  output_dir: ./logs/rfm_progpref_peft_vision
  per_device_eval_batch_size: 8
  per_device_train_batch_size: 8
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: null
  save_steps: 200
  save_strategy: steps
  warmup_ratio: 0.1
  warmup_steps: 0