data:
  dataloader_num_workers: 0
  dataloader_pin_memory: false
  dataset_preference_ratio: 0.7
  dataset_type: default
  eval_datasets:
  - abraranwar/libero_rfm
  - ykorkmaz/libero_failure_rfm
  - HenryZhang/metaworld_rewind_rfm_eval
  eval_subset_size: 500
  eval_subsets:
  - - libero256_10
  - - libero_10_failure
  - - metaworld_rewind_eval
  force_reprocess: false
  fps: 10
  max_frames: 16
  max_frames_for_preprocessing: 64
  max_trajectories: -1
  model_type: default
  n_wrong_tasks: 5
  num_bins: 10
  num_proc: 1
  preference_ratio: 1.0
  preference_strategy_ratio:
  - 0.4
  - 0.3
  - 0.3
  - 0.0
  progress_ratio: 0.5
  resized_height: 128
  resized_width: 128
  rewind_lengths: null
  samples_per_trajectory: 1
  seed: 42
  shuffle: true
  train_datasets:
  - abraranwar/libero_rfm
  - ykorkmaz/libero_failure_rfm
  - HenryZhang/metaworld_rewind_rfm_train
  train_subsets:
  - - libero256_90
  - - libero_90_failure
  - - metaworld_rewind_train
  video_frame_sampling: uniform
debug: false
logging:
  print_trainable_parameters: true
  save_model: true
  save_processor: true
  use_wandb: true
  wandb_entity: clvr
  wandb_project: rfm
  wandb_run_name: rfm
mode: train
model:
  base_model_id: Qwen/Qwen2.5-VL-3B-Instruct
  torch_dtype: bfloat16
  train_language_model: false
  train_preference_head: true
  train_progress_head: true
  train_similarity_head: false
  train_value_head: true
  train_vision_encoder: true
  trust_remote_code: true
peft:
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  peft_vision_encoder: true
  r: 32
  target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj
  use_peft: false
training:
  beta: 0.1
  bf16: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: true
  do_eval: true
  eval_steps: 100
  evaluation_strategy: steps
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 2.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  max_steps: 5000
  num_gpus: 2
  num_train_epochs: -1
  output_dir: ./logs/rfm_progpref_peft_vision
  per_device_eval_batch_size: 8
  per_device_train_batch_size: 8
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: null
  save_steps: 200
  save_strategy: steps
  warmup_ratio: 0.1
  warmup_steps: 0
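
A minimal sketch of how a config like this might be consumed, assuming it is saved as config.yaml and read with PyYAML; the file name, the use of yaml.safe_load, and the mapping of the peft block onto Hugging Face peft's LoraConfig are illustrative assumptions, not the project's actual entry point.

# Minimal sketch (assumptions: config saved as "config.yaml"; PyYAML and the
# `peft` library installed). Illustrative only, not the training script itself.
import yaml
from peft import LoraConfig

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# Nested sections are plain dicts after safe_load.
model_id = cfg["model"]["base_model_id"]   # "Qwen/Qwen2.5-VL-3B-Instruct"
lr = cfg["training"]["learning_rate"]      # 2.0e-05

# Only build a LoRA adapter config when PEFT is enabled (use_peft is false above).
if cfg["peft"]["use_peft"]:
    lora_config = LoraConfig(
        r=cfg["peft"]["r"],
        lora_alpha=cfg["peft"]["lora_alpha"],
        lora_dropout=cfg["peft"]["lora_dropout"],
        bias=cfg["peft"]["bias"],
        target_modules=cfg["peft"]["target_modules"],
    )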