aliangdw committed
Commit cc35407 · verified · 1 parent: 411bf05

Upload RFM model

Files changed (1)
  1. config.yaml +101 -0
config.yaml ADDED
@@ -0,0 +1,101 @@
+ data:
+   dataloader_num_workers: 0
+   dataloader_pin_memory: false
+   dataset_preference_ratio: 0.7
+   dataset_type: default
+   eval_datasets:
+   - abraranwar/libero_rfm
+   - ykorkmaz/libero_failure_rfm
+   eval_subset_size: 500
+   eval_subsets:
+   - - libero256_10
+   - - libero_10_failure
+   force_reprocess: false
+   fps: 10
+   max_frames: 16
+   max_frames_for_preprocessing: 64
+   model_type: default
+   num_bins: 3
+   num_proc: 1
+   preference_ratio: 1.0
+   preference_strategy_ratio:
+   - 0.8
+   - 0.1
+   - 0.1
+   - 0.0
+   progress_ratio: 0.5
+   resized_height: 128
+   resized_width: 128
+   rewind_lengths: null
+   samples_per_trajectory: 1
+   seed: 42
+   shuffle: true
+   train_datasets:
+   - abraranwar/libero_rfm
+   - ykorkmaz/libero_failure_rfm
+   train_subsets:
+   - - libero256_90
+   - - libero_90_failure
+   video_frame_sampling: uniform
+ debug: false
+ logging:
+   print_trainable_parameters: true
+   save_model: true
+   save_processor: true
+   use_wandb: true
+   wandb_entity: clvr
+   wandb_project: rfm
+   wandb_run_name: rfm
+ mode: train
+ model:
+   base_model_id: Qwen/Qwen2.5-VL-3B-Instruct
+   torch_dtype: bfloat16
+   train_language_model: false
+   train_preference_head: true
+   train_progress_head: true
+   train_similarity_head: false
+   train_value_head: true
+   train_vision_encoder: true
+   trust_remote_code: true
+ peft:
+   bias: none
+   lora_alpha: 64
+   lora_dropout: 0.05
+   r: 32
+   target_modules:
+   - q_proj
+   - k_proj
+   - v_proj
+   - o_proj
+   - gate_proj
+   - up_proj
+   - down_proj
+   use_peft: false
+ training:
+   beta: 0.1
+   bf16: true
+   ddp_bucket_cap_mb: 25
+   ddp_find_unused_parameters: true
+   do_eval: true
+   eval_steps: 50
+   evaluation_strategy: steps
+   fp16: false
+   gradient_accumulation_steps: 1
+   gradient_checkpointing: true
+   learning_rate: 2.0e-05
+   logging_steps: 1
+   lr_scheduler_type: cosine
+   max_seq_length: 1024
+   max_steps: 5000
+   num_gpus: 2
+   num_train_epochs: -1
+   output_dir: ./logs/rfm_prefprogress
+   per_device_eval_batch_size: 8
+   per_device_train_batch_size: 8
+   prediction_loss_only: true
+   remove_unused_columns: false
+   resume_from_checkpoint: null
+   save_steps: 200
+   save_strategy: steps
+   warmup_ratio: 0.1
+   warmup_steps: 0
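
A few sketches of how a training script might consume this file follow. None of this code is taken from the RFM repository; every function and module name outside the config itself is an assumption.

The data section caps each clip at max_frames: 16 (sampled at fps: 10, with up to max_frames_for_preprocessing: 64 frames kept during preprocessing) using video_frame_sampling: uniform. A minimal sketch of uniform index selection, assuming "uniform" means evenly spaced indices over the clip:

import yaml
import numpy as np

cfg = yaml.safe_load(open("config.yaml"))

def sample_frame_indices(total_frames: int, max_frames: int) -> np.ndarray:
    """Pick up to `max_frames` evenly spaced frame indices from a clip."""
    if total_frames <= max_frames:
        return np.arange(total_frames)
    # Evenly spaced positions across the clip, rounded to valid integer indices.
    return np.linspace(0, total_frames - 1, num=max_frames).round().astype(int)

idx = sample_frame_indices(total_frames=240, max_frames=cfg["data"]["max_frames"])
print(idx)  # 16 indices spread uniformly over a 240-frame clip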
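
The logging block routes metrics to the clvr/rfm project on Weights & Biases. Assuming the repo calls wandb directly (it could equally go through the HF Trainer's report_to), the setup would be roughly:

import yaml
import wandb

cfg = yaml.safe_load(open("config.yaml"))
lg = cfg["logging"]
if lg["use_wandb"]:
    # Run shows up as entity "clvr", project "rfm", run name "rfm".
    wandb.init(entity=lg["wandb_entity"], project=lg["wandb_project"], name=lg["wandb_run_name"])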
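
The model section loads Qwen/Qwen2.5-VL-3B-Instruct in bfloat16, freezes the language model, and trains the vision encoder plus the preference, progress, and value heads. The heads are defined by the RFM codebase, not this file, so the sketch below covers only the base model and the freeze pattern the train_* flags imply:

import torch
import yaml
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

cfg = yaml.safe_load(open("config.yaml"))
m = cfg["model"]

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    m["base_model_id"],
    torch_dtype=getattr(torch, m["torch_dtype"]),  # torch.bfloat16
    trust_remote_code=m["trust_remote_code"],
)
processor = AutoProcessor.from_pretrained(m["base_model_id"])

# Freeze/unfreeze by parameter name rather than submodule attribute, since the
# exact module layout varies across transformers versions; vision-tower
# parameters in Qwen2.5-VL carry "visual" in their names.
for name, param in model.named_parameters():
    if "visual" in name:
        param.requires_grad = m["train_vision_encoder"]   # true
    else:
        param.requires_grad = m["train_language_model"]   # false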
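
The peft block is a standard LoRA recipe (r=32, alpha=64, dropout 0.05) over the attention and MLP projections, but use_peft: false means it is inactive in this run. Mapping it onto the peft library, assuming that is what the repo uses:

import yaml
from peft import LoraConfig, get_peft_model

cfg = yaml.safe_load(open("config.yaml"))
p = cfg["peft"]

if p["use_peft"]:  # false in this config, so this branch is skipped
    lora_cfg = LoraConfig(
        r=p["r"],
        lora_alpha=p["lora_alpha"],
        lora_dropout=p["lora_dropout"],
        bias=p["bias"],
        target_modules=p["target_modules"],
    )
    model = get_peft_model(model, lora_cfg)  # `model` from the previous sketch
    model.print_trainable_parameters()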
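
Most of the training block maps one-to-one onto transformers.TrainingArguments; keys with no TrainingArguments counterpart (beta, num_gpus, max_seq_length) are presumably consumed by the RFM trainer itself, which is an assumption. A rough mapping:

import yaml
from transformers import TrainingArguments

cfg = yaml.safe_load(open("config.yaml"))
t = cfg["training"]

args = TrainingArguments(
    output_dir=t["output_dir"],
    max_steps=t["max_steps"],        # 5000; num_train_epochs: -1 defers to max_steps
    learning_rate=t["learning_rate"],
    lr_scheduler_type=t["lr_scheduler_type"],
    warmup_ratio=t["warmup_ratio"],  # warmup_steps: 0 leaves the ratio in charge
    per_device_train_batch_size=t["per_device_train_batch_size"],
    per_device_eval_batch_size=t["per_device_eval_batch_size"],
    gradient_accumulation_steps=t["gradient_accumulation_steps"],
    gradient_checkpointing=t["gradient_checkpointing"],
    bf16=t["bf16"],
    fp16=t["fp16"],
    do_eval=t["do_eval"],
    eval_strategy=t["evaluation_strategy"],  # named evaluation_strategy in older transformers
    eval_steps=t["eval_steps"],
    save_strategy=t["save_strategy"],
    save_steps=t["save_steps"],
    logging_steps=t["logging_steps"],
    remove_unused_columns=t["remove_unused_columns"],
    prediction_loss_only=t["prediction_loss_only"],
    ddp_find_unused_parameters=t["ddp_find_unused_parameters"],
    ddp_bucket_cap_mb=t["ddp_bucket_cap_mb"],
    seed=cfg["data"]["seed"],
)

With num_gpus: 2 and per_device_train_batch_size: 8 at gradient_accumulation_steps: 1, the effective batch is 2 × 8 = 16 samples per optimizer step.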