### model
# Checkpoint currently in use.
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
train_from_scratch: false
trust_remote_code: true

# Other checkpoints kept for reference (enable at most one model_name_or_path):
# model_name_or_path: xtuner/llava-llama-3-8b-v1_1-hf
# model_name_or_path: Intel/llava-llama-3-8b
# model_name_or_path: ./models/Offical_models/Intel--llava-llama-3-8b
# model_name_or_path: lmms-lab/llama3-llava-next-8b
# model_name_or_path: llava-hf/llava-1.5-7b-hf
# model_name_or_path: llava-hf/llama3-llava-next-8b-hf
# model_name_or_path: Qwen/Qwen2.5-VL-7B
# model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct

# Pixel caps, currently disabled:
# image_max_pixels: 262144
# video_max_pixels: 16384

### method
# Run type: training enabled, stage "sft", LoRA fine-tuning.
do_train: true
stage: sft
finetuning_type: lora

# LoRA hyperparameters.
lora_target: all
# lora_target: v_proj,o_proj,q_proj,k_proj,molecule_projector.linear_1,molecule_projector.linear_2
lora_rank: 64
lora_alpha: 16
lora_dropout: 0


### full fine-tuning (disabled; alternative to finetuning_type: lora)
# finetuning_type: full
# freeze_vision_tower: true  # choices: [true, false]
# freeze_multi_modal_projector: true  # choices: [true, false]
# freeze_language_model: false  # choices: [true, false]
# deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]

### dataset
# Data source.
dataset: mol-instruct
# dataset: llava_1k_en
dataset_dir: ./data

# Prompt template in use; other templates kept for reference.
template: llama3
# template: llava_next_llama3
# template: llava_next
# template: qwen2_vl

# Sequence-length and sample-count limits.
cutoff_len: 4096
# cutoff_len: 2048
max_samples: 1000000

# Preprocessing and data loading.
preprocessing_num_workers: 16
dataloader_num_workers: 8
overwrite_cache: true

### output
# Destination directory; existing contents may be overwritten.
output_dir: saves/mol-instruct
overwrite_output_dir: true

# Logging / checkpoint intervals (in steps) and loss plotting.
logging_steps: 10
save_steps: 1000
plot_loss: true

### train
# Batch sizing.
per_device_train_batch_size: 8
gradient_accumulation_steps: 8

# Optimisation schedule.
# Keep the decimal point in the mantissa (1.0e-4, not 1e-4): YAML 1.1 loaders
# such as PyYAML resolve dot-less exponent notation as a string, not a float.
learning_rate: 1.0e-4
# learning_rate: 5.0e-5
num_train_epochs: 3.0
lr_scheduler_type: cosine
warmup_ratio: 0.1

# Precision, memory and distributed settings.
bf16: true
gradient_checkpointing: true
ddp_timeout: 180000000

# NOTE: plot_loss is configured once under "### output"; do not repeat it here.
# Duplicate mapping keys are invalid YAML and most loaders silently keep only
# the last occurrence.

# trust_remote_code: True
# optim: adamw_torch
cache_dir: ./JUNKS/mol-instruct

### validate (disabled)
# do_predict: true

# predict_with_generate: true
# eval_dataset: mllmChem_eval_OCSR, mllmChem_eval_MDG, mllmChem_eval_FRP, mllmChem_eval_RT, mllmChem_eval_RP, mllmChem_eval_PP

### eval (disabled)
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500