### model
# model_name_or_path: xtuner/llava-llama-3-8b-v1_1-hf
# model_name_or_path: Intel/llava-llama-3-8b
# model_name_or_path: ./models/Offical_models/Intel--llava-llama-3-8b
# model_name_or_path: lmms-lab/llama3-llava-next-8b
# model_name_or_path: llava-hf/llava-1.5-7b-hf
# model_name_or_path: llava-hf/llama3-llava-next-8b-hf
# model_name_or_path: Qwen/Qwen2.5-VL-7B
# model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
model_name_or_path: llava-hf/llama3-llava-next-8b-hf
trust_remote_code: true
train_from_scratch: false
image_max_pixels: 262144
video_max_pixels: 16384
### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 64
lora_alpha: 16
lora_dropout: 0
lora_target: all
# lora_target: v_proj,o_proj,q_proj,k_proj,molecule_projector.linear_1,molecule_projector.linear_2
### Full
# finetuning_type: full
# freeze_vision_tower: true  # choices: [true, false]
# freeze_multi_modal_projector: true  # choices: [true, false]
# freeze_language_model: false  # choices: [true, false]
# deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
### dataset
dataset_dir: ./data
dataset: mol-instruct
# dataset: llava_1k_en
# template: llava_next_llama3
template: llava_next
# template: qwen2_vl
# cutoff_len: 2048
cutoff_len: 4096
max_samples: 1000000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 8
### output
output_dir: saves/mol-instruct-llava3-next
logging_steps: 10
save_steps: 1000
plot_loss: true
overwrite_output_dir: true
save_total_limit: 100
### train
per_device_train_batch_size: 8
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
# learning_rate: 5e-5
num_train_epochs: 1.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
# plot_loss is set in the output section above; duplicate key removed (strict YAML parsers reject duplicates)
gradient_checkpointing: true
ddp_timeout: 180000000
# trust_remote_code: True
# optim: adamw_torch
cache_dir: ./JUNKS/mol-instruct-llava3-next
### validate
# do_predict: true
# predict_with_generate: true
# eval_dataset: mllmChem_eval_OCSR, mllmChem_eval_MDG, mllmChem_eval_FRP, mllmChem_eval_RT, mllmChem_eval_RP, mllmChem_eval_PP
### eval
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500