# Source: Hugging Face file page, uploaded by Neroism8422
# Commit message: "Add tuning configuration file, LlamaFactory can reproduce."
# Revision: 2ef28c7 (verified)
---
# LlamaFactory supervised fine-tuning (SFT) recipe: LoRA on llava-next (llama3-8b)
# with the mol-instruct multimodal dataset. Commented-out keys record previously
# tried alternatives and the optional full-finetune / evaluation configurations.

### model
# model_name_or_path: xtuner/llava-llama-3-8b-v1_1-hf
# model_name_or_path: Intel/llava-llama-3-8b
# model_name_or_path: ./models/Offical_models/Intel--llava-llama-3-8b
# model_name_or_path: lmms-lab/llama3-llava-next-8b
# model_name_or_path: llava-hf/llava-1.5-7b-hf
# model_name_or_path: llava-hf/llama3-llava-next-8b-hf Qwen/Qwen2.5-VL-7B
# model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
model_name_or_path: llava-hf/llama3-llava-next-8b-hf
trust_remote_code: true
train_from_scratch: false
image_max_pixels: 262144  # 512 x 512 cap per image
video_max_pixels: 16384  # 128 x 128 cap per video frame

### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 64
lora_alpha: 16
lora_dropout: 0
lora_target: all  # attach LoRA to all linear layers
# lora_target: v_proj,o_proj,q_proj,k_proj,molecule_projector.linear_1,molecule_projector.linear_2

### full finetuning (alternative to LoRA, disabled)
# finetuning_type: full
# freeze_vision_tower: true  # choices: [true, false]
# freeze_multi_modal_projector: true  # choices: [true, false]
# freeze_language_model: false  # choices: [true, false]
# deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]

### dataset
dataset_dir: ./data
dataset: mol-instruct
# dataset: llava_1k_en
# template: llava_next_llama3
template: llava_next
# template: qwen2_vl
# cutoff_len: 2048
cutoff_len: 4096
max_samples: 1000000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 8

### output
output_dir: saves/mol-instruct-llava3-next
logging_steps: 10
save_steps: 1000
plot_loss: true
overwrite_output_dir: true
save_total_limit: 100

### train
per_device_train_batch_size: 8
gradient_accumulation_steps: 8  # effective batch = 8 x 8 per device
learning_rate: 1.0e-4
# learning_rate: 5.0e-5
num_train_epochs: 1.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
gradient_checkpointing: true
ddp_timeout: 180000000
# optim: adamw_torch
cache_dir: ./JUNKS/mol-instruct-llava3-next

### validate / predict (disabled)
# do_predict: true
# predict_with_generate: true
# eval_dataset: mllmChem_eval_OCSR, mllmChem_eval_MDG, mllmChem_eval_FRP, mllmChem_eval_RT, mllmChem_eval_RP, mllmChem_eval_PP

### eval (disabled)
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500