### model
# model_name_or_path: xtuner/llava-llama-3-8b-v1_1-hf
# model_name_or_path: Intel/llava-llama-3-8b
# model_name_or_path: ./models/Offical_models/Intel--llava-llama-3-8b
# model_name_or_path: lmms-lab/llama3-llava-next-8b
# model_name_or_path: llava-hf/llava-1.5-7b-hf
# model_name_or_path: llava-hf/llama3-llava-next-8b-hf
# model_name_or_path: Qwen/Qwen2.5-VL-7B
# model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
model_name_or_path: llava-hf/llama3-llava-next-8b-hf
trust_remote_code: true
train_from_scratch: false
image_max_pixels: 262144
video_max_pixels: 16384
### method
stage: sft
do_train: true
finetuning_type: lora
lora_rank: 64
lora_alpha: 16
lora_dropout: 0
lora_target: all
# lora_target: v_proj,o_proj,q_proj,k_proj,molecule_projector.linear_1,molecule_projector.linear_2
### Full
# finetuning_type: full
# freeze_vision_tower: true  # choices: [true, false]
# freeze_multi_modal_projector: true  # choices: [true, false]
# freeze_language_model: false  # choices: [true, false]
# deepspeed: examples/deepspeed/ds_z3_config.json  # choices: [ds_z0_config.json, ds_z2_config.json, ds_z3_config.json]
### dataset
dataset_dir: ./data
dataset: mol-instruct
# dataset: llava_1k_en
# template: llava_next_llama3
template: llava_next
# template: qwen2_vl
# cutoff_len: 2048
cutoff_len: 4096
max_samples: 1000000
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 8
### output
output_dir: saves/mol-instruct-llava3-next
logging_steps: 10
save_steps: 1000
plot_loss: true
overwrite_output_dir: true
save_total_limit: 100
### train
per_device_train_batch_size: 8
gradient_accumulation_steps: 8
learning_rate: 1.0e-4
# learning_rate: 5e-5
num_train_epochs: 1.0
lr_scheduler_type: cosine
warmup_ratio: 0.1
bf16: true
# plot_loss is set in the output section above; duplicate key removed (strict YAML parsers reject duplicates)
gradient_checkpointing: true
ddp_timeout: 180000000
# trust_remote_code: True
# optim: adamw_torch
cache_dir: ./JUNKS/mol-instruct-llava3-next
### validate
# do_predict: true
# predict_with_generate: true
# eval_dataset: mllmChem_eval_OCSR, mllmChem_eval_MDG, mllmChem_eval_FRP, mllmChem_eval_RT, mllmChem_eval_RP, mllmChem_eval_PP
### eval
# val_size: 0.1
# per_device_eval_batch_size: 1
# eval_strategy: steps
# eval_steps: 500