zswzswzsw
/

grpo_run_code

Upload folder using huggingface_hub

ae40651 verified 3 months ago

909 Bytes

	# Model arguments
	model_name_or_path: alignment-handbook/zephyr-7b-sft-full
	torch_dtype: null

	# Data training arguments
	# For definitions, see: src/h4/training/config.py
	dataset_mixer:
	HuggingFaceH4/ultrafeedback_binarized: 1.0
	dataset_splits:
	- train_prefs
	- test_prefs
	preprocessing_num_workers: 12

	# DPOTrainer arguments
	bf16: true
	beta: 0.01
	do_eval: true
	eval_strategy: steps
	eval_steps: 100
	gradient_accumulation_steps: 2
	gradient_checkpointing: true
	gradient_checkpointing_kwargs:
	use_reentrant: False
	hub_model_id: zephyr-7b-dpo-full
	learning_rate: 5.0e-7
	log_level: info
	logging_steps: 10
	lr_scheduler_type: cosine
	max_length: 1024
	max_prompt_length: 512
	num_train_epochs: 1
	optim: adamw_torch
	output_dir: data/zephyr-7b-dpo-full
	per_device_train_batch_size: 8
	per_device_eval_batch_size: 8
	push_to_hub: true
	save_strategy: "steps"
	save_steps: 100
	save_total_limit: 1
	seed: 42
	warmup_ratio: 0.1