---
# accelerate_config.yaml - multi-GPU training configuration
#
# Each setting sits on its own line. When the whole file is collapsed onto a
# single line that starts with `#`, YAML treats everything as one comment and
# the document loads as empty.

# Launch / distributed environment settings
compute_environment: LOCAL_MACHINE
distributed_type: MULTI_GPU
# Quoted on purpose: a bare `no` is a boolean under YAML 1.1 loaders.
downcast_bf16: 'no'
gpu_ids: all
machine_rank: 0
main_training_function: main
mixed_precision: fp16
num_machines: 1
num_processes: 4  # adjust to the number of GPUs
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false

# RLHF-specific settings
# NOTE(review): these keys are training hyperparameters rather than launcher
# options. Confirm that whatever loads this file accepts them at the top
# level; some Accelerate versions reject unknown config keys.
gradient_accumulation_steps: 8
gradient_clipping: 1.0
# Written with an explicit decimal point so that YAML 1.1 loaders (e.g.
# PyYAML) resolve it as a float; a bare `1e-5` is loaded as a string there.
learning_rate: 1.0e-5
dataloader_drop_last: true