#!/usr/bin/env bash
# Launch fine-tuning of the ACLlama S2S (speech-to-speech) model with
# torchrun on 8 local GPUs, using DeepSpeed ZeRO-2. The checkpoint directory
# name is derived from the task/stage/lr config, the data file, and a run
# subtag; a copy of this launcher is snapshotted into it for reproducibility.
#
# NOTE(review): intentionally no `set -e` — the env-setup lines below are
# best-effort and may fail on hosts where conda/the NFS mount differ.

# ENV
source /linzhihang/conda_env/init.sh
conda activate s2s
cd /linzhihang/zhangyuhao/ACLlama_s2s/scripts

prefix=/linzhihang/zhangyuhao/ACLlama_s2s

# NAME
TASK=S2S
stage=finetune     # edit in config
model_size=small   # edit in config
lr=3e-5

# Run subtag — earlier experiment tags kept (commented) as run history:
# subtag=shareEmbW_CR_0507_base_Echox_s2s_pretrained_0503
# subtag=FirstTurnS2T+aligner_0505
# subtag=QA_OneTurn+aligner_Lora_0510
# subtag=ASR_UnitLanguage_4gram_BPE+aligner_0513
# subtag=QA_OneTurn_ALL_Lora_0516
# subtag=QA_OneTurn_ALL_Lora_0517
# subtag=QA_OneTurn_ALL_Lora_0618_newGen_80k_spm_epoch10-embedcon-10epoch-large-adapter-add-prefix
subtag=kd_offline_base_merge_0820_tune_bench

# Base checkpoint to continue training from (earlier candidates kept as history):
# base_model=/mnt/speech/zhangyuhao/text_to_speech/ACLlama_t2u/Echox_s2s_0516 # unit
# base_model=/linzhihang/zhangyuhao/ACLlama_s2s/Echox_s2s_unit_language_0529 # unit language
# base_model=/linzhihang/zhangyuhao/ACLlama_s2s/Echox_s2s_unit_language_0625 # unit language
# base_model=/linzhihang/zhangyuhao/ACLlama_s2s/Echox_s2s_KD_unit_langauge_0802_large_8B_check
base_model=/linzhihang/zhangyuhao/ACLlama_s2s/output/S2S/S2S_finetune_small_lr1e-4_S2S-KD-unit-languge-new-0804-filter_kd_offline_base_merge_0808_20string_1e-4/checkpoint-16000
# base_model=/linzhihang/zhangyuhao/ACLlama_s2s/output/S2S/S2S_finetune_small_lr1e-4_S2S-KD-unit-new-0812-bench-filter-8B_kd_offline_base_merge_0814/checkpoint-12200

# DATA (earlier data sets kept as history):
# data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/magpie_wer-filter-kd-40k-echo-ul-spm.json
# data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/magpie-slice-unit-languge-new-0618.json
# data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/magpie-slice-unit-languge-new-0629-filter.json
# data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/magpie-slice-unit-languge-new-0706-filter.json
# data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/magpie-slice-unit-languge-new-0717-filter.json
# data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/S2S-KD-unit-languge-new-0804-filter.json
data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/S2S-KD-unit-languge-new-0808-bench-filter-8B.json
# data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/S2S-KD-unit-new-0812-bench-filter-8B.json

# Output layout: <prefix>/output/<TASK>/<task_stage_size_lr>_<data basename>_<subtag>
training_set=${TASK}_${stage}_${model_size}_lr${lr}
model_tag="${training_set}_$(basename "$data_json" .json)_${subtag}"
checkpoint_dir=$prefix/output/$TASK/$model_tag
echo "$checkpoint_dir"
mkdir -p "$checkpoint_dir"
cp "$0" "$checkpoint_dir/"   # snapshot this launcher next to the checkpoints

# CMD
# NCCL P2P/IB disabled — presumably required by this cluster's fabric; confirm
# before re-enabling. Single node, 8 ranks, one process per visible GPU.
NCCL_P2P_DISABLE=1 \
NCCL_IB_DISABLE=1 \
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
torchrun \
    --nproc_per_node 8 \
    --nnodes 1 \
    --node_rank 0 \
    --master_addr localhost \
    --master_port 7897 \
    "$prefix/finetune_acllama_s2s_zyh.py" \
    --audio_model_name_or_path "/linzhihang/LLMs/whisper-v3" \
    --text_model_name_or_path "$base_model" \
    --data_path "$data_json" \
    --fp16 True \
    --output_dir "$checkpoint_dir" \
    --num_train_epochs 1 \
    --per_device_train_batch_size 1 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 100 \
    --save_total_limit 1 \
    --learning_rate "$lr" \
    --weight_decay 0.1 \
    --adam_beta2 0.95 \
    --warmup_ratio 0.01 \
    --lr_scheduler_type "inverse_sqrt" \
    --logging_steps 1 \
    --report_to "none" \
    --model_max_length 1024 \
    --gradient_checkpointing True \
    --lazy_preprocess True \
    --deepspeed "$prefix/config/ds_config_zero2.json" #\
    #--use_lora #> $checkpoint_dir/train.log # 2>&1
# --use_lora
# --data_path "$prefix/data/libri_train_update.json" \
# --text_model_name_or_path "/mnt/user/zhangyuhao/LLM/llama3-instruct/llama3_1-8B/" \
# --data_path "../data/libri_train_other460.json" \
# --data_path "../data/train_mt_orgnize.json" \