#!/bin/bash
# Environment setup: initialize conda and activate the s2s env.
source /linzhihang/conda_env/init.sh
conda activate s2s
cd /linzhihang/zhangyuhao/ACLlama_s2s/scripts

prefix=/linzhihang/zhangyuhao/ACLlama_s2s

# NAME
TASK=S2S
stage=finetune  # edit in config
model_size=small    # edit in config
lr=3e-5
# subtag=shareEmbW_CR_0507_base_Echox_s2s_pretrained_0503
# subtag=FirstTurnS2T+aligner_0505
# subtag=QA_OneTurn+aligner_Lora_0510
# subtag=ASR_UnitLanguage_4gram_BPE+aligner_0513
# subtag=QA_OneTurn_ALL_Lora_0516
# subtag=QA_OneTurn_ALL_Lora_0517
#subtag=QA_OneTurn_ALL_Lora_0618_newGen_80k_spm_epoch10-embedcon-10epoch-large-adapter-add-prefix
subtag=kd_offline_base_merge_0819_bench
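# subtag labels this particular run; it is appended to model_tag below so
# that checkpoints from different experiments land in distinct directories.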



# base_model=/mnt/speech/zhangyuhao/text_to_speech/ACLlama_t2u/Echox_s2s_0516 # unit
#base_model=/linzhihang/zhangyuhao/ACLlama_s2s/Echox_s2s_unit_language_0529 # unit language
#base_model=/linzhihang/zhangyuhao/ACLlama_s2s/Echox_s2s_unit_0706 # unit language
base_model=/linzhihang/zhangyuhao/ACLlama_s2s/output/S2S/S2S_finetune_small_lr3e-5_magpie-slice-unit-languge-new-0717-filter_kd_offline_base_merge_0717
#base_model=/linzhihang/zhangyuhao/ACLlama_s2s/output/S2S/S2S_finetune_small_lr3e-5_S2S-KD-unit-new-0816-filter-3B_kd_offline_base_merge_0816

# DATA

# data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/magpie_wer-filter-kd-40k-echo-ul-spm.json
#data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/magpie-slice-unit-languge-new-0618.json
#data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/magpie-slice-unit-languge-new-0629-filter.json
#data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/magpie-slice-unit-languge-new-0706-filter.json
data_json=/linzhihang/zhangyuhao/ACLlama_s2s/data/S2S-KD-unit-language-new-0819-3B-bench-filter.json
#S2S-KD-unit-new-0819-3B-bench-filter.json
#S2S-KD-unit-new-0816-filter-3B.json
#magpie-slice-unit-languge-new-0717-filter.json
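
# Optional sanity check (not part of the original flow): fail fast if the
# base model or training manifest is missing, before launching 8 workers.
[ -d "$base_model" ] || { echo "base_model not found: $base_model" >&2; exit 1; }
[ -f "$data_json" ]  || { echo "data_json not found: $data_json" >&2; exit 1; }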



training_set=${TASK}_${stage}_${model_size}_lr${lr}
model_tag="${training_set}_$(basename "$data_json" .json)_${subtag}"
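# For illustration, with the settings above model_tag expands to:
#   S2S_finetune_small_lr3e-5_S2S-KD-unit-language-new-0819-3B-bench-filter_kd_offline_base_merge_0819_bench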

checkpoint_dir=$prefix/output/$TASK/$model_tag
echo "$checkpoint_dir"
mkdir -p "$checkpoint_dir"
cp "$0" "$checkpoint_dir/"
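# Snapshotting this script next to the checkpoints records the exact
# hyperparameters used for the run.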

# CMD
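# NCCL_P2P_DISABLE / NCCL_IB_DISABLE force NCCL onto shared-memory/socket
# transports, avoiding hangs on nodes without working GPU peer-to-peer or
# InfiniBand. torchrun launches 8 workers on this single node, one per
# visible GPU; the effective batch size is
# 8 GPUs x per_device_train_batch_size (1) x gradient_accumulation_steps (1) = 8.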
NCCL_P2P_DISABLE=1 \
NCCL_IB_DISABLE=1 \
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
torchrun \
--nproc_per_node 8 \
--nnodes 1 \
--node_rank 0 \
--master_addr localhost \
--master_port 7897 \
$prefix/finetune_acllama_s2s_zyh.py \
--audio_model_name_or_path "/linzhihang/LLMs/whisper-v3" \
--text_model_name_or_path $base_model \
--data_path "$data_json" \
--fp16 True \
--output_dir "$checkpoint_dir" \
--num_train_epochs 1 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 1 \
--gradient_accumulation_steps 1 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 200 \
--save_total_limit 1 \
--learning_rate $lr \
--weight_decay 0.1 \
--adam_beta2 0.95 \
--warmup_ratio 0.01 \
--lr_scheduler_type "inverse_sqrt" \
--logging_steps 1 \
--report_to "none" \
--model_max_length 1024 \
--gradient_checkpointing True \
--lazy_preprocess True \
--deepspeed "$prefix/config/ds_config_zero2.json"

# Optional: append --use_lora to the command above to enable LoRA fine-tuning,
# and redirect output with  > "$checkpoint_dir/train.log" 2>&1  to keep a log.
#--data_path "$prefix/data/libri_train_update.json" \
#--text_model_name_or_path "/mnt/user/zhangyuhao/LLM/llama3-instruct/llama3_1-8B/" \
#--data_path "../data/libri_train_other460.json" \
#--data_path "../data/train_mt_orgnize.json" \