seed: 123

### model
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct
trust_remote_code: true
flash_attn: auto
use_cache: false

### method
stage: sft
do_train: true
finetuning_type: prompt-tuning
task_type: CAUSAL_LM
num_virtual_tokens: 100

### dataset
dataset: cola
template: llama3
cutoff_len: 2048
overwrite_cache: true
preprocessing_num_workers: 16
dataloader_num_workers: 4
packing: false

### output
output_dir: saves/prompt-tuning/llama-3-8b-instruct/train_cola_1752763927
logging_steps: 5
save_steps: 0.05
overwrite_output_dir: true
save_only_model: false
plot_loss: true
include_num_input_tokens_seen: true
push_to_hub: true
push_to_hub_organization: rbelanec
load_best_model_at_end: true
save_total_limit: 1

### train
per_device_train_batch_size: 8
learning_rate: 5.0e-5
num_train_epochs: 10.0
weight_decay: 1.0e-5
lr_scheduler_type: cosine
bf16: true
ddp_timeout: 180000000
resume_from_checkpoint: null
warmup_ratio: 0.1
optim: adamw_torch
report_to:
- wandb

### eval
per_device_eval_batch_size: 8
eval_strategy: steps
eval_steps: 0.05
val_size: 0.1
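
# --- Usage note (not part of the original file) ---
# A config in this format is typically launched with the LLaMA-Factory CLI; the
# filename used here is only a placeholder:
#
#   llamafactory-cli train train_cola.yaml
#
# The "### method" block selects prompt tuning with 100 virtual tokens. Assuming the
# adapter is backed by Hugging Face PEFT (an assumption, not stated in the config),
# a minimal PEFT-only sketch of the equivalent setup would look like the Python below.

import torch
from peft import PromptTuningConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

# Load the frozen base model named in the "### model" block, in bf16 to match bf16: true.
base = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

# 100 trainable virtual-token embeddings are prepended to every input; only these
# embeddings are updated during training (matching num_virtual_tokens: 100), while
# the base model weights stay frozen.
peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    num_virtual_tokens=100,
)
model = get_peft_model(base, peft_config)
model.print_trainable_parameters()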