data:
  collator:
    pad_to_multiple_of: 8
  dataloader:
    drop_last: true
    num_workers: 4
    pin_memory: true
    shuffle: true
  processed_dir: finetune_processed_experiences
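A minimal sketch of how the `data` block might be consumed, assuming the processed dataset was saved with Hugging Face `datasets` and that a standard causal-LM collator is in use (the config does not name the collator class):

```python
from datasets import load_from_disk
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, DataCollatorForLanguageModeling

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-32B-AWQ")   # model.name
train_dataset = load_from_disk("finetune_processed_experiences")  # data.processed_dir

# data.collator: pad each batch up to a multiple of 8 (tensor-core friendly)
collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer, mlm=False, pad_to_multiple_of=8
)

# data.dataloader
train_loader = DataLoader(
    train_dataset,
    batch_size=1,          # training.per_device_batch_size
    shuffle=True,          # data.dataloader.shuffle
    num_workers=4,         # data.dataloader.num_workers
    pin_memory=True,       # data.dataloader.pin_memory
    drop_last=True,        # data.dataloader.drop_last
    collate_fn=collator,
)
```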
fsdp:
  activation_checkpointing: true
  mixed_precision: true
  sharding_strategy: FULL_SHARD
gpu:
  data_parallel: true
  single_gpu: false
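A sketch of how the `fsdp` and `gpu` blocks could map onto PyTorch FSDP. `model` is assumed to be the already-loaded network (see the `model` block below), and bf16 is an assumption for the mixed-precision dtype, since the config leaves `model.dtype` null:

```python
import torch
from torch.distributed.fsdp import (
    FullyShardedDataParallel as FSDP,
    MixedPrecision,
    ShardingStrategy,
)
from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import (
    apply_activation_checkpointing,
)

# gpu.data_parallel / gpu.single_gpu: one FSDP rank per GPU, launched e.g.
# via torchrun; assumes torch.distributed is already initialized.
fsdp_model = FSDP(
    model,
    sharding_strategy=ShardingStrategy.FULL_SHARD,  # fsdp.sharding_strategy
    mixed_precision=MixedPrecision(                 # fsdp.mixed_precision
        param_dtype=torch.bfloat16,
        reduce_dtype=torch.bfloat16,
        buffer_dtype=torch.bfloat16,
    ),
)

# fsdp.activation_checkpointing: recompute decoder-layer activations in backward
apply_activation_checkpointing(
    fsdp_model,
    check_fn=lambda m: "DecoderLayer" in type(m).__name__,  # heuristic, an assumption
)
```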
huggingface:
  create_model_card: true
  repo_name: dtadpole/KernelCoder-32B-AWQ_20250621-161329
  upload: true
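The `huggingface` block presumably drives a post-training upload step, roughly equivalent to the calls below (`model` and `tokenizer` are assumed to be the finished artifacts; `create_model_card` likely corresponds to writing a README alongside the upload):

```python
repo_name = "dtadpole/KernelCoder-32B-AWQ_20250621-161329"  # huggingface.repo_name

# huggingface.upload: push weights and tokenizer to the Hub
model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)
```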
lora:
  alpha: 64
  bias: none
  dropout: 0.05
  r: 64
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - down_proj
    - up_proj
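The `lora` block maps one-to-one onto a PEFT `LoraConfig` applied to all attention and MLP projections; a sketch, where the causal-LM `task_type` and the pre-loaded `model` are assumptions:

```python
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=64,                   # lora.r
    lora_alpha=64,          # lora.alpha
    lora_dropout=0.05,      # lora.dropout
    bias="none",            # lora.bias
    task_type="CAUSAL_LM",  # assumption: causal-LM fine-tuning
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "down_proj", "up_proj",     # MLP projections
    ],
)
model = get_peft_model(model, lora_config)  # `model` loaded as in the model block
```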
model:
  dtype: null
  load_in_4bit: false
  max_seq_length: 8192
  name: Qwen/Qwen3-32B-AWQ
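A loading sketch for the `model` block, reading `dtype: null` as "let transformers infer the dtype" and `load_in_4bit: false` as skipping bitsandbytes quantization (the checkpoint is already AWQ-quantized):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-32B-AWQ"     # model.name
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.model_max_length = 8192     # model.max_seq_length

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",   # model.dtype: null -> infer from the checkpoint
    device_map="auto",
)
```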
test:
  default_prompt: |
    <|im_start|>system
    You are a helpful assistant.<|im_end|>
    <|im_start|>user
    What is machine learning?<|im_end|>
    <|im_start|>assistant
  generation:
    do_sample: true
    max_new_tokens: 1024
    temperature: 0.7
    use_cache: true
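A smoke-test sketch combining `test.default_prompt` (a ChatML prompt that ends with an open assistant turn) with `test.generation`, assuming `model` and `tokenizer` from the loading sketch above:

```python
default_prompt = (
    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\nWhat is machine learning?<|im_end|>\n"
    "<|im_start|>assistant\n"
)

inputs = tokenizer(default_prompt, return_tensors="pt").to(model.device)
output = model.generate(
    **inputs,
    do_sample=True,        # test.generation.do_sample
    temperature=0.7,       # test.generation.temperature
    max_new_tokens=1024,   # test.generation.max_new_tokens
    use_cache=True,        # test.generation.use_cache
)

# Decode only the newly generated tokens
print(tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```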
training:
  gradient_accumulation_steps: 1
  learning_rate: 3.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 0.75
  max_steps: -1
  num_train_epochs: 1
  num_workers: 4
  optim: paged_adamw_8bit
  output_dir: ../finetune_model_output
  per_device_batch_size: 1
  save_steps: 100
  save_total_limit: 3
  seed: 3407
  use_awq_precision: true
  use_custom_loss_masking: true
  warmup_steps: 10
  weight_decay: 0.05
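Most of the `training` block lines up field-for-field with `transformers.TrainingArguments`, as sketched below. `use_awq_precision` and `use_custom_loss_masking` look like project-specific flags with no stock counterpart, so they are omitted here:

```python
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="../finetune_model_output",  # training.output_dir
    per_device_train_batch_size=1,          # training.per_device_batch_size
    gradient_accumulation_steps=1,
    learning_rate=3.0e-05,
    lr_scheduler_type="cosine",
    warmup_steps=10,
    num_train_epochs=1,
    max_steps=-1,                           # -1: derive step count from epochs
    max_grad_norm=0.75,
    weight_decay=0.05,
    optim="paged_adamw_8bit",               # bitsandbytes paged 8-bit AdamW
    logging_steps=1,
    save_steps=100,
    save_total_limit=3,
    seed=3407,
    dataloader_num_workers=4,               # training.num_workers
)
```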