{
  "model": {
    "vocab_size": 8192,
    "context_length": 128,
    "d_embedding": 128,
    "d_intermediate": 512,
    "n_heads": 4,
    "n_layers": 4,
    "qkv_bias": false
  },
  "train": {
    "peak_lr": 0.001,
    "warmup_ratio": 0.01,
    "n_epochs": 2,
    "batch_size": 8,
    "weight_decay": 0.1
  }
}