from dataclasses import dataclass


@dataclass
class Config:
    vocab_size: int = 50257      # number of tokens: 50,000 BPE merges + 256 byte tokens + 1 <|endoftext|> token
    nn_layer: int = 12           # number of transformer layers
    nn_head: int = 12            # number of attention heads
    nn_embed: int = 768          # embedding dimension
    nn_max_tok_seq: int = 1024   # max token sequence length (for positional embedding); block size
    nn_train_tok_seq: int = 32   # actual training token sequence length
    nn_mlp_expansion: int = 4    # expansion factor in the MLP layer
    batch_size: int = 256
    train_tok_size: int = 32     # tokens per training example (matches nn_train_tok_seq)
    saved_model_path: str = 'data/model_tf.pth'
    train_input_file: str = 'data/input.txt'
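As a quick sanity check, here is a minimal sketch of how this config can be instantiated and validated; the derived quantities (head_dim, mlp_hidden) and the small-model override are illustrative additions, not part of the original file:

if __name__ == '__main__':
    config = Config()

    # Derived quantities follow directly from the fields above (illustrative, not in the original).
    head_dim = config.nn_embed // config.nn_head             # 768 / 12 = 64 dimensions per head
    mlp_hidden = config.nn_mlp_expansion * config.nn_embed   # 4 * 768 = 3072 hidden units in the MLP

    # Basic consistency checks a training script might run before building the model.
    assert config.nn_embed % config.nn_head == 0, 'embedding dim must divide evenly across heads'
    assert config.nn_train_tok_seq <= config.nn_max_tok_seq, 'training sequence cannot exceed block size'

    print(f'head_dim={head_dim}, mlp_hidden={mlp_hidden}')

    # Because Config is a dataclass, fields can be overridden per experiment
    # without editing the defaults (hypothetical smaller variant):
    small = Config(nn_layer=4, nn_head=4, nn_embed=256)
    print(small)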