gitesh-grover's picture
Upload 9 files
a48e448 verified
raw
history blame contribute delete
650 Bytes
from dataclasses import dataclass
@dataclass
class Config:
    """Model hyperparameters, training settings and default file paths.

    Every attribute is an annotated dataclass field, so any of them can be
    overridden per instance through the constructor, e.g.
    ``Config(nn_layer=6, saved_model_path='other.pth')``.
    """

    # --- Model ---
    # Number of tokens: 50,000 BPE merges + 256 byte tokens + 1 <|endoftext|> token.
    vocab_size: int = 50257
    nn_layer: int = 12  # number of layers
    nn_head: int = 12  # number of heads
    nn_embed: int = 768  # embedding dimension
    # Max token sequence length (for the positional embedding), i.e. the block size.
    nn_max_tok_seq: int = 1024
    nn_train_tok_seq: int = 32  # actual token sequence length used in training
    nn_mlp_expansion: int = 4  # expansion factor in the MLP layer

    # --- Training ---
    batch_size: int = 256
    # NOTE(review): same value as nn_train_tok_seq and a similar name; looks
    # like a duplicate setting -- confirm which one the consumers read.
    train_tok_size: int = 32

    # --- File paths ---
    # These carry a type annotation on purpose: @dataclass ignores unannotated
    # names, which would leave them as plain class attributes that are absent
    # from __init__, repr() and equality comparison.
    saved_model_path: str = 'data/model_tf.pth'
    train_input_file: str = 'data/input.txt'