{
  "model_type": "krdmodel",
  "vocab_size": 111,
  "dim": 1024,
  "n_layers": 16,
  "n_heads": 16,
  "n_kv_heads": 4,
  "ffn_dim": 2816,
  "max_seq_len": 2048,
  "batch_size": 4,
  "gradient_accumulation": 8,
  "train_steps": 5000,
  "lr": 2e-4,
  "mixed_precision": "fp16",
  "lora_rank": 32,
  "use_flash": true,
  "grad_checkpoint": true
}