{
  "num_heads": 8,
  "attention_dim": 512,
  "vocab_size": 50260,
  "num_blocks": 8,
  "ff_dim": 1536,
  "dropout_rate": 0.1,
  "possible_opt_path": "Loaded_model",
  "max_len": 2048,
  "emb_splt": 256,
  "attn_chunks": 1,
  "use_fash_attention": false,
  "emb_init_range": 0.02,
  "use_rope": true,
  "emb_scaling_factor": 1,
  "res_scale": 1
}