nl2sql-pretrained / architecture.json
karthik-2905's picture
Upload folder using huggingface_hub
e18f039 verified
{
"model_name": "MySQL Query Generator From Scratch",
"version": "1.0.0",
"architecture": {
"type": "GPT-style Transformer",
"variant": "Decoder-only",
"trained_from": "absolute_scratch",
"no_pretrained_weights": true,
"layers": {
"total_transformer_blocks": 8,
"attention_heads_per_layer": 8,
"hidden_size": 512,
"feedforward_size": 2048,
"max_sequence_length": 512,
"dropout_rate": 0.1
},
"components": {
"token_embedding": "4206 x 512",
"position_embedding": "512 x 512",
"multi_head_attention": "Custom implementation",
"feed_forward": "GELU activation",
"layer_norm": "Pre-norm configuration",
"output_projection": "512 x 4206"
}
},
"parameters": {
"total_parameters": 29789184,
"trainable_parameters": 29789184,
"embedding_parameters": 2415616,
"transformer_parameters": 27373568,
"model_size_mb": 113.63671875
},
"vocabulary": {
"total_tokens": 4206,
"special_tokens": 4,
"sql_keywords": "SELECT, FROM, WHERE, JOIN, GROUP BY, ORDER BY, LIMIT, etc.",
"tokenization": "Custom word-level tokenizer",
"built_from_scratch": true
}
}