nl2sql-pretrained / training_configuration.json
karthik-2905's picture
Upload folder using huggingface_hub
e18f039 verified
raw
history blame contribute delete
984 Bytes
{
  "dataset": {
    "total_examples": 24293,
    "training_examples": 21863,
    "validation_examples": 2430,
    "data_sources": {
      "synthetic_sql": "60%",
      "spider_dataset": "25%",
      "wikisql_dataset": "15%"
    },
    "data_quality": "high",
    "mysql_specificity": "100%"
  },
  "training_setup": {
    "training_type": "causal_language_modeling",
    "batch_size": 6,
    "sequence_length": 256,
    "learning_rate": 0.0003,
    "weight_decay": 0.1,
    "optimizer": "AdamW",
    "scheduler": "CosineAnnealingLR",
    "gradient_clipping": 1.0
  },
  "hardware_configuration": {
    "gpu": "RTX 5080 16GB",
    "memory_usage": "~2GB VRAM",
    "training_speed": "42.3 batches/second",
    "total_training_time": "12 minutes",
    "energy_efficiency": "excellent"
  },
  "model_configuration": {
    "architecture": "GPT-style",
    "layers": 8,
    "heads": 8,
    "hidden_size": 512,
    "feedforward_size": 2048,
    "dropout": 0.1,
    "max_sequence": 512
  }
}