Safetensors
qwen3
Qwen3-58B-Embiggened / stage1_v2_metadata.json
ehartford's picture
Upload folder using huggingface_hub
0b84f23 verified
{
"stage": "1-v2-sharted",
"source_model": "Qwen/Qwen3-32B",
"method": "gpu_accelerated_structure_aware_interpolation_sharted",
"num_gpus_used": 8,
"fixes": [
"Corrected o_proj dimensions to 8192x8192",
"Proper handling of GQA architecture"
],
"optimizations": [
"Multi-GPU parallel processing",
"JIT-compiled operations",
"Sharted weight loading/saving \ud83d\udca9",
"Efficient memory management"
],
"sharting_info": {
"format": "safetensors",
"max_shart_size": "5GB",
"poop_emoji": "\ud83d\udca9"
}
}