Add MoE configs
fdd8ac0
|
{ |
|
"1": { |
|
"BLOCK_SIZE_M": 16, |
|
"BLOCK_SIZE_N": 64, |
|
"BLOCK_SIZE_K": 256, |
|
"GROUP_SIZE_M": 1, |
|
"num_warps": 4, |
|
"num_stages": 4 |
|
}, |
|
"2": { |
|
"BLOCK_SIZE_M": 16, |
|
"BLOCK_SIZE_N": 64, |
|
"BLOCK_SIZE_K": 256, |
|
"GROUP_SIZE_M": 1, |
|
"num_warps": 4, |
|
"num_stages": 3 |
|
}, |
|
"4": { |
|
"BLOCK_SIZE_M": 16, |
|
"BLOCK_SIZE_N": 64, |
|
"BLOCK_SIZE_K": 256, |
|
"GROUP_SIZE_M": 1, |
|
"num_warps": 4, |
|
"num_stages": 3 |
|
}, |
|
"8": { |
|
"BLOCK_SIZE_M": 16, |
|
"BLOCK_SIZE_N": 64, |
|
"BLOCK_SIZE_K": 256, |
|
"GROUP_SIZE_M": 1, |
|
"num_warps": 4, |
|
"num_stages": 3 |
|
}, |
|
"16": { |
|
"BLOCK_SIZE_M": 16, |
|
"BLOCK_SIZE_N": 64, |
|
"BLOCK_SIZE_K": 256, |
|
"GROUP_SIZE_M": 1, |
|
"num_warps": 4, |
|
"num_stages": 5 |
|
}, |
|
"24": { |
|
"BLOCK_SIZE_M": 32, |
|
"BLOCK_SIZE_N": 64, |
|
"BLOCK_SIZE_K": 256, |
|
"GROUP_SIZE_M": 1, |
|
"num_warps": 8, |
|
"num_stages": 4 |
|
}, |
|
"32": { |
|
"BLOCK_SIZE_M": 64, |
|
"BLOCK_SIZE_N": 64, |
|
"BLOCK_SIZE_K": 128, |
|
"GROUP_SIZE_M": 1, |
|
"num_warps": 4, |
|
"num_stages": 5 |
|
}, |
|
"48": { |
|
"BLOCK_SIZE_M": 64, |
|
"BLOCK_SIZE_N": 64, |
|
"BLOCK_SIZE_K": 128, |
|
"GROUP_SIZE_M": 1, |
|
"num_warps": 4, |
|
"num_stages": 5 |
|
}, |
|
"64": { |
|
"BLOCK_SIZE_M": 64, |
|
"BLOCK_SIZE_N": 64, |
|
"BLOCK_SIZE_K": 128, |
|
"GROUP_SIZE_M": 1, |
|
"num_warps": 4, |
|
"num_stages": 5 |
|
}, |
|
"96": { |
|
"BLOCK_SIZE_M": 64, |
|
"BLOCK_SIZE_N": 128, |
|
"BLOCK_SIZE_K": 128, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 4, |
|
"num_stages": 4 |
|
}, |
|
"128": { |
|
"BLOCK_SIZE_M": 64, |
|
"BLOCK_SIZE_N": 128, |
|
"BLOCK_SIZE_K": 128, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 4, |
|
"num_stages": 4 |
|
}, |
|
"256": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 128, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 64, |
|
"num_warps": 8, |
|
"num_stages": 5 |
|
}, |
|
"512": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 4 |
|
}, |
|
"1024": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 32, |
|
"num_warps": 8, |
|
"num_stages": 4 |
|
}, |
|
"1536": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 32, |
|
"num_warps": 8, |
|
"num_stages": 4 |
|
}, |
|
"2048": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 3 |
|
}, |
|
"3072": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 64, |
|
"num_warps": 8, |
|
"num_stages": 4 |
|
}, |
|
"4096": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 32, |
|
"num_warps": 8, |
|
"num_stages": 4 |
|
}, |
|
"5120": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 4 |
|
}, |
|
"9216": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 3 |
|
}, |
|
"13312": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 3 |
|
}, |
|
"17408": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 3 |
|
}, |
|
"25600": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 4 |
|
}, |
|
"33792": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 3 |
|
}, |
|
"41984": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 3 |
|
}, |
|
"50176": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 3 |
|
}, |
|
"58368": { |
|
"BLOCK_SIZE_M": 128, |
|
"BLOCK_SIZE_N": 256, |
|
"BLOCK_SIZE_K": 64, |
|
"GROUP_SIZE_M": 16, |
|
"num_warps": 8, |
|
"num_stages": 3 |
|
} |
|
} |