# Build manifest for the "moe" package: one [torch] table plus one
# [kernel.<name>] table per group of kernel sources.
# NOTE(review): the exact meaning of src / include / depends / pyext /
# cuda-capabilities is defined by the tool that reads this file, which is not
# visible from here -- confirm against that tool's documentation.

[general]
name = "moe"

# Torch binding sources (torch-ext/torch_binding.*) plus the scalar_type header
# that is also listed under [kernel.moe-marlin].
[torch]
src = [
  "core/scalar_type.hpp",
  "torch-ext/torch_binding.cpp",
  "torch-ext/torch_binding.h",
]
include = ["."]
# NOTE(review): presumably file extensions of extra files to ship with the
# Python package -- TODO confirm.
pyext = ["py", "json"]

# FP8 sources; the list includes headers under fp8/amd/.
[kernel.fp8]
cuda-capabilities = ["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0"]
src = [
  "cuda_compat.h",
  "dispatch_utils.h",
  "fp8/amd/hip_float8.h",
  "fp8/amd/hip_float8_impl.h",
  "fp8/common.cu",
  "fp8/common.cuh",
  "fp8/vectorization.cuh",
]
include = ["."]
depends = ["torch"]

# MoE alignment and top-k softmax kernels.
# NOTE(review): unlike kernel.fp8 and kernel.moe-marlin, this table sets no
# `include` key -- confirm that is intentional.
[kernel.moe]
cuda-capabilities = ["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0"]
src = [
  "cuda_compat.h",
  "dispatch_utils.h",
  "moe/moe_align_sum_kernels.cu",
  "moe/topk_softmax_kernels.cu",
]
depends = ["torch"]

# Marlin MoE kernels. The capability list starts at 8.0; the 7.x entries used
# by the other kernel tables are absent here.
[kernel.moe-marlin]
cuda-capabilities = ["8.0", "8.6", "8.7", "8.9", "9.0"]
src = [
  "core/exception.hpp",
  "core/scalar_type.hpp",
  "marlin-moe/marlin_moe_ops.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel.h",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4.h",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.h",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku4b8.cu",
  "marlin-moe/marlin_kernels/marlin_moe_kernel_ku8b128.h",
]
include = ["."]
depends = ["torch"]

# Activation kernels. cuda_compat.h and dispatch_utils.h are listed under
# activation/ here, whereas kernel.fp8 and kernel.moe list top-level paths.
# NOTE(review): no `include` key is set in this table -- confirm intentional.
[kernel.activation]
cuda-capabilities = ["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0"]
src = [
  "activation/activation_kernels.cu",
  "activation/cuda_compat.h",
  "activation/dispatch_utils.h",
]
depends = ["torch"]