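# Build configuration for the "quantization" kernel collection.
# Each [kernel.*] table below lists the sources, include paths,
# dependencies, and target GPU architectures for one kernel group.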
[general]
name = "quantization"
[torch]
src = [
"core/registration.h",
"core/scalar_type.hpp",
"torch-ext/torch_binding.cpp",
"torch-ext/torch_binding.h"
]
include = [ "." ]
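
# CUTLASS 2.x (c2x) scaled-mm kernels for W8A8 (INT8/FP8) GEMM, with
# per-architecture dispatch from SM 7.5 (Turing) through Blackwell.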
[kernel.cutlass_w8a8]
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
src = [
"core/math.hpp",
"cutlass_w8a8/common.hpp",
"cutlass_w8a8/scaled_mm_c2x.cu",
"cutlass_w8a8/scaled_mm_c2x.cuh",
"cutlass_w8a8/scaled_mm_c2x_sm75_dispatch.cuh",
"cutlass_w8a8/scaled_mm_c2x_sm80_dispatch.cuh",
"cutlass_w8a8/scaled_mm_c2x_sm89_fp8_dispatch.cuh",
"cutlass_w8a8/scaled_mm_c2x_sm89_int8_dispatch.cuh",
"cutlass_w8a8/scaled_mm_entry.cu",
"cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp",
"cutlass_extensions/epilogue/broadcast_load_epilogue_c2x.hpp",
]
include = [ "." ]
depends = [ "cutlass_3_6", "torch" ]
[kernel.cutlass_w8a8_hopper]
cuda-capabilities = [ "9.0", "9.0a" ]
src = [
"core/math.hpp",
"cutlass_w8a8/common.hpp",
"cutlass_w8a8/scaled_mm_c3x.cu",
"cutlass_w8a8/scaled_mm_c3x.cuh",
"cutlass_w8a8/scaled_mm_c3x_sm90_fp8_dispatch.cuh",
"cutlass_w8a8/scaled_mm_c3x_sm90_int8_dispatch.cuh",
"cutlass_extensions/common.cpp",
"cutlass_extensions/common.hpp",
"cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp",
"cutlass_extensions/epilogue/broadcast_load_epilogue_c3x.hpp",
]
include = [ "." ]
depends = [ "cutlass_3_6", "torch" ]
[kernel.fp8_common]
# language = "cuda-hipify"
# rocm-archs = [ "gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx1030", "gfx1100", "gfx1101" ]
src = [
"fp8/amd/hip_float8.h",
"fp8/amd/hip_float8_impl.h",
"fp8/common.cu",
"fp8/common.cuh",
"dispatch_utils.h",
"vectorization.cuh"
]
include = [ "." ]
depends = [ "torch" ]
[kernel.fp8_marlin]
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
src = [
"fp8/fp8_marlin.cu",
"gptq_marlin/marlin.cuh",
"gptq_marlin/marlin_dtypes.cuh",
]
depends = [ "torch" ]
[kernel.int8_common]
cuda-capabilities = [ "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
rocm-archs = [ "gfx906", "gfx908", "gfx90a", "gfx940", "gfx941", "gfx942", "gfx1030", "gfx1100", "gfx1101" ]
src = [
"compressed_tensors/int8_quant_kernels.cu",
"dispatch_utils.h"
]
include = [ "." ]
depends = [ "torch" ]
[kernel.gptq_marlin]
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
src = [
"core/scalar_type.hpp",
"gptq_marlin/awq_marlin_repack.cu",
"gptq_marlin/gptq_marlin.cu",
"gptq_marlin/gptq_marlin_repack.cu",
"gptq_marlin/marlin.cuh",
"gptq_marlin/marlin_dtypes.cuh"
]
include = [ "." ]
depends = [ "torch" ]
[kernel.marlin]
cuda-capabilities = [ "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0" ]
src = [
"core/scalar_type.hpp",
"marlin/dense/common/base.h",
"marlin/dense/common/mem.h",
"marlin/dense/marlin_cuda_kernel.cu",
"marlin/qqq/marlin_qqq_gemm_kernel.cu",
"marlin/sparse/common/base.h",
"marlin/sparse/common/mem.h",
"marlin/sparse/common/mma.h",
"marlin/sparse/marlin_24_cuda_kernel.cu"
]
include = [ "." ]
depends = [ "torch" ]