danieldk HF Staff committed on
Commit
5e584b3
·
1 Parent(s): b4c51e9

Build (aarch64-linux)

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. build/torch26-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
  2. build/torch26-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  3. build/torch26-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  4. build/torch26-cxx98-cu126-aarch64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc +0 -0
  5. build/torch26-cxx98-cu126-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  6. build/torch26-cxx98-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  7. build/torch27-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  8. build/torch27-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  9. build/{torch28-cxx11-cu128-aarch64-linux → torch27-cxx11-cu128-aarch64-linux}/paged_attention/__pycache__/__init__.cpython-313.pyc +0 -0
  10. build/torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc +0 -0
  11. build/{torch28-cxx11-cu126-aarch64-linux → torch27-cxx11-cu128-aarch64-linux}/paged_attention/__pycache__/_custom_ops.cpython-313.pyc +0 -0
  12. build/torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc +0 -0
  13. build/torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc +0 -0
  14. build/torch27-cxx11-cu128-aarch64-linux/paged_attention/_ops.py +3 -3
  15. build/{torch26-cxx98-cu126-aarch64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch27-cxx11-cu128-aarch64-linux/paged_attention/_paged_attention_b4c51e9.abi3.so} +2 -2
  16. build/torch28-cxx11-cu126-aarch64-linux/paged_attention/__init__.py +0 -21
  17. build/torch28-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc +0 -0
  18. build/torch28-cxx11-cu126-aarch64-linux/paged_attention/_custom_ops.py +0 -173
  19. build/torch28-cxx11-cu126-aarch64-linux/paged_attention/_ops.py +0 -9
  20. build/torch28-cxx11-cu126-aarch64-linux/paged_attention/_paged_attention_e6ce28c_dirty.abi3.so +0 -3
  21. build/torch28-cxx11-cu126-aarch64-linux/paged_attention/platforms.py +0 -92
  22. build/torch28-cxx11-cu128-aarch64-linux/paged_attention/__init__.py +0 -21
  23. build/torch28-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc +0 -0
  24. build/torch28-cxx11-cu128-aarch64-linux/paged_attention/_custom_ops.py +0 -173
  25. build/torch28-cxx11-cu128-aarch64-linux/paged_attention/_ops.py +0 -9
  26. build/torch28-cxx11-cu128-aarch64-linux/paged_attention/_paged_attention_e6ce28c_dirty.abi3.so +0 -3
  27. build/torch28-cxx11-cu128-aarch64-linux/paged_attention/platforms.py +0 -92
  28. build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc +0 -0
  29. build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc +0 -0
  30. build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc +0 -0
  31. build/torch28-cxx11-cu129-aarch64-linux/paged_attention/_ops.py +3 -3
  32. build/{torch27-cxx11-cu126-aarch64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch28-cxx11-cu129-aarch64-linux/paged_attention/_paged_attention_b4c51e9.abi3.so} +2 -2
  33. build/torch28-cxx11-cu129-aarch64-linux/paged_attention/_paged_attention_e6ce28c_dirty.abi3.so +0 -3
  34. build/{torch26-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/__init__.py +0 -0
  35. build/{torch28-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/__pycache__/__init__.cpython-313.pyc +0 -0
  36. build/{torch28-cxx11-cu128-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/__pycache__/_custom_ops.cpython-313.pyc +0 -0
  37. build/torch29-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc +0 -0
  38. build/{torch26-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/_custom_ops.py +0 -0
  39. build/{torch27-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/_ops.py +3 -3
  40. build/{torch27-cxx11-cu128-aarch64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch29-cxx11-cu126-aarch64-linux/paged_attention/_paged_attention_b4c51e9.abi3.so} +2 -2
  41. build/{torch26-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/platforms.py +0 -0
  42. build/{torch26-cxx98-cu126-aarch64-linux → torch29-cxx11-cu128-aarch64-linux}/paged_attention/__init__.py +0 -0
  43. build/{torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc → torch29-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc} +0 -0
  44. build/torch29-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc +0 -0
  45. build/torch29-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc +0 -0
  46. build/{torch26-cxx98-cu126-aarch64-linux → torch29-cxx11-cu128-aarch64-linux}/paged_attention/_custom_ops.py +0 -0
  47. build/{torch26-cxx98-cu126-aarch64-linux → torch29-cxx11-cu128-aarch64-linux}/paged_attention/_ops.py +3 -3
  48. build/torch29-cxx11-cu128-aarch64-linux/paged_attention/_paged_attention_b4c51e9.abi3.so +3 -0
  49. build/{torch26-cxx98-cu126-aarch64-linux → torch29-cxx11-cu128-aarch64-linux}/paged_attention/platforms.py +0 -0
  50. build/{torch27-cxx11-cu126-aarch64-linux → torch29-cxx11-cu130-aarch64-linux}/paged_attention/__init__.py +0 -0
build/torch26-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (510 Bytes)
 
build/torch26-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.71 kB)
 
build/torch26-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (548 Bytes)
 
build/torch26-cxx98-cu126-aarch64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (510 Bytes)
 
build/torch26-cxx98-cu126-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.71 kB)
 
build/torch26-cxx98-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (548 Bytes)
 
build/torch27-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.71 kB)
 
build/torch27-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (548 Bytes)
 
build/{torch28-cxx11-cu128-aarch64-linux → torch27-cxx11-cu128-aarch64-linux}/paged_attention/__pycache__/__init__.cpython-313.pyc RENAMED
Binary files a/build/torch28-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc and b/build/torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc differ
 
build/torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-312.pyc DELETED
Binary file (4.71 kB)
 
build/{torch28-cxx11-cu126-aarch64-linux → torch27-cxx11-cu128-aarch64-linux}/paged_attention/__pycache__/_custom_ops.cpython-313.pyc RENAMED
Binary files a/build/torch28-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc and b/build/torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc differ
 
build/torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_ops.cpython-312.pyc DELETED
Binary file (548 Bytes)
 
build/torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (542 Bytes). View file
 
build/torch27-cxx11-cu128-aarch64-linux/paged_attention/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _paged_attention_6677800
3
- ops = torch.ops._paged_attention_6677800
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_paged_attention_6677800::{op_name}"
 
1
  import torch
2
+ from . import _paged_attention_b4c51e9
3
+ ops = torch.ops._paged_attention_b4c51e9
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_paged_attention_b4c51e9::{op_name}"
build/{torch26-cxx98-cu126-aarch64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch27-cxx11-cu128-aarch64-linux/paged_attention/_paged_attention_b4c51e9.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:385c8fd6816793573ac802c40e1be99cc5703eebb4df05330ce57b2216da8836
3
- size 88162432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cba9e7b3cf9de722c5dc8f56a533542521fdef05a290a2b4db13948b0f1eca4
3
+ size 138172880
build/torch28-cxx11-cu126-aarch64-linux/paged_attention/__init__.py DELETED
@@ -1,21 +0,0 @@
1
- from ._custom_ops import (
2
- convert_fp8,
3
- copy_blocks,
4
- paged_attention_v1,
5
- paged_attention_v2,
6
- reshape_and_cache,
7
- reshape_and_cache_flash,
8
- swap_blocks,
9
- )
10
- from ._ops import ops
11
-
12
- __all__ = [
13
- "convert_fp8",
14
- "copy_blocks",
15
- "ops",
16
- "paged_attention_v1",
17
- "paged_attention_v2",
18
- "reshape_and_cache",
19
- "reshape_and_cache_flash",
20
- "swap_blocks",
21
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc DELETED
Binary file (554 Bytes)
 
build/torch28-cxx11-cu126-aarch64-linux/paged_attention/_custom_ops.py DELETED
@@ -1,173 +0,0 @@
1
- from typing import List, Optional
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- # page attention ops
9
- def paged_attention_v1(
10
- out: torch.Tensor,
11
- query: torch.Tensor,
12
- key_cache: torch.Tensor,
13
- value_cache: torch.Tensor,
14
- num_kv_heads: int,
15
- scale: float,
16
- block_tables: torch.Tensor,
17
- seq_lens: torch.Tensor,
18
- block_size: int,
19
- max_seq_len: int,
20
- alibi_slopes: Optional[torch.Tensor],
21
- kv_cache_dtype: str,
22
- k_scale: float,
23
- v_scale: float,
24
- tp_rank: int = 0,
25
- blocksparse_local_blocks: int = 0,
26
- blocksparse_vert_stride: int = 0,
27
- blocksparse_block_size: int = 64,
28
- blocksparse_head_sliding_step: int = 0,
29
- ) -> None:
30
- ops.paged_attention_v1(
31
- out,
32
- query,
33
- key_cache,
34
- value_cache,
35
- num_kv_heads,
36
- scale,
37
- block_tables,
38
- seq_lens,
39
- block_size,
40
- max_seq_len,
41
- alibi_slopes,
42
- kv_cache_dtype,
43
- k_scale,
44
- v_scale,
45
- tp_rank,
46
- blocksparse_local_blocks,
47
- blocksparse_vert_stride,
48
- blocksparse_block_size,
49
- blocksparse_head_sliding_step,
50
- )
51
-
52
-
53
- def paged_attention_v2(
54
- out: torch.Tensor,
55
- exp_sum: torch.Tensor,
56
- max_logits: torch.Tensor,
57
- tmp_out: torch.Tensor,
58
- query: torch.Tensor,
59
- key_cache: torch.Tensor,
60
- value_cache: torch.Tensor,
61
- num_kv_heads: int,
62
- scale: float,
63
- block_tables: torch.Tensor,
64
- seq_lens: torch.Tensor,
65
- block_size: int,
66
- max_seq_len: int,
67
- alibi_slopes: Optional[torch.Tensor],
68
- kv_cache_dtype: str,
69
- k_scale: float,
70
- v_scale: float,
71
- tp_rank: int = 0,
72
- blocksparse_local_blocks: int = 0,
73
- blocksparse_vert_stride: int = 0,
74
- blocksparse_block_size: int = 64,
75
- blocksparse_head_sliding_step: int = 0,
76
- ) -> None:
77
- ops.paged_attention_v2(
78
- out,
79
- exp_sum,
80
- max_logits,
81
- tmp_out,
82
- query,
83
- key_cache,
84
- value_cache,
85
- num_kv_heads,
86
- scale,
87
- block_tables,
88
- seq_lens,
89
- block_size,
90
- max_seq_len,
91
- alibi_slopes,
92
- kv_cache_dtype,
93
- k_scale,
94
- v_scale,
95
- tp_rank,
96
- blocksparse_local_blocks,
97
- blocksparse_vert_stride,
98
- blocksparse_block_size,
99
- blocksparse_head_sliding_step,
100
- )
101
-
102
-
103
- def reshape_and_cache(
104
- key: torch.Tensor,
105
- value: torch.Tensor,
106
- key_cache: torch.Tensor,
107
- value_cache: torch.Tensor,
108
- slot_mapping: torch.Tensor,
109
- kv_cache_dtype: str,
110
- k_scale: float,
111
- v_scale: float,
112
- ) -> None:
113
- ops.reshape_and_cache(
114
- key,
115
- value,
116
- key_cache,
117
- value_cache,
118
- slot_mapping,
119
- kv_cache_dtype,
120
- k_scale,
121
- v_scale,
122
- )
123
-
124
-
125
- def reshape_and_cache_flash(
126
- key: torch.Tensor,
127
- value: torch.Tensor,
128
- key_cache: torch.Tensor,
129
- value_cache: torch.Tensor,
130
- slot_mapping: torch.Tensor,
131
- kv_cache_dtype: str,
132
- k_scale: torch.Tensor,
133
- v_scale: torch.Tensor,
134
- ) -> None:
135
- ops.reshape_and_cache_flash(
136
- key,
137
- value,
138
- key_cache,
139
- value_cache,
140
- slot_mapping,
141
- kv_cache_dtype,
142
- k_scale,
143
- v_scale,
144
- )
145
-
146
-
147
- def copy_blocks(
148
- key_caches: List[torch.Tensor],
149
- value_caches: List[torch.Tensor],
150
- block_mapping: torch.Tensor,
151
- ) -> None:
152
- ops.copy_blocks(key_caches, value_caches, block_mapping)
153
-
154
-
155
- def swap_blocks(
156
- src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
157
- ) -> None:
158
- ops.swap_blocks(src, dst, block_mapping)
159
-
160
-
161
- def convert_fp8(
162
- output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
163
- ) -> None:
164
- ops.convert_fp8(output, input, scale, kv_dtype)
165
-
166
-
167
- __all__ = [
168
- "convert_fp8",
169
- "paged_attention_v1",
170
- "paged_attention_v2",
171
- "reshape_and_cache",
172
- "copy_blocks",
173
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu126-aarch64-linux/paged_attention/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _paged_attention_e6ce28c_dirty
3
- ops = torch.ops._paged_attention_e6ce28c_dirty
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_paged_attention_e6ce28c_dirty::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu126-aarch64-linux/paged_attention/_paged_attention_e6ce28c_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:97f6ba61bd73001a587126e697ca500dd6e89bb5b2e46a58ea42e83d023f0d0e
3
- size 88171000
 
 
 
 
build/torch28-cxx11-cu126-aarch64-linux/paged_attention/platforms.py DELETED
@@ -1,92 +0,0 @@
1
- import os
2
- import random
3
- from abc import ABC, abstractmethod
4
- from functools import lru_cache, wraps
5
- from typing import Callable, ParamSpec, TypeVar
6
-
7
- import numpy as np
8
- import torch
9
-
10
- IS_ROCM = torch.version.hip is not None
11
- IS_MPS = torch.backends.mps.is_available()
12
-
13
-
14
- class Platform(ABC):
15
- @classmethod
16
- def seed_everything(cls, seed: int) -> None:
17
- """
18
- Set the seed of each random module.
19
- `torch.manual_seed` will set seed on all devices.
20
-
21
- Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
22
- """
23
- random.seed(seed)
24
- np.random.seed(seed)
25
- torch.manual_seed(seed)
26
-
27
- @abstractmethod
28
- def get_device_name(self, device_id: int = 0) -> str: ...
29
-
30
- @abstractmethod
31
- def is_cuda(self) -> bool: ...
32
-
33
- @abstractmethod
34
- def is_rocm(self) -> bool: ...
35
-
36
- @abstractmethod
37
- def is_mps(self) -> bool: ...
38
-
39
-
40
- class CudaPlatform(Platform):
41
- @classmethod
42
- @lru_cache(maxsize=8)
43
- def get_device_name(cls, device_id: int = 0) -> str:
44
- return torch.cuda.get_device_name(0)
45
-
46
- def is_cuda(self) -> bool:
47
- return True
48
-
49
- def is_rocm(self) -> bool:
50
- return False
51
-
52
- def is_mps(self) -> bool:
53
- return False
54
-
55
-
56
- class RocmPlatform(Platform):
57
- @classmethod
58
- @lru_cache(maxsize=8)
59
- def get_device_name(cls, device_id: int = 0) -> str:
60
- return torch.cuda.get_device_name(device_id)
61
-
62
- def is_cuda(self) -> bool:
63
- return False
64
-
65
- def is_rocm(self) -> bool:
66
- return True
67
-
68
- def is_mps(self) -> bool:
69
- return False
70
-
71
-
72
- class MpsPlatform(Platform):
73
- @classmethod
74
- @lru_cache(maxsize=8)
75
- def get_device_name(cls, device_id: int = 0) -> str:
76
- return torch.cuda.get_device_name(device_id)
77
-
78
- def is_cuda(self) -> bool:
79
- return False
80
-
81
- def is_rocm(self) -> bool:
82
- return False
83
-
84
- def is_mps(self) -> bool:
85
- return True
86
-
87
- current_platform = (
88
- RocmPlatform() if IS_ROCM else
89
- MpsPlatform() if IS_MPS else
90
- CudaPlatform() if torch.cuda.is_available() else
91
- None
92
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu128-aarch64-linux/paged_attention/__init__.py DELETED
@@ -1,21 +0,0 @@
1
- from ._custom_ops import (
2
- convert_fp8,
3
- copy_blocks,
4
- paged_attention_v1,
5
- paged_attention_v2,
6
- reshape_and_cache,
7
- reshape_and_cache_flash,
8
- swap_blocks,
9
- )
10
- from ._ops import ops
11
-
12
- __all__ = [
13
- "convert_fp8",
14
- "copy_blocks",
15
- "ops",
16
- "paged_attention_v1",
17
- "paged_attention_v2",
18
- "reshape_and_cache",
19
- "reshape_and_cache_flash",
20
- "swap_blocks",
21
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc DELETED
Binary file (554 Bytes)
 
build/torch28-cxx11-cu128-aarch64-linux/paged_attention/_custom_ops.py DELETED
@@ -1,173 +0,0 @@
1
- from typing import List, Optional
2
-
3
- import torch
4
-
5
- from ._ops import ops
6
-
7
-
8
- # page attention ops
9
- def paged_attention_v1(
10
- out: torch.Tensor,
11
- query: torch.Tensor,
12
- key_cache: torch.Tensor,
13
- value_cache: torch.Tensor,
14
- num_kv_heads: int,
15
- scale: float,
16
- block_tables: torch.Tensor,
17
- seq_lens: torch.Tensor,
18
- block_size: int,
19
- max_seq_len: int,
20
- alibi_slopes: Optional[torch.Tensor],
21
- kv_cache_dtype: str,
22
- k_scale: float,
23
- v_scale: float,
24
- tp_rank: int = 0,
25
- blocksparse_local_blocks: int = 0,
26
- blocksparse_vert_stride: int = 0,
27
- blocksparse_block_size: int = 64,
28
- blocksparse_head_sliding_step: int = 0,
29
- ) -> None:
30
- ops.paged_attention_v1(
31
- out,
32
- query,
33
- key_cache,
34
- value_cache,
35
- num_kv_heads,
36
- scale,
37
- block_tables,
38
- seq_lens,
39
- block_size,
40
- max_seq_len,
41
- alibi_slopes,
42
- kv_cache_dtype,
43
- k_scale,
44
- v_scale,
45
- tp_rank,
46
- blocksparse_local_blocks,
47
- blocksparse_vert_stride,
48
- blocksparse_block_size,
49
- blocksparse_head_sliding_step,
50
- )
51
-
52
-
53
- def paged_attention_v2(
54
- out: torch.Tensor,
55
- exp_sum: torch.Tensor,
56
- max_logits: torch.Tensor,
57
- tmp_out: torch.Tensor,
58
- query: torch.Tensor,
59
- key_cache: torch.Tensor,
60
- value_cache: torch.Tensor,
61
- num_kv_heads: int,
62
- scale: float,
63
- block_tables: torch.Tensor,
64
- seq_lens: torch.Tensor,
65
- block_size: int,
66
- max_seq_len: int,
67
- alibi_slopes: Optional[torch.Tensor],
68
- kv_cache_dtype: str,
69
- k_scale: float,
70
- v_scale: float,
71
- tp_rank: int = 0,
72
- blocksparse_local_blocks: int = 0,
73
- blocksparse_vert_stride: int = 0,
74
- blocksparse_block_size: int = 64,
75
- blocksparse_head_sliding_step: int = 0,
76
- ) -> None:
77
- ops.paged_attention_v2(
78
- out,
79
- exp_sum,
80
- max_logits,
81
- tmp_out,
82
- query,
83
- key_cache,
84
- value_cache,
85
- num_kv_heads,
86
- scale,
87
- block_tables,
88
- seq_lens,
89
- block_size,
90
- max_seq_len,
91
- alibi_slopes,
92
- kv_cache_dtype,
93
- k_scale,
94
- v_scale,
95
- tp_rank,
96
- blocksparse_local_blocks,
97
- blocksparse_vert_stride,
98
- blocksparse_block_size,
99
- blocksparse_head_sliding_step,
100
- )
101
-
102
-
103
- def reshape_and_cache(
104
- key: torch.Tensor,
105
- value: torch.Tensor,
106
- key_cache: torch.Tensor,
107
- value_cache: torch.Tensor,
108
- slot_mapping: torch.Tensor,
109
- kv_cache_dtype: str,
110
- k_scale: float,
111
- v_scale: float,
112
- ) -> None:
113
- ops.reshape_and_cache(
114
- key,
115
- value,
116
- key_cache,
117
- value_cache,
118
- slot_mapping,
119
- kv_cache_dtype,
120
- k_scale,
121
- v_scale,
122
- )
123
-
124
-
125
- def reshape_and_cache_flash(
126
- key: torch.Tensor,
127
- value: torch.Tensor,
128
- key_cache: torch.Tensor,
129
- value_cache: torch.Tensor,
130
- slot_mapping: torch.Tensor,
131
- kv_cache_dtype: str,
132
- k_scale: torch.Tensor,
133
- v_scale: torch.Tensor,
134
- ) -> None:
135
- ops.reshape_and_cache_flash(
136
- key,
137
- value,
138
- key_cache,
139
- value_cache,
140
- slot_mapping,
141
- kv_cache_dtype,
142
- k_scale,
143
- v_scale,
144
- )
145
-
146
-
147
- def copy_blocks(
148
- key_caches: List[torch.Tensor],
149
- value_caches: List[torch.Tensor],
150
- block_mapping: torch.Tensor,
151
- ) -> None:
152
- ops.copy_blocks(key_caches, value_caches, block_mapping)
153
-
154
-
155
- def swap_blocks(
156
- src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor
157
- ) -> None:
158
- ops.swap_blocks(src, dst, block_mapping)
159
-
160
-
161
- def convert_fp8(
162
- output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8"
163
- ) -> None:
164
- ops.convert_fp8(output, input, scale, kv_dtype)
165
-
166
-
167
- __all__ = [
168
- "convert_fp8",
169
- "paged_attention_v1",
170
- "paged_attention_v2",
171
- "reshape_and_cache",
172
- "copy_blocks",
173
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu128-aarch64-linux/paged_attention/_ops.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
- from . import _paged_attention_e6ce28c_dirty
3
- ops = torch.ops._paged_attention_e6ce28c_dirty
4
-
5
- def add_op_namespace_prefix(op_name: str):
6
- """
7
- Prefix op by namespace.
8
- """
9
- return f"_paged_attention_e6ce28c_dirty::{op_name}"
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu128-aarch64-linux/paged_attention/_paged_attention_e6ce28c_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:80929eb7a9f2561ef3e21c764494ac30f80935b054150b415626e49fe8d52b96
3
- size 120282936
 
 
 
 
build/torch28-cxx11-cu128-aarch64-linux/paged_attention/platforms.py DELETED
@@ -1,92 +0,0 @@
1
- import os
2
- import random
3
- from abc import ABC, abstractmethod
4
- from functools import lru_cache, wraps
5
- from typing import Callable, ParamSpec, TypeVar
6
-
7
- import numpy as np
8
- import torch
9
-
10
- IS_ROCM = torch.version.hip is not None
11
- IS_MPS = torch.backends.mps.is_available()
12
-
13
-
14
- class Platform(ABC):
15
- @classmethod
16
- def seed_everything(cls, seed: int) -> None:
17
- """
18
- Set the seed of each random module.
19
- `torch.manual_seed` will set seed on all devices.
20
-
21
- Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
22
- """
23
- random.seed(seed)
24
- np.random.seed(seed)
25
- torch.manual_seed(seed)
26
-
27
- @abstractmethod
28
- def get_device_name(self, device_id: int = 0) -> str: ...
29
-
30
- @abstractmethod
31
- def is_cuda(self) -> bool: ...
32
-
33
- @abstractmethod
34
- def is_rocm(self) -> bool: ...
35
-
36
- @abstractmethod
37
- def is_mps(self) -> bool: ...
38
-
39
-
40
- class CudaPlatform(Platform):
41
- @classmethod
42
- @lru_cache(maxsize=8)
43
- def get_device_name(cls, device_id: int = 0) -> str:
44
- return torch.cuda.get_device_name(0)
45
-
46
- def is_cuda(self) -> bool:
47
- return True
48
-
49
- def is_rocm(self) -> bool:
50
- return False
51
-
52
- def is_mps(self) -> bool:
53
- return False
54
-
55
-
56
- class RocmPlatform(Platform):
57
- @classmethod
58
- @lru_cache(maxsize=8)
59
- def get_device_name(cls, device_id: int = 0) -> str:
60
- return torch.cuda.get_device_name(device_id)
61
-
62
- def is_cuda(self) -> bool:
63
- return False
64
-
65
- def is_rocm(self) -> bool:
66
- return True
67
-
68
- def is_mps(self) -> bool:
69
- return False
70
-
71
-
72
- class MpsPlatform(Platform):
73
- @classmethod
74
- @lru_cache(maxsize=8)
75
- def get_device_name(cls, device_id: int = 0) -> str:
76
- return torch.cuda.get_device_name(device_id)
77
-
78
- def is_cuda(self) -> bool:
79
- return False
80
-
81
- def is_rocm(self) -> bool:
82
- return False
83
-
84
- def is_mps(self) -> bool:
85
- return True
86
-
87
- current_platform = (
88
- RocmPlatform() if IS_ROCM else
89
- MpsPlatform() if IS_MPS else
90
- CudaPlatform() if torch.cuda.is_available() else
91
- None
92
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc CHANGED
Binary files a/build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc and b/build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc differ
 
build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc CHANGED
Binary files a/build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc and b/build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc differ
 
build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc CHANGED
Binary files a/build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc and b/build/torch28-cxx11-cu129-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc differ
 
build/torch28-cxx11-cu129-aarch64-linux/paged_attention/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _paged_attention_e6ce28c_dirty
3
- ops = torch.ops._paged_attention_e6ce28c_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_paged_attention_e6ce28c_dirty::{op_name}"
 
1
  import torch
2
+ from . import _paged_attention_b4c51e9
3
+ ops = torch.ops._paged_attention_b4c51e9
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_paged_attention_b4c51e9::{op_name}"
build/{torch27-cxx11-cu126-aarch64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch28-cxx11-cu129-aarch64-linux/paged_attention/_paged_attention_b4c51e9.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2639286d960d9992c40b85c911773825374d8bb218a82eb84cdd4ac41342cb4c
3
- size 88169904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02238a0a4dacdbbf60eb9bb73a95332b448d5127f71149003c764f40595d9d06
3
+ size 149841048
build/torch28-cxx11-cu129-aarch64-linux/paged_attention/_paged_attention_e6ce28c_dirty.abi3.so DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:874c4b927fbbc21dc27ec190beac22b38346b66a080a0e56e4e9a010c5d737b5
3
- size 130442416
 
 
 
 
build/{torch26-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/__init__.py RENAMED
File without changes
build/{torch28-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/__pycache__/__init__.cpython-313.pyc RENAMED
Binary files a/build/torch28-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc and b/build/torch29-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc differ
 
build/{torch28-cxx11-cu128-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/__pycache__/_custom_ops.cpython-313.pyc RENAMED
Binary files a/build/torch28-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc and b/build/torch29-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc differ
 
build/torch29-cxx11-cu126-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (542 Bytes). View file
 
build/{torch26-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/_custom_ops.py RENAMED
File without changes
build/{torch27-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/_ops.py RENAMED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _paged_attention_6677800
3
- ops = torch.ops._paged_attention_6677800
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_paged_attention_6677800::{op_name}"
 
1
  import torch
2
+ from . import _paged_attention_b4c51e9
3
+ ops = torch.ops._paged_attention_b4c51e9
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_paged_attention_b4c51e9::{op_name}"
build/{torch27-cxx11-cu128-aarch64-linux/paged_attention/_paged_attention_6677800.abi3.so → torch29-cxx11-cu126-aarch64-linux/paged_attention/_paged_attention_b4c51e9.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bb98e687cc11950f2103990b777e6699c4a7285a53092def0063a9684fa951e
3
- size 120216032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a193f66139a77108969642a435eb345b97c24e4a31e53856fd7c65bff6e810
3
+ size 110584256
build/{torch26-cxx11-cu126-aarch64-linux → torch29-cxx11-cu126-aarch64-linux}/paged_attention/platforms.py RENAMED
File without changes
build/{torch26-cxx98-cu126-aarch64-linux → torch29-cxx11-cu128-aarch64-linux}/paged_attention/__init__.py RENAMED
File without changes
build/{torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc → torch29-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc} RENAMED
Binary files a/build/torch27-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/__init__.cpython-312.pyc and b/build/torch29-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/__init__.cpython-313.pyc differ
 
build/torch29-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_custom_ops.cpython-313.pyc ADDED
Binary file (4.72 kB). View file
 
build/torch29-cxx11-cu128-aarch64-linux/paged_attention/__pycache__/_ops.cpython-313.pyc ADDED
Binary file (542 Bytes). View file
 
build/{torch26-cxx98-cu126-aarch64-linux → torch29-cxx11-cu128-aarch64-linux}/paged_attention/_custom_ops.py RENAMED
File without changes
build/{torch26-cxx98-cu126-aarch64-linux → torch29-cxx11-cu128-aarch64-linux}/paged_attention/_ops.py RENAMED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _paged_attention_6677800
3
- ops = torch.ops._paged_attention_6677800
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_paged_attention_6677800::{op_name}"
 
1
  import torch
2
+ from . import _paged_attention_b4c51e9
3
+ ops = torch.ops._paged_attention_b4c51e9
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_paged_attention_b4c51e9::{op_name}"
build/torch29-cxx11-cu128-aarch64-linux/paged_attention/_paged_attention_b4c51e9.abi3.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0955cb5ea9b9714579c81efc8810bba7ce46aad33701c244efcb01d021a944f
3
+ size 138174216
build/{torch26-cxx98-cu126-aarch64-linux → torch29-cxx11-cu128-aarch64-linux}/paged_attention/platforms.py RENAMED
File without changes
build/{torch27-cxx11-cu126-aarch64-linux → torch29-cxx11-cu130-aarch64-linux}/paged_attention/__init__.py RENAMED
File without changes