kernels-community
/

rotary

Kaixuanliu committed 6 days ago

Commit

6069b37

1 Parent(s): d7f24a6

Add `apply_rotary_pos_emb` API for compatibility with transformers

Files changed (2) hide show

rotary-xpu/rotary_xpu.cpp CHANGED Viewed

@@ -38,3 +38,41 @@ void _apply_rotary(torch::Tensor const &x1, torch::Tensor const &x2,
         });
     }
 }

         });
     }
 }
+/// @brief Rotates copies of `q` and `k` with `_apply_rotary` and returns them.
+///
+/// `cos` and `sin` get an extra dimension inserted at `unsqueeze_dim`, are
+/// trimmed along their last dimension to at most half of `q`'s last dimension,
+/// and are then handed to `_apply_rotary` together with the first and second
+/// halves (along the last dimension) of cloned `q` and `k`. The inputs
+/// themselves are never written to.
+///
+/// @param q             Query tensor; its last dimension must be even.
+/// @param k             Key tensor; its last dimension must equal `q`'s.
+/// @param cos           Cosine table; must match `sin` in its last dimension.
+/// @param sin           Sine table.
+/// @param position_ids  Not read by this function.
+///                      NOTE(review): presumably kept so the signature matches
+///                      the transformers helper of the same name -- confirm.
+/// @param unsqueeze_dim Dimension at which `cos`/`sin` are unsqueezed.
+/// @return Tuple `(q_rotated, k_rotated)` of newly allocated tensors.
+/// @throws c10::Error if any of the shape requirements above is violated.
+std::tuple<torch::Tensor, torch::Tensor> apply_rotary_pos_emb(
+    torch::Tensor const &q, torch::Tensor const &k,
+    torch::Tensor const &cos, torch::Tensor const &sin,
+    torch::Tensor const &position_ids, int64_t unsqueeze_dim) {
+    // Explicitly mark the parameter as unused to keep warning-clean builds.
+    (void)position_ids;
+    TORCH_CHECK(q.dim() > 0 && k.dim() > 0,
+                "apply_rotary_pos_emb: q and k must have at least one dimension");
+    const int64_t head_dim = q.size(-1);
+    // Without these checks the two halves passed to the kernel would have
+    // different sizes and the failure would surface as an opaque kernel error.
+    TORCH_CHECK(head_dim % 2 == 0,
+                "apply_rotary_pos_emb: last dimension of q must be even, got ",
+                head_dim);
+    TORCH_CHECK(k.size(-1) == head_dim,
+                "apply_rotary_pos_emb: last dimension of k (", k.size(-1),
+                ") must match last dimension of q (", head_dim, ")");
+    TORCH_CHECK(cos.dim() > 0 && sin.dim() > 0 && cos.size(-1) == sin.size(-1),
+                "apply_rotary_pos_emb: cos and sin must have the same last "
+                "dimension");
+    const int64_t half_dim = head_dim / 2;
+    // Insert the requested dimension, then keep only the first half_dim entries
+    // of the last dimension. slice() returns an equivalent view when the size
+    // already matches, so no size comparison is needed.
+    auto cos_final = cos.unsqueeze(unsqueeze_dim).slice(-1, 0, half_dim);
+    auto sin_final = sin.unsqueeze(unsqueeze_dim).slice(-1, 0, half_dim);
+    // Work on clones: the kernel writes its results into the tensors it is
+    // given as outputs, and the caller's q/k must stay untouched.
+    auto q_rotated = q.clone();
+    auto k_rotated = k.clone();
+    // Views onto the two halves of each clone; writing through them updates
+    // q_rotated / k_rotated.
+    auto q1 = q_rotated.slice(-1, 0, half_dim);
+    auto q2 = q_rotated.slice(-1, half_dim, head_dim);
+    auto k1 = k_rotated.slice(-1, 0, half_dim);
+    auto k2 = k_rotated.slice(-1, half_dim, head_dim);
+    // Inputs and outputs alias on purpose (in-place rotation of the clones).
+    // NOTE(review): the trailing `false` is presumably the conjugate flag of
+    // _apply_rotary -- confirm against its definition.
+    _apply_rotary(q1, q2, cos_final, sin_final, q1, q2, false);
+    _apply_rotary(k1, k2, cos_final, sin_final, k1, k2, false);
+    return std::make_tuple(q_rotated, k_rotated);
+}

rotary/rotary_cuda.cu CHANGED Viewed

@@ -43,3 +43,41 @@ void _apply_rotary(torch::Tensor const &x1, torch::Tensor const &x2,
         });
     }
 }

         });
     }
 }
+/// @brief Rotates copies of `q` and `k` with `_apply_rotary` and returns them.
+///
+/// `cos` and `sin` get an extra dimension inserted at `unsqueeze_dim`, are
+/// trimmed along their last dimension to at most half of `q`'s last dimension,
+/// and are then handed to `_apply_rotary` together with the first and second
+/// halves (along the last dimension) of cloned `q` and `k`. The inputs
+/// themselves are never written to.
+///
+/// @param q             Query tensor; its last dimension must be even.
+/// @param k             Key tensor; its last dimension must equal `q`'s.
+/// @param cos           Cosine table; must match `sin` in its last dimension.
+/// @param sin           Sine table.
+/// @param position_ids  Not read by this function.
+///                      NOTE(review): presumably kept so the signature matches
+///                      the transformers helper of the same name -- confirm.
+/// @param unsqueeze_dim Dimension at which `cos`/`sin` are unsqueezed.
+/// @return Tuple `(q_rotated, k_rotated)` of newly allocated tensors.
+/// @throws c10::Error if any of the shape requirements above is violated.
+std::tuple<torch::Tensor, torch::Tensor> apply_rotary_pos_emb(
+    torch::Tensor const &q, torch::Tensor const &k,
+    torch::Tensor const &cos, torch::Tensor const &sin,
+    torch::Tensor const &position_ids, int64_t unsqueeze_dim) {
+    // Explicitly mark the parameter as unused to keep warning-clean builds.
+    (void)position_ids;
+    TORCH_CHECK(q.dim() > 0 && k.dim() > 0,
+                "apply_rotary_pos_emb: q and k must have at least one dimension");
+    const int64_t head_dim = q.size(-1);
+    // Without these checks the two halves passed to the kernel would have
+    // different sizes and the failure would surface as an opaque kernel error.
+    TORCH_CHECK(head_dim % 2 == 0,
+                "apply_rotary_pos_emb: last dimension of q must be even, got ",
+                head_dim);
+    TORCH_CHECK(k.size(-1) == head_dim,
+                "apply_rotary_pos_emb: last dimension of k (", k.size(-1),
+                ") must match last dimension of q (", head_dim, ")");
+    TORCH_CHECK(cos.dim() > 0 && sin.dim() > 0 && cos.size(-1) == sin.size(-1),
+                "apply_rotary_pos_emb: cos and sin must have the same last "
+                "dimension");
+    const int64_t half_dim = head_dim / 2;
+    // Insert the requested dimension, then keep only the first half_dim entries
+    // of the last dimension. slice() returns an equivalent view when the size
+    // already matches, so no size comparison is needed.
+    auto cos_final = cos.unsqueeze(unsqueeze_dim).slice(-1, 0, half_dim);
+    auto sin_final = sin.unsqueeze(unsqueeze_dim).slice(-1, 0, half_dim);
+    // Work on clones: the kernel writes its results into the tensors it is
+    // given as outputs, and the caller's q/k must stay untouched.
+    auto q_rotated = q.clone();
+    auto k_rotated = k.clone();
+    // Views onto the two halves of each clone; writing through them updates
+    // q_rotated / k_rotated.
+    auto q1 = q_rotated.slice(-1, 0, half_dim);
+    auto q2 = q_rotated.slice(-1, half_dim, head_dim);
+    auto k1 = k_rotated.slice(-1, 0, half_dim);
+    auto k2 = k_rotated.slice(-1, half_dim, head_dim);
+    // Inputs and outputs alias on purpose (in-place rotation of the clones).
+    // NOTE(review): the trailing `false` is presumably the conjugate flag of
+    // _apply_rotary -- confirm against its definition.
+    _apply_rotary(q1, q2, cos_final, sin_final, q1, q2, false);
+    _apply_rotary(k1, k2, cos_final, sin_final, k1, k2, false);
+    return std::make_tuple(q_rotated, k_rotated);
+}