Motif-Technologies
/

Motif-2.6B

Text Generation

text-generation-inference

Model card Files Files and versions

leejunhyeok committed 22 days ago

Commit

9b40539

·

verified ·

1 Parent(s): 607612f

Update modeling_motif.py

Files changed (1) hide show

modeling_motif.py +4 -26

modeling_motif.py CHANGED Viewed

@@ -261,35 +261,13 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
         sin (torch.Tensor): Sine values for rotary embedding.
         unsqueeze_dim (int, optional): Dimension along which `cos` and `sin` are unsqueezed.
             Defaults to 1.
-        fused_rope (bool, optional): If True, applies fused rotary embeddings using
-            `moreh_ops.apply_rotary_emb`. If False, computes rotary embeddings manually.
-            Defaults to False.
     Returns:
         Tuple[torch.Tensor, torch.Tensor]: Returns transformed query and key tensors after applying rotary embeddings.
     """
-    '''
-    # (B, NH, S, D_KV) -> (B, S, NH, D_KV)
-    cos = cos.unsqueeze(unsqueeze_dim)
-    sin = sin.unsqueeze(unsqueeze_dim)
-    q_embed = (q * cos) + (rotate_half(q) * sin)
-    k_embed = (k * cos) + (rotate_half(k) * sin)
-    '''
-    q = q.transpose(1, 2)
-    k = k.transpose(1, 2)
-    # Expand 'batch' dim
-    cos = cos.expand(q.shape[0], *cos.shape[1:])
-    sin = sin.expand(q.shape[0], *sin.shape[1:])
-    q_embed = moreh_ops.apply_rotary_emb(q, cos, sin, opcode=1)
-    k_embed = moreh_ops.apply_rotary_emb(k, cos, sin, opcode=1)
-    # (B, S, NH, D_KV) -> (B, NH, S, D_KV)
-    q_embed = q_embed.transpose(1, 2)
-    k_embed = k_embed.transpose(1, 2)
-    return q_embed, k_embed
 class MotifMLP(nn.Module):

         sin (torch.Tensor): Sine values for rotary embedding.
         unsqueeze_dim (int, optional): Dimension along which `cos` and `sin` are unsqueezed.
             Defaults to 1.
     Returns:
         Tuple[torch.Tensor, torch.Tensor]: Returns transformed query and key tensors after applying rotary embeddings.
     """
+    device = q.device
+    return map(
+        lambda x: (x * cos[position_ids].unsqueeze(unsqueeze_dim).to(device)) +
+        (rotate_half(x) * sin[position_ids].unsqueeze(unsqueeze_dim).to(device)), (q, k))
 class MotifMLP(nn.Module):