Update modeling_xlm_roberta.py
modeling_xlm_roberta.py  +4 -6
@@ -210,12 +210,10 @@ class XLMRobertaEncoder(nn.Module):
             subset_mask: (batch, seqlen), dtype=torch.bool
         """
         if key_padding_mask is None or not self.use_flash_attn:
-            mixer_kwargs = (
-                {'key_padding_mask': key_padding_mask.bool()}
-                if key_padding_mask is not None
-                else None
-            )
-            mixer_kwargs['task_type'] = task_type
+            mixer_kwargs = {'task_type': task_type}
+            if key_padding_mask is not None:
+                mixer_kwargs['key_padding_mask'] = key_padding_mask.bool()
+
             for layer in self.layers:
                 if self._grad_checkpointing:
                     hidden_states = torch.utils.checkpoint.checkpoint(
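For context: the old construction evaluated to None whenever key_padding_mask was None, so the follow-up item assignment mixer_kwargs['task_type'] = task_type raised a TypeError. Below is a minimal, self-contained sketch of the before/after kwargs logic, pulled out of the encoder for illustration; the helper names and the 'retrieval' task_type value are placeholders, not part of the model code.

    import torch

    def build_mixer_kwargs_old(key_padding_mask, task_type):
        # Old logic: the conditional expression evaluates to None
        # when no mask is given.
        mixer_kwargs = (
            {'key_padding_mask': key_padding_mask.bool()}
            if key_padding_mask is not None
            else None
        )
        mixer_kwargs['task_type'] = task_type  # TypeError when mixer_kwargs is None
        return mixer_kwargs

    def build_mixer_kwargs_new(key_padding_mask, task_type):
        # New logic: always start from a dict; add the mask only if present.
        mixer_kwargs = {'task_type': task_type}
        if key_padding_mask is not None:
            mixer_kwargs['key_padding_mask'] = key_padding_mask.bool()
        return mixer_kwargs

    mask = torch.ones(2, 8, dtype=torch.long)  # hypothetical (batch, seqlen) mask
    print(build_mixer_kwargs_new(mask, 'retrieval'))  # both keys present
    print(build_mixer_kwargs_new(None, 'retrieval'))  # {'task_type': 'retrieval'}
    # build_mixer_kwargs_old(None, 'retrieval') would raise:
    # TypeError: 'NoneType' object does not support item assignment

The fix also keeps the key_padding_mask.bool() call inside the not-None branch, so it is only evaluated when a mask actually exists.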