Enable the predictor masking by default
#5
by
Alwahsh
- opened
- modeling_llama_butler.py +1 -1
modeling_llama_butler.py
CHANGED
@@ -918,7 +918,7 @@ class LlamaAttentionExperimental(nn.Module):
|
|
918 |
self.num_key_value_groups = self.num_heads // self.num_key_value_heads
|
919 |
self.max_position_embeddings = config.max_position_embeddings
|
920 |
self.rope_theta = config.rope_theta
|
921 |
-
self.inference_mode = False
|
922 |
self.producer = producer
|
923 |
self.layer_idx = layer_idx
|
924 |
self.token_sparse_method = None
|
|
|
918 |
self.num_key_value_groups = self.num_heads // self.num_key_value_heads
|
919 |
self.max_position_embeddings = config.max_position_embeddings
|
920 |
self.rope_theta = config.rope_theta
|
921 |
+
self.inference_mode = True
|
922 |
self.producer = producer
|
923 |
self.layer_idx = layer_idx
|
924 |
self.token_sparse_method = None
|