Alwahsh committed on
Commit
ad8c078
·
verified ·
1 Parent(s): 64e7fbf

Enable the predictor masking by default

Browse files
Files changed (1) hide show
  1. modeling_llama_butler.py +1 -1
modeling_llama_butler.py CHANGED
@@ -918,7 +918,7 @@ class LlamaAttentionExperimental(nn.Module):
918
  self.num_key_value_groups = self.num_heads // self.num_key_value_heads
919
  self.max_position_embeddings = config.max_position_embeddings
920
  self.rope_theta = config.rope_theta
921
- self.inference_mode = False
922
  self.producer = producer
923
  self.layer_idx = layer_idx
924
  self.token_sparse_method = None
 
918
  self.num_key_value_groups = self.num_heads // self.num_key_value_heads
919
  self.max_position_embeddings = config.max_position_embeddings
920
  self.rope_theta = config.rope_theta
921
+ self.inference_mode = True
922
  self.producer = producer
923
  self.layer_idx = layer_idx
924
  self.token_sparse_method = None