Update modeling_gemmoe.py
modeling_gemmoe.py (+4 -4)

@@ -705,8 +705,10 @@ class GemmoeDecoderLayer(nn.Module):
         output_attentions: Optional[bool] = False,
         output_router_logits: Optional[bool] = False,
         use_cache: Optional[bool] = False,
+        cache_position: Optional[torch.LongTensor] = None,
         **kwargs,
     ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
+        # ... (rest of the code remains the same)
         if "padding_mask" in kwargs:
             warnings.warn(
                 "Passing `padding_mask` is deprecated and will be removed in v4.37. Please make sure use `attention_mask` instead.`"
@@ -944,7 +946,6 @@ class GemmoeModel(GemmoePreTrainedModel):
         self.embed_tokens = value

     @add_start_docstrings_to_model_forward(GEMMOE_INPUTS_DOCSTRING)
-    # Ignore copy
     def forward(
         self,
         input_ids: torch.LongTensor = None,
@@ -1215,9 +1216,8 @@ class GemmoeForCausalLM(GemmoePreTrainedModel):
         ```python
         >>> from transformers import AutoTokenizer, GemmoeForCausalLM

-        >>> model
+        >>> model= GemmoeForCausalLM.from_pretrained("google/GEMMA-7b")
         >>> tokenizer = AutoTokenizer.from_pretrained("google/GEMMA-7b")
-
         >>> prompt = "What is your favorite condiment?"
         >>> inputs = tokenizer(prompt, return_tensors="pt")

@@ -1382,7 +1382,6 @@ class GemmoeForCausalLM(GemmoePreTrainedModel):
             )
         return reordered_past

-
 @add_start_docstrings(
     """
     The Gemmoe Model transformer with a sequence classification head on top (linear layer).
@@ -1447,6 +1446,7 @@ class GemmoeForSequenceClassification(GemmoePreTrainedModel):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
+            cache_position=None,
         )
         hidden_states = transformer_outputs[0]
         logits = self.score(hidden_states)