Szymon Tworkowski committed
Commit 1a5fcb5 · Parent(s): b65129a
remove warning
modeling_longllama.py CHANGED (+0 -3)

@@ -1027,9 +1027,6 @@ def _handle_long_input(
         attn_length += past_key_values[0][0].shape[-2]
         attention_mask = attention_mask[..., -attn_length:] if attention_mask is not None else None

-    if past_key_values is not None and past_key_values[0][0].shape[-2] + remaining_input_length > context_window_length:
-        logger.warning("Currently, the code is not optimized for generating long outputs. "
-                       "You see this warning as parts of the local (generation) cache are going to be moved to the memory cache.")
     outputs = model(
         input_ids=input_ids[..., beg:] if input_ids is not None else None,
         attention_mask=attention_mask,
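For context on what was deleted: the branch only emitted a warning; the actual cache migration happens elsewhere in the model. Below is a minimal, standalone sketch of the overflow condition the warning checked, assuming the Hugging Face past_key_values convention of per-layer (key, value) tensors with shape (batch, num_heads, seq_len, head_dim). The concrete numbers are illustrative, not LongLLaMA's real configuration.

import torch

# Illustrative values only; not LongLLaMA's actual configuration.
context_window_length = 2048    # capacity of the local (generation) cache
remaining_input_length = 512    # tokens still left to feed to the model

# Per-layer (key, value) pairs, shape (batch, num_heads, seq_len, head_dim).
past_key_values = tuple(
    (torch.zeros(1, 8, 1700, 64), torch.zeros(1, 8, 1700, 64))
    for _ in range(2)
)

cached_length = past_key_values[0][0].shape[-2]  # 1700 cached positions
if cached_length + remaining_input_length > context_window_length:
    # This is the condition the removed logger.warning reported: the local
    # cache would overflow, so its oldest entries move to the memory cache.
    print("Local cache overflow: entries will move to the memory cache")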