Replace whole-word `LossKwargs` with `TransformersKwargs` in modeling*.py
modeling_opensci.py (+2 -2)
```diff
@@ -40,7 +40,7 @@ from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
 from transformers.processing_utils import Unpack
 from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
 from transformers.utils import (
-    LossKwargs,
+    TransformersKwargs,
     add_code_sample_docstrings,
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
@@ -750,7 +750,7 @@ class OpensciModel(OpensciPreTrainedModel):
         return causal_mask


-class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs): ...
+class KwargsForCausalLM(FlashAttentionKwargs, TransformersKwargs): ...


 class OpensciForCausalLM(OpensciPreTrainedModel, GenerationMixin):
```
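For context, `KwargsForCausalLM` is the per-model `TypedDict` that gets unpacked into the model's `forward` signature; the diff only swaps its `LossKwargs` base for `TransformersKwargs`. A minimal sketch of the pattern, assuming a transformers release that exports `TransformersKwargs` from `transformers.utils` (the stand-in class and `forward` body below are illustrative, not the actual Opensci code):

```python
# Minimal sketch of the kwargs pattern this commit touches. Assumes a
# transformers release in which `TransformersKwargs` is exported from
# `transformers.utils` (older releases exported `LossKwargs` instead).
from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
from transformers.processing_utils import Unpack
from transformers.utils import TransformersKwargs


# Merging the two TypedDicts lets a single Unpack[...] annotation cover both
# the flash-attention kwargs and the generic transformers kwargs.
class KwargsForCausalLM(FlashAttentionKwargs, TransformersKwargs): ...


class CausalLMSketch:
    """Illustrative stand-in for OpensciForCausalLM, not the real class."""

    def forward(self, input_ids, labels=None, **kwargs: Unpack[KwargsForCausalLM]):
        # Type checkers validate `kwargs` against the merged TypedDict
        # (e.g. `num_items_in_batch` for loss scaling, or the flash-attention
        # sequence-length arguments); at runtime they are passed through
        # unchanged to the inner model and loss function.
        ...
```

TypedDict inheritance has no runtime effect here; the merge exists so that one `Unpack[KwargsForCausalLM]` annotation gives type checkers the union of the flash-attention and generic transformers keyword arguments, which is why a whole-word rename of the base class is the entire fix.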