eunhwanpark-motiftech committed
Commit 95a3a69 · verified · 1 Parent(s): 097873e

Update modeling_motif.py

Files changed (1)
  1. modeling_motif.py +8 -16
modeling_motif.py CHANGED
@@ -1,36 +1,28 @@
 import math
+from dataclasses import dataclass
 from typing import List, Optional, Tuple, Union

 import torch
+import torch.nn.functional as F
 import torch.utils.checkpoint
 from torch import nn
 from torch.nn import CrossEntropyLoss
+from transformers.activations import ACT2CLS as _ACT2CLS
+from transformers.activations import ClassInstantier
 from transformers.cache_utils import Cache, DynamicCache, SlidingWindowCache, StaticCache
 from transformers.generation import GenerationMixin
 from transformers.modeling_attn_mask_utils import AttentionMaskConverter
 from transformers.modeling_flash_attention_utils import _flash_attention_forward
-from transformers.modeling_outputs import (
-    CausalLMOutputWithPast,
-    ModelOutput,
-)
+from transformers.modeling_outputs import CausalLMOutputWithPast, ModelOutput
 from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS
 from transformers.modeling_utils import PreTrainedModel
 from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
-from transformers.utils import (
-    add_start_docstrings,
-    add_start_docstrings_to_model_forward,
-    is_flash_attn_greater_or_equal_2_10,
-    is_flash_attn_2_available,
-    logging,
-    replace_return_docstrings,
-)
+from transformers.utils import (add_start_docstrings, add_start_docstrings_to_model_forward, is_flash_attn_2_available,
+                                is_flash_attn_greater_or_equal_2_10, logging, replace_return_docstrings)
+
 from .configuration_motif import MotifConfig
-from dataclasses import dataclass

-import torch.nn.functional as F

-from transformers.activations import ACT2CLS as _ACT2CLS
-from transformers.activations import ClassInstantier

 class PolyNorm(torch.nn.Module):
     """
     A trainable activation function introduced in https://arxiv.org/html/2411.03884v1.
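
For reference, the PolyNorm class whose docstring closes this hunk is described in the cited paper (arXiv:2411.03884) as a weighted sum of RMS-normalized powers of the input. The sketch below is a minimal reconstruction under that assumption, not the exact body of modeling_motif.py; the eps value and the registration of the activation through the newly imported ACT2CLS / ClassInstantier (under the assumed name "poly_norm") are likewise illustrative guesses at how those imports are used.

import torch
from transformers.activations import ACT2CLS as _ACT2CLS
from transformers.activations import ClassInstantier


class PolyNorm(torch.nn.Module):
    """Trainable polynomial-composition activation (sketch after arXiv:2411.03884)."""

    def __init__(self, eps: float = 1e-6):
        super().__init__()
        # One learnable weight per power term (x^3, x^2, x) plus a scalar bias; eps is assumed.
        self.weight = torch.nn.Parameter(torch.ones(3) / 3)
        self.bias = torch.nn.Parameter(torch.zeros(1))
        self.eps = eps

    def _norm(self, x: torch.Tensor) -> torch.Tensor:
        # RMS normalization over the last (hidden) dimension.
        return x / torch.sqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return (self.weight[0] * self._norm(x ** 3)
                + self.weight[1] * self._norm(x ** 2)
                + self.weight[2] * self._norm(x)
                + self.bias)


# Hypothetical registration: extend transformers' activation table so a config can
# select this activation by the string "poly_norm" (key name assumed for illustration).
ACT2FN = ClassInstantier({**_ACT2CLS, "poly_norm": PolyNorm})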