yayayaaa committed
Commit ef193e5 · verified · 1 Parent(s): 9745a44

Update modeling_florence2.py


Implement changes from https://huggingface.co/microsoft/Florence-2-large-ft/discussions/38/files
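The diff tracks the upstream fix: the deprecated `timm.models.layers` import path moves to `timm.layers`, `Florence2LanguageForConditionalGeneration` gains an explicit `GenerationMixin` base, the DaViT-local `_init_weights` is folded into `Florence2LanguagePreTrainedModel._init_weights`, and weight tying becomes explicit through a `_tie_weights` override plus a static `_tied_weights_keys` list on the wrapper model.

The `GenerationMixin` base is the user-visible part: from transformers v4.50 onward, `PreTrainedModel` no longer provides `generate()` on its own, so remote-code models must opt in explicitly. A minimal smoke test of a checkpoint carrying this file (a sketch; the blank test image and `<CAPTION>` prompt are only illustrative, and `trust_remote_code=True` is what pulls in modeling_florence2.py):

```python
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

model_id = "microsoft/Florence-2-large-ft"  # any Florence-2 repo with this fix applied
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

image = Image.new("RGB", (768, 768))  # blank placeholder; real usage loads a photo
inputs = processor(text="<CAPTION>", images=image, return_tensors="pt")

# On transformers >= 4.50 this call fails without the GenerationMixin base
# added below, because PreTrainedModel no longer supplies generate().
generated_ids = model.generate(
    input_ids=inputs["input_ids"],
    pixel_values=inputs["pixel_values"],
    max_new_tokens=32,
)
print(processor.batch_decode(generated_ids, skip_special_tokens=False)[0])
```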

Files changed (1)
  1. modeling_florence2.py +23 -25
modeling_florence2.py CHANGED
@@ -26,9 +26,10 @@ import torch.utils.checkpoint as checkpoint
 from torch.nn import CrossEntropyLoss
 from collections import OrderedDict
 from einops import rearrange
-from timm.models.layers import DropPath, trunc_normal_
+from timm.layers import DropPath, trunc_normal_
 
 from transformers.modeling_utils import PreTrainedModel
+from transformers.generation.utils import GenerationMixin
 from transformers.utils import (
     ModelOutput,
     add_start_docstrings,
@@ -609,29 +610,10 @@ class DaViT(nn.Module):
         self.avgpool = nn.AdaptiveAvgPool1d(1)
         self.head = nn.Linear(self.embed_dims[-1], num_classes) if num_classes > 0 else nn.Identity()
 
-        self.apply(self._init_weights)
-
     @property
     def dim_out(self):
         return self.embed_dims[-1]
 
-    def _init_weights(self, m):
-        if isinstance(m, nn.Linear):
-            trunc_normal_(m.weight, std=0.02)
-            if m.bias is not None:
-                nn.init.constant_(m.bias, 0)
-        elif isinstance(m, nn.Conv2d):
-            nn.init.normal_(m.weight, std=0.02)
-            for name, _ in m.named_parameters():
-                if name in ['bias']:
-                    nn.init.constant_(m.bias, 0)
-        elif isinstance(m, nn.LayerNorm):
-            nn.init.constant_(m.weight, 1.0)
-            nn.init.constant_(m.bias, 0)
-        elif isinstance(m, nn.BatchNorm2d):
-            nn.init.constant_(m.weight, 1.0)
-            nn.init.constant_(m.bias, 0)
-
     def forward_features_unpool(self, x):
         """
         forward until avg pooling
@@ -1450,7 +1432,18 @@ class Florence2LanguagePreTrainedModel(PreTrainedModel):
             module.weight.data.normal_(mean=0.0, std=std)
             if module.padding_idx is not None:
                 module.weight.data[module.padding_idx].zero_()
-
+        elif isinstance(module, nn.Conv2d):
+            nn.init.normal_(module.weight, std=0.02)
+            for name, _ in module.named_parameters():
+                if name == "bias":
+                    nn.init.constant_(module.bias, 0)
+        elif isinstance(module, nn.LayerNorm):
+            nn.init.constant_(module.weight, 1.0)
+            nn.init.constant_(module.bias, 0)
+        elif isinstance(module, nn.BatchNorm2d):
+            nn.init.constant_(module.weight, 1.0)
+            nn.init.constant_(module.bias, 0)
+
     @property
     def dummy_inputs(self):
         pad_token = self.config.pad_token_id
@@ -2059,7 +2052,7 @@ class Florence2LanguageModel(Florence2LanguagePreTrainedModel):
         )
 
 
-class Florence2LanguageForConditionalGeneration(Florence2LanguagePreTrainedModel):
+class Florence2LanguageForConditionalGeneration(Florence2LanguagePreTrainedModel, GenerationMixin):
     base_model_prefix = "model"
     _tied_weights_keys = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight", "lm_head.weight"]
     _keys_to_ignore_on_load_missing = ["final_logits_bias"]
@@ -2072,7 +2065,13 @@ class Florence2LanguageForConditionalGeneration(Florence2LanguagePreTrainedModel
 
         # Initialize weights and apply final processing
         self.post_init()
-
+
+    def _tie_weights(self):
+        if self.config.tie_word_embeddings:
+            self._tie_or_clone_weights(self.model.encoder.embed_tokens, self.model.shared)
+            self._tie_or_clone_weights(self.model.decoder.embed_tokens, self.model.shared)
+            self._tie_or_clone_weights(self.lm_head, self.model.shared)
+
     def get_encoder(self):
         return self.model.get_encoder()
 
@@ -2530,6 +2529,7 @@ class Florence2VisionModelWithProjection(Florence2PreTrainedModel):
     FLORENCE2_START_DOCSTRING,
 )
 class Florence2ForConditionalGeneration(Florence2PreTrainedModel):
+    _tied_weights_keys = ["language_model.encoder.embed_tokens.weight", "language_model.decoder.embed_tokens.weight", "language_model.lm_head.weight"]
     _no_split_modules = []
 
     def __init__(self, config: Florence2Config):
@@ -2546,8 +2546,6 @@ class Florence2ForConditionalGeneration(Florence2PreTrainedModel):
 
         language_model = Florence2LanguageForConditionalGeneration(config=config.text_config)
 
-        if language_model._tied_weights_keys is not None:
-            self._tied_weights_keys = [f"language_model.{k}" for k in language_model._tied_weights_keys]
        self.language_model = language_model
 
         self.pad_token_id = self.config.pad_token_id if self.config.pad_token_id is not None else -1
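Because `post_init()` runs the tying machinery, the `_tie_weights` override and the wrapper-level `_tied_weights_keys` should leave the encoder embeddings, decoder embeddings, and LM head all aliasing the shared embedding table. A quick sanity check on a loaded `Florence2ForConditionalGeneration` (illustrative; `model` as in the snippet above):

```python
# All three tied projections should share storage with model.shared.
shared = model.language_model.model.shared.weight
assert model.language_model.model.encoder.embed_tokens.weight.data_ptr() == shared.data_ptr()
assert model.language_model.model.decoder.embed_tokens.weight.data_ptr() == shared.data_ptr()
assert model.language_model.lm_head.weight.data_ptr() == shared.data_ptr()
```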