Patrick Haller committed
Commit cd8c17d · 1 Parent(s): 00c609b

Adding SequenceClassification model/head to impl
Files changed (3):
  1. README.md +6 -1
  2. config.json +2 -1
  3. modeling_hf_alibaba_nlp_gte.py +67 -5
README.md CHANGED
@@ -9044,6 +9044,11 @@ model-index:
     task:
       type: PairClassification
 ---
+
+---
+> [!IMPORTANT]
+> This is a fork of the original [snowflake-arctic-embed-m-v2.0](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v2.0) model, which was released under the Apache 2.0 license.
+---
 <h1 align="center">Snowflake's Arctic-embed-m-v2.0</h1>
 <h4 align="center">
    <p>
@@ -9213,4 +9218,4 @@ You also can email Daniel Campos([email protected]).
 
 
 ## License
-Arctic is licensed under the [Apache-2](https://www.apache.org/licenses/LICENSE-2.0). The released models can be used for commercial purposes free of charge.
+Arctic is licensed under the [Apache-2](https://www.apache.org/licenses/LICENSE-2.0). The released models can be used for commercial purposes free of charge.
config.json CHANGED
@@ -6,6 +6,7 @@
   "auto_map": {
     "AutoConfig": "configuration_hf_alibaba_nlp_gte.GteConfig",
-    "AutoModel": "modeling_hf_alibaba_nlp_gte.GteModel"
+    "AutoModel": "modeling_hf_alibaba_nlp_gte.GteModel",
+    "AutoModelForSequenceClassification": "modeling_hf_alibaba_nlp_gte.GteForSequenceClassification"
   },
   "classifier_dropout": 0.1,
   "hidden_act": "gelu",
modeling_hf_alibaba_nlp_gte.py CHANGED
@@ -26,11 +26,7 @@ from transformers.activations import ACT2FN
 from transformers.modeling_outputs import (
     BaseModelOutput,
     BaseModelOutputWithPooling,
-    MaskedLMOutput,
-    MultipleChoiceModelOutput,
-    QuestionAnsweringModelOutput,
-    SequenceClassifierOutput,
-    ModelOutput,
+    SequenceClassifierOutputWithPast,
 )
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import logging
@@ -965,3 +961,69 @@ class GteModel(GtePreTrainedModel):
             hidden_states=encoder_outputs.hidden_states,
             attentions=encoder_outputs.attentions,
         )
+
+
+class GteForSequenceClassification(GtePreTrainedModel):
+
+    def __init__(self, config: GteConfig):
+        # The parent constructor must run before any submodules are assigned,
+        # otherwise nn.Module raises on attribute assignment.
+        super().__init__(config)
+        self.config = config
+        self.num_labels = 1
+        self.model = GteModel(config, add_pooling_layer=True)
+
+        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)
+        self.loss_function = nn.MSELoss()
+
+        # Initialize weights and apply final processing.
+        self.post_init()
+
+    def get_input_embeddings(self):
+        # Delegate to the backbone rather than assuming an attribute name.
+        return self.model.get_input_embeddings()
+
+    def set_input_embeddings(self, value):
+        self.model.set_input_embeddings(value)
+
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values=None,  # accepted for API compatibility; unused here
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.FloatTensor] = None,
+        use_cache: Optional[bool] = None,  # accepted for API compatibility; unused here
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+    ) -> SequenceClassifierOutputWithPast:
+        r"""
+        labels (`torch.FloatTensor` of shape `(batch_size,)`, *optional*):
+            Targets for computing the regression loss. This head hardcodes
+            `num_labels = 1`, so a mean-squared-error loss is always used.
+        """
+
+        transformer_outputs = self.model(
+            input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+        )
+        # Score the pooled representation with a single linear layer.
+        hidden_states = transformer_outputs.pooler_output
+        logits = self.score(hidden_states)
+
+        loss = None
+        if labels is not None:
+            # MSELoss expects floating-point targets; cast to the logits dtype.
+            loss = self.loss_function(logits.squeeze(-1), labels.squeeze(-1).to(logits.dtype))
+
+        return SequenceClassifierOutputWithPast(
+            loss=loss,
+            logits=logits,
+            hidden_states=transformer_outputs.hidden_states,
+            attentions=transformer_outputs.attentions,
+        )
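
Because the head hardcodes `num_labels = 1` and an MSE loss, it behaves as a single-score regression head (e.g. for reranking-style relevance scoring) rather than a multi-class classifier. A hedged inference sketch, reusing the `model` and `tokenizer` from the loading example after the config.json section; the (query, document) pair formatting is an assumption, and any single sequence works the same way:

```python
import torch

# Encode a query/document pair; the tokenizer builds the pair encoding.
inputs = tokenizer(
    "what is snowflake arctic-embed?",
    "Arctic-embed is a family of text embedding models.",
    return_tensors="pt",
    truncation=True,
)
with torch.no_grad():
    outputs = model(**inputs)

score = outputs.logits.squeeze(-1)  # shape (batch_size,): one score per input
print(score.item())
```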