Patrick Haller committed · Commit cd8c17d
1 Parent(s): 00c609b

Adding SequenceClassification model/head to impl

Browse files:
- README.md +6 -1
- config.json +1 -0
- modeling_hf_alibaba_nlp_gte.py +61 -5
README.md CHANGED

@@ -9044,6 +9044,11 @@ model-index:
       task:
         type: PairClassification
 ---
+
+---
+> [!IMPORTANT]
+> This is a fork of the original [snowflake-arctic-embed-m-v2.0](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v2.0) model, which was released under the Apache 2.0 license.
+---
 <h1 align="center">Snowflake's Arctic-embed-m-v2.0</h1>
 <h4 align="center">
   <p>
@@ -9213,4 +9218,4 @@ You also can email Daniel Campos([email protected]).
 
 
 ## License
-Arctic is licensed under the [Apache-2](https://www.apache.org/licenses/LICENSE-2.0). The released models can be used for commercial purposes free of charge.
+Arctic is licensed under the [Apache-2](https://www.apache.org/licenses/LICENSE-2.0). The released models can be used for commercial purposes free of charge.
config.json CHANGED

@@ -6,6 +6,7 @@
   "auto_map": {
     "AutoConfig": "configuration_hf_alibaba_nlp_gte.GteConfig",
-    "AutoModel": "modeling_hf_alibaba_nlp_gte.GteModel"
+    "AutoModel": "modeling_hf_alibaba_nlp_gte.GteModel",
+    "AutoModelForSequenceClassification": "modeling_hf_alibaba_nlp_gte.GteForSequenceClassification"
   },
   "classifier_dropout": 0.1,
   "hidden_act": "gelu",
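With the new `auto_map` entry, the head becomes loadable through the standard Auto class without importing the modeling file by hand. A minimal sketch, assuming a published repo (the repo id below is a placeholder; `trust_remote_code=True` is required because `GteForSequenceClassification` lives in the repo's own `modeling_hf_alibaba_nlp_gte.py`):

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "namespace/this-model-repo"  # placeholder, not the real repo id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
# trust_remote_code=True lets transformers resolve the auto_map entry above
# to GteForSequenceClassification in modeling_hf_alibaba_nlp_gte.py.
model = AutoModelForSequenceClassification.from_pretrained(repo_id, trust_remote_code=True)

# Encode a (query, passage) pair and read off the single relevance logit.
inputs = tokenizer("example query", "example passage", return_tensors="pt")
with torch.no_grad():
    score = model(**inputs).logits.squeeze(-1)  # shape (1,): one score per pair
```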
modeling_hf_alibaba_nlp_gte.py CHANGED

@@ -26,11 +26,7 @@ from transformers.activations import ACT2FN
 from transformers.modeling_outputs import (
     BaseModelOutput,
     BaseModelOutputWithPooling,
-
-    MultipleChoiceModelOutput,
-    QuestionAnsweringModelOutput,
-    SequenceClassifierOutput,
-    ModelOutput,
+    SequenceClassifierOutputWithPast,
 )
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import logging
@@ -965,3 +961,63 @@ class GteModel(GtePreTrainedModel):
             hidden_states=encoder_outputs.hidden_states,
             attentions=encoder_outputs.attentions,
         )
+
+
+class GteForSequenceClassification(GtePreTrainedModel):
+
+    def __init__(self, config: GteConfig):
+        super().__init__(config)
+        self.config = config
+        self.num_labels = 1
+        self.model = GteModel(config, add_pooling_layer=True)
+
+        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)
+        self.loss_function = nn.MSELoss()
+
+    def get_input_embeddings(self):
+        return self.model.embed_tokens
+
+    def set_input_embeddings(self, value):
+        self.model.embed_tokens = value
+
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        past_key_values=None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+        labels: Optional[torch.LongTensor] = None,
+        use_cache: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+    ) -> SequenceClassifierOutputWithPast:
+        r"""
+        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+            config.num_labels - 1]`. If `config.num_labels == 1`, a regression loss is computed (Mean-Square loss); if
+            `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).
+        """
+
+        transformer_outputs = self.model(
+            input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            inputs_embeds=inputs_embeds,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+        )
+        hidden_states = transformer_outputs.pooler_output
+
+        logits = self.score(hidden_states)
+
+        loss = None
+        if labels is not None:
+            loss = self.loss_function(logits.squeeze(-1), labels.squeeze(-1))
+
+        return SequenceClassifierOutputWithPast(
+            loss=loss,
+            logits=logits,
+            hidden_states=transformer_outputs.hidden_states,
+            attentions=transformer_outputs.attentions,
+        )
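As wired here, the head is a single-logit regressor: `num_labels` is hardcoded to 1, the pooled representation is projected through a bias-free linear layer, and the loss is unconditionally MSE against float labels, i.e. reranker-style relevance scoring rather than multi-class classification (the inherited docstring's Cross-Entropy branch never triggers). A minimal sketch of exercising the class directly, assuming the repo's two remote-code modules are importable and that `GteConfig` instantiates with standard defaults such as `vocab_size`:

```python
import torch

# Hypothetical direct imports for illustration; in normal use the class is
# loaded via AutoModelForSequenceClassification with trust_remote_code=True.
from configuration_hf_alibaba_nlp_gte import GteConfig
from modeling_hf_alibaba_nlp_gte import GteForSequenceClassification

config = GteConfig()  # default (randomly initialized) config, for illustration
model = GteForSequenceClassification(config)

# Toy batch: random token ids stand in for real tokenizer output.
input_ids = torch.randint(0, config.vocab_size, (2, 16))
attention_mask = torch.ones_like(input_ids)
labels = torch.tensor([1.0, 0.0])  # float targets, since the loss is MSE

out = model(input_ids, attention_mask=attention_mask, labels=labels)
print(out.logits.shape)  # (2, 1): one relevance score per input
out.loss.backward()      # MSE between logits.squeeze(-1) and labels
```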