emanuelaboros
/

lang-detect

Token Classification

language-identification

Model card Files Files and versions

emanuelaboros committed on Feb 27

Commit

ce6d631

·

1 Parent(s): 770b039

testing the trick

Files changed (2) hide show

lang_detect.py +2 -2
modeling_stacked.py +14 -16

lang_detect.py CHANGED Viewed

@@ -16,8 +16,8 @@ class MultitaskTokenClassificationPipeline(Pipeline):
     def _forward(self, text):
         print(f"Do we arrive here? {text}")
         print(f"Let's check the model: {self.model.get_floret_model()}")
-        predictions, probabilities = self.model.get_floret_model().predict([text], k=1)
         return text
     def postprocess(self, text, **kwargs):

     def _forward(self, text):
         print(f"Do we arrive here? {text}")
         print(f"Let's check the model: {self.model.get_floret_model()}")
+        # predictions, probabilities = self.model.get_floret_model().predict([text], k=1)
+        self.model(text)
         return text
     def postprocess(self, text, **kwargs):

modeling_stacked.py CHANGED Viewed

@@ -42,22 +42,20 @@ class ExtendedMultitaskModelForTokenClassification(PreTrainedModel):
     def forward(self, input_ids, attention_mask=None, **kwargs):
         # Convert input_ids to strings using tokenizer
         print(f"Check if it arrives here: {input_ids}")
-        if input_ids is not None:
-            tokenizer = kwargs.get("tokenizer")
-            texts = tokenizer.batch_decode(input_ids, skip_special_tokens=True)
-        else:
-            texts = kwargs.get("text", None)
-        if texts:
-            # Floret expects strings, not tensors
-            predictions = [self.model_floret(text) for text in texts]
-            # Convert predictions to tensors for Hugging Face compatibility
-            return torch.tensor(predictions)
-        else:
-            # If no text is found, return dummy output
-            return torch.zeros(
-                (1, 2)
-            )  # Dummy tensor with shape (batch_size, num_classes)
     def state_dict(self, *args, **kwargs):
         # Return an empty state dictionary

     def forward(self, input_ids, attention_mask=None, **kwargs):
         # Convert input_ids to strings using tokenizer
         print(f"Check if it arrives here: {input_ids}")
+        # if input_ids is not None:
+        #     tokenizer = kwargs.get("tokenizer")
+        #     texts = tokenizer.batch_decode(input_ids, skip_special_tokens=True)
+        # else:
+        #     texts = kwargs.get("text", None)
+        #
+        # if texts:
+        #     # Floret expects strings, not tensors
+        #     predictions = [self.model_floret(text) for text in texts]
+        #     # Convert predictions to tensors for Hugging Face compatibility
+        #     return torch.tensor(predictions)
+        # else:
+        # If no text is found, return dummy output
+        return torch.zeros((1, 2))  # Dummy tensor with shape (batch_size, num_classes)
     def state_dict(self, *args, **kwargs):
         # Return an empty state dictionary