Ritvik19
/

sentinet-v1

@@ -1,81 +0,0 @@
-import cleantext
-import joblib
-import os
class PreTrainedPipeline:
    """Score texts with a set of per-label binary classifiers.

    One model is loaded per entry in ``MODEL_NAMES`` from ``<path>/<name>.bin``.
    Each model must expose ``predict_proba``; the probability in column 1 is
    rescaled to a numeric score (0..100 by default, -100..100 for
    ``sentiment_polarity``).
    """

    # File stems of the serialized models, in load order. The "toxicity" and
    # "emotion" prefixes are what `call` uses to group scores in its result.
    MODEL_NAMES = (
        "sentiment_polarity",
        "opinion",
        "toxicity",
        "toxicity__hate",
        "toxicity__insult",
        "toxicity__obscene",
        "toxicity__sexual_explicit",
        "toxicity__threat",
        "emotion__no_emotion",
        "emotion__anger",
        "emotion__disgust",
        "emotion__fear",
        "emotion__guilt",
        "emotion__humour",
        "emotion__joy",
        "emotion__sadness",
        "emotion__shame",
        "emotion__surprise",
    )

    def __init__(self, path) -> None:
        """Load every model found under the directory *path*."""
        self.models = self.load_models(path)

    def load_models(self, path) -> dict:
        """Return a ``{model_name: model}`` dict loaded from *path*.

        Each model is read from ``os.path.join(path, f"{name}.bin")``; a
        missing file propagates whatever error ``joblib.load`` raises.
        """
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only point `path` at model directories you trust.
        return {
            name: joblib.load(os.path.join(path, f"{name}.bin"))
            for name in self.MODEL_NAMES
        }

    def clean_text(self, text) -> str:
        """Normalize *text* before it is handed to a model.

        Unicode fixing, ASCII transliteration and lowercasing are enabled;
        every ``no_*`` replacement switch is off, so the ``replace_with_*``
        tokens below are presumably inert and kept only for completeness.
        """
        return cleantext.clean(
            text,
            fix_unicode=True,  # fix various unicode errors
            to_ascii=True,  # transliterate to closest ASCII representation
            lower=True,  # lowercase text
            no_line_breaks=False,  # fully strip line breaks as opposed to only normalizing them
            no_urls=False,  # replace all URLs with a special token
            no_emails=False,  # replace all email addresses with a special token
            no_phone_numbers=False,  # replace all phone numbers with a special token
            no_numbers=False,  # replace all numbers with a special token
            no_digits=False,  # replace all digits with a special token
            no_currency_symbols=False,  # replace all currency symbols with a special token
            no_punct=False,  # remove punctuations
            replace_with_punct="",  # instead of removing punctuations you may replace them
            replace_with_url="<URL>",
            replace_with_email="<EMAIL>",
            replace_with_phone_number="<PHONE>",
            replace_with_number="<NUMBER>",
            replace_with_digit="0",
            replace_with_currency_symbol="<CUR>",
            lang="en",  # set to 'de' for German special handling
        )

    def _score(self, cleaned_text, model, scale_min=0, scale_max=100) -> float:
        """Score already-cleaned text with *model* on [scale_min, scale_max]."""
        # Column 1 of the first (only) row is rescaled linearly from [0, 1].
        probability = model.predict_proba([cleaned_text])[0][1]
        return round(probability * (scale_max - scale_min) + scale_min, 2)

    def get_prediction(self, text, model, scale_min=0, scale_max=100) -> float:
        """Clean *text* and return the model's score rounded to 2 decimals.

        The score is ``p * (scale_max - scale_min) + scale_min`` where ``p``
        is ``model.predict_proba(...)[0][1]``.
        """
        return self._score(self.clean_text(text), model, scale_min, scale_max)

    def call(self, text):
        """Return all scores for a single *text*.

        The result has four keys: ``sentiment_polarity`` (scaled -100..100),
        ``opinion`` (0..100), and ``toxicity`` / ``emotion``, each a dict of
        0..100 scores keyed by the full model name carrying that prefix.
        """
        # Cleaning does not depend on the model, so do it once per text
        # instead of once per model.
        cleaned = self.clean_text(text)

        def scores_with_prefix(prefix):
            return {
                name: self._score(cleaned, model)
                for name, model in self.models.items()
                if name.startswith(prefix)
            }

        return {
            "sentiment_polarity": self._score(
                cleaned,
                self.models["sentiment_polarity"],
                scale_min=-100,
                scale_max=100,
            ),
            "opinion": self._score(cleaned, self.models["opinion"]),
            "toxicity": scores_with_prefix("toxicity"),
            "emotion": scores_with_prefix("emotion"),
        }

    def __call__(self, texts) -> list:
        """Return one result dict (see `call`) per text in *texts*.

        A bare string is treated as a one-item batch; iterating it directly
        would score each character separately.
        """
        if isinstance(texts, str):
            texts = [texts]
        return [self.call(text) for text in texts]

requirements.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- clean-text