"""Custom inference handler for line-by-line sentiment analysis.

Loads a label encoder and a text-classification pipeline at import time and
exposes ``EndpointHandler``, whose ``__call__`` takes a request dict with an
``"inputs"`` entry and returns one sentiment result per non-empty input line.
"""

from transformers import pipeline
from sklearn.preprocessing import LabelEncoder
import joblib
import torch
import os

# Debugging: print current directory and contents
print("Current working directory:", os.getcwd())
print("Contents of the directory:", os.listdir())

# Load the label encoder (absolute path).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only ever point this at a trusted artifact.
label_encoder = joblib.load('/repository/label_encoder.pkl')
print("Label encoder loaded successfully.")

# Load the model and tokenizer from Hugging Face
model_name = "SCANSKY/distilbertTourism-multilingual-sentiment"
sentiment_analyzer = pipeline(
    'sentiment-analysis',
    model=model_name,
    tokenizer=model_name,
    device=0 if torch.cuda.is_available() else -1  # Use GPU if available
)


def get_average_sentiment(positive_count, negative_count, neutral_count):
    """Return the dominant sentiment among three counts.

    Args:
        positive_count: Number of lines classified as positive.
        negative_count: Number of lines classified as negative.
        neutral_count: Number of lines classified as neutral.

    Returns:
        ``"positive"``, ``"negative"`` or ``"neutral"``. Ties are resolved in
        that order; ``"neutral"`` is returned when all counts sum to zero.
        Counts are assumed to be non-negative.
    """
    total = positive_count + negative_count + neutral_count
    if total == 0:
        return "neutral"

    # Comparing the raw counts gives the same winner as comparing percentages
    # of the same total, without the intermediate float arithmetic.
    highest = max(positive_count, negative_count, neutral_count)
    if highest == positive_count:
        return "positive"
    if highest == negative_count:
        return "negative"
    return "neutral"


class EndpointHandler:
    """Request handler: preprocess -> inference -> postprocess."""

    def __init__(self, model_dir=None):
        # Model and tokenizer are loaded globally, so no need to reinitialize
        # here. The `model_dir` argument is required by Hugging Face's
        # inference toolkit.
        pass

    def preprocess(self, data):
        """Extract the input text from the request as a single string.

        Args:
            data: Request mapping; the text is read from its ``"inputs"`` key.

        Returns:
            The text to analyze. A missing or ``None`` value becomes ``""``,
            a list/tuple is joined with newlines (one item per line), and any
            other non-string value is converted with ``str``.
        """
        text = data.get("inputs", "")
        if text is None:
            return ""
        if isinstance(text, str):
            return text
        if isinstance(text, (list, tuple)):
            # inference() splits on newlines, so each item becomes its own
            # line (or several, if an item itself contains newlines).
            return "\n".join(str(item) for item in text)
        return str(text)

    def inference(self, text):
        """Classify every non-empty line of ``text``.

        Args:
            text: Input string; each non-blank line is analyzed separately.

        Returns:
            ``{"error": message}`` when there is nothing to analyze, otherwise
            a dict with ``total_lines_analyzed``, ``average_confidence``,
            ``average_sentiment``, ``sentiment_distribution`` (percentages)
            and ``line_results`` (one ``text``/``sentiment``/``confidence``
            dict per line, confidence expressed as a percentage).
        """
        if not text.strip():
            return {"error": "Please enter some text for sentiment analysis."}

        # Split text into stripped, non-empty lines
        lines = [line.strip() for line in text.split('\n') if line.strip()]
        if not lines:
            return {"error": "Please enter valid text for sentiment analysis."}

        counts = {"positive": 0, "negative": 0, "neutral": 0}
        total_confidence = 0
        line_results = []  # Per-line results, in input order

        for line in lines:
            # truncation=True is forwarded to the tokenizer so an over-long
            # line is cut to the model's maximum length instead of failing.
            prediction = sentiment_analyzer(line, truncation=True)[0]

            # The label's trailing "_<n>" suffix is parsed as the encoded
            # class index and mapped back through the label encoder.
            encoded_label = int(prediction['label'].split('_')[-1])
            predicted_label = label_encoder.inverse_transform([encoded_label])[0]
            confidence = prediction['score'] * 100

            line_results.append({
                'text': line,
                'sentiment': predicted_label,
                'confidence': confidence
            })

            # Anything that is not explicitly positive/negative counts as
            # neutral.
            if predicted_label == 'positive':
                counts["positive"] += 1
            elif predicted_label == 'negative':
                counts["negative"] += 1
            else:
                counts["neutral"] += 1
            total_confidence += confidence

        line_count = len(lines)
        avg_sentiment = get_average_sentiment(
            counts["positive"], counts["negative"], counts["neutral"]
        )

        return {
            "total_lines_analyzed": line_count,
            "average_confidence": total_confidence / line_count,
            "average_sentiment": avg_sentiment,
            "sentiment_distribution": {
                "positive": (counts["positive"] / line_count) * 100,
                "negative": (counts["negative"] / line_count) * 100,
                "neutral": (counts["neutral"] / line_count) * 100
            },
            "line_results": line_results
        }

    def postprocess(self, output):
        """Reduce the inference output to the response payload.

        Args:
            output: Dict returned by ``inference``.

        Returns:
            ``[{"error": message}]`` for an error result, otherwise only the
            list of line-level results (the aggregate fields are dropped).
        """
        if "error" in output:
            return [{"error": output["error"]}]
        return output["line_results"]

    def __call__(self, data):
        """Handle one request: extract text, run inference, shape response."""
        text = self.preprocess(data)
        output = self.inference(text)
        return self.postprocess(output)