# handler.py — SCANSKY custom inference handler
# Revision: 10789df (verified)
# (Header reconstructed from Hugging Face file-viewer chrome: "raw / history /
# blame", 4.33 kB — the original scraped lines were not valid Python.)
from transformers import pipeline
from sklearn.preprocessing import LabelEncoder
import joblib
import torch
import os
# Debugging: print current directory and its contents at import time so
# endpoint startup logs show what files were actually deployed.
print("Current working directory:", os.getcwd())
print("Contents of the directory:", os.listdir())
# Load the fitted sklearn LabelEncoder used to map the model's integer
# class ids (LABEL_0, LABEL_1, ...) back to string sentiment labels.
# NOTE(review): hard-coded absolute path assumes the HF Inference Endpoint
# mounts the model repository at /repository — confirm for this deployment.
label_encoder = joblib.load('/repository/label_encoder.pkl') # Use absolute path
print("Label encoder loaded successfully.")
# Load the model and tokenizer from Hugging Face at module import time so a
# single pipeline instance is shared by every request handled by this worker.
model_name = "SCANSKY/distilbertTourism-multilingual-sentiment"
sentiment_analyzer = pipeline(
'sentiment-analysis',
model=model_name,
tokenizer=model_name,
device=0 if torch.cuda.is_available() else -1 # Use GPU if available
)
def get_average_sentiment(positive_count, negative_count, neutral_count):
    """Return the dominant sentiment label among the three counts.

    Ties are broken in the order positive > negative > neutral, matching the
    original percentage-based comparison. Returns "neutral" when all counts
    are zero.

    Args:
        positive_count: Number of lines classified as positive.
        negative_count: Number of lines classified as negative.
        neutral_count: Number of lines classified as neutral.

    Returns:
        One of "positive", "negative" or "neutral".
    """
    # Converting counts to percentages and comparing floats with == (as the
    # original did) is redundant: since all three share the same denominator,
    # comparing the integer counts directly is equivalent and exact.
    if positive_count + negative_count + neutral_count == 0:
        return "neutral"
    if positive_count >= negative_count and positive_count >= neutral_count:
        return "positive"
    if negative_count >= neutral_count:
        return "negative"
    return "neutral"
class EndpointHandler:
    """Hugging Face custom inference handler for line-level sentiment analysis.

    Splits the request text into non-empty lines, classifies each line with
    the module-level ``sentiment_analyzer`` pipeline, decodes the predicted
    class id via the module-level ``label_encoder``, and returns per-line
    results. The entry point is :meth:`__call__`.
    """

    def __init__(self, model_dir=None):
        """Accept (and ignore) ``model_dir``.

        The model, tokenizer and label encoder are loaded once at module
        import time; ``model_dir`` exists only because the HF inference
        toolkit passes it when instantiating the handler.
        """
        pass

    def preprocess(self, data):
        """Extract the input text from the request payload.

        Args:
            data: Request dict; the text is expected under the "inputs" key.

        Returns:
            The input as a single string. A list payload (a shape some HF
            clients send for batched inputs) is joined with newlines so each
            element is analyzed as its own line; any other non-string value
            is coerced with ``str()``. The original implementation returned
            the raw value and crashed later on ``text.strip()`` for
            non-string payloads.
        """
        text = data.get("inputs", "")
        if isinstance(text, (list, tuple)):
            # Batched payloads: one line per element.
            text = "\n".join(str(item) for item in text)
        elif not isinstance(text, str):
            text = str(text)
        return text

    def inference(self, text):
        """Run per-line sentiment analysis and aggregate the results.

        Args:
            text: Input text; each non-empty line is classified separately.

        Returns:
            A dict with line count, average confidence (percent), overall
            sentiment, percentage distribution, and per-line results — or a
            dict with an "error" key when the input is empty.
        """
        if not text.strip():
            return {"error": "Please enter some text for sentiment analysis."}
        # Split into non-empty, stripped lines.
        lines = [line.strip() for line in text.split('\n') if line.strip()]
        if not lines:
            return {"error": "Please enter valid text for sentiment analysis."}

        total_confidence = 0
        positive_count = 0
        negative_count = 0
        neutral_count = 0
        line_results = []  # Per-line sentiment results, in input order.
        for line in lines:
            result = sentiment_analyzer(line)
            # Pipeline labels look like "LABEL_<k>"; recover the integer id
            # and map it back to the human-readable label.
            predicted_label_encoded = int(result[0]['label'].split('_')[-1])
            predicted_label = label_encoder.inverse_transform([predicted_label_encoded])[0]
            confidence = result[0]['score'] * 100  # as a percentage
            line_results.append({
                'text': line,
                'sentiment': predicted_label,
                'confidence': confidence
            })
            if predicted_label == 'positive':
                positive_count += 1
            elif predicted_label == 'negative':
                negative_count += 1
            else:
                neutral_count += 1
            total_confidence += confidence

        # Aggregate statistics across all analyzed lines.
        avg_confidence = total_confidence / len(lines)
        positive_pct = (positive_count / len(lines)) * 100
        negative_pct = (negative_count / len(lines)) * 100
        neutral_pct = (neutral_count / len(lines)) * 100
        avg_sentiment = get_average_sentiment(positive_count, negative_count, neutral_count)

        return {
            "total_lines_analyzed": len(lines),
            "average_confidence": avg_confidence,
            "average_sentiment": avg_sentiment,
            "sentiment_distribution": {
                "positive": positive_pct,
                "negative": negative_pct,
                "neutral": neutral_pct
            },
            "line_results": line_results
        }

    def postprocess(self, output):
        """Shape the inference output for the response.

        Error dicts are wrapped in a single-element list; otherwise only the
        per-line results are returned.
        """
        if "error" in output:
            return [{"error": output["error"]}]
        return output["line_results"]

    def __call__(self, data):
        """Handle one request: preprocess -> inference -> postprocess."""
        text = self.preprocess(data)
        output = self.inference(text)
        return self.postprocess(output)