Spaces:

ahmedsqrd
/

model_trace

Runtime error

model_trace / src /evaluation /perplexity_eval.py

Ahmed Ahmed

consolidate

536d515 10 days ago

3.72 kB

	import torch
	import sys
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import numpy as np

	def _log_stderr(message):
	    """Write one progress line to stderr and flush it right away."""
	    sys.stderr.write(f"{message}\n")
	    sys.stderr.flush()


	def evaluate_perplexity(model_name, revision="main", test_text=None):
	    """
	    Evaluate perplexity on a fixed piece of text.

	    The tokenizer and model are loaded via ``from_pretrained`` (model in
	    float16 with ``device_map="auto"``), the text is tokenized and run
	    through the model with the input ids as labels, and the exponential
	    of the returned loss is reported. Progress is logged to stderr.

	    Args:
	        model_name: Hugging Face model identifier
	        revision: Model revision/commit hash
	        test_text: Text to evaluate perplexity on. ``None`` selects the
	            built-in default paragraph; an empty value is rejected.

	    Returns:
	        float: Perplexity score (lower is better)

	    Raises:
	        ValueError: If ``test_text`` is empty, or tokenizes to fewer than
	            two tokens.
	        Exception: Any other failure is logged to stderr together with
	            its traceback and then re-raised unchanged.
	    """
	    try:
	        _log_stderr(f"Loading model: {model_name} (revision: {revision})")

	        if test_text is None:
	            # Default test text if none provided
	            test_text = """Artificial intelligence has transformed the way we live and work, bringing both opportunities and challenges.
	From autonomous vehicles to language models that can engage in human-like conversation, AI technologies are becoming increasingly
	sophisticated. However, with this advancement comes the responsibility to ensure these systems are developed and deployed ethically,
	with careful consideration for privacy, fairness, and transparency. The future of AI will likely depend on how well we balance innovation
	with these important social considerations."""
	        elif not test_text:
	            # Fail fast, before the expensive model download/load.
	            raise ValueError("test_text must be a non-empty string")

	        _log_stderr("Loading tokenizer...")
	        tokenizer = AutoTokenizer.from_pretrained(model_name, revision=revision)
	        _log_stderr("Tokenizer loaded successfully")

	        _log_stderr("Loading model...")
	        model = AutoModelForCausalLM.from_pretrained(
	            model_name,
	            revision=revision,
	            torch_dtype=torch.float16,
	            device_map="auto",
	        )
	        _log_stderr("Model loaded successfully")

	        _log_stderr("Tokenizing input text...")
	        inputs = tokenizer(test_text, return_tensors="pt")
	        _log_stderr(f"Tokenized input shape: {inputs['input_ids'].shape}")

	        # A causal-LM loss scores next-token predictions, so a sequence
	        # shorter than two tokens has no target and would yield NaN.
	        if inputs["input_ids"].shape[-1] < 2:
	            raise ValueError(
	                "test_text must tokenize to at least two tokens to compute perplexity"
	            )

	        # Move to same device as model
	        inputs = {name: tensor.to(model.device) for name, tensor in inputs.items()}
	        _log_stderr(f"Moved inputs to device: {model.device}")

	        _log_stderr("Running forward pass...")
	        with torch.no_grad():
	            outputs = model(**inputs, labels=inputs["input_ids"])
	            loss = outputs.loss
	        _log_stderr(f"Calculated loss: {loss.item()}")

	        # Exponentiate in float32: float16 tops out at 65504, so if the
	        # model hands back a half-precision loss, exp() of anything above
	        # roughly 11 would overflow to inf.
	        perplexity = torch.exp(loss.float()).item()
	        _log_stderr(f"Final perplexity: {perplexity}")

	        return perplexity

	    except Exception as e:
	        import traceback

	        # Log-and-re-raise boundary: callers still see the original error.
	        _log_stderr(f"Error in evaluate_perplexity: {e}")
	        _log_stderr(f"Traceback: {traceback.format_exc()}")
	        raise

	def create_perplexity_result(model_name, revision, precision, perplexity_score):
	    """
	    Build the result record for one perplexity evaluation.

	    Nothing is written to disk here; the nested dict is simply returned.

	    Args:
	        model_name: Stored under ``config.model_name``.
	        revision: Stored under ``config.model_sha``.
	        precision: Dtype name; stored under ``config.model_dtype`` with a
	            ``torch.`` prefix.
	        perplexity_score: Stored under ``results.perplexity.perplexity``.

	    Returns:
	        dict: ``{"config": {...}, "results": {"perplexity": {...}}}``
	    """
	    dtype_label = f"torch.{precision}"

	    config_section = {}
	    config_section["model_dtype"] = dtype_label
	    config_section["model_name"] = model_name
	    config_section["model_sha"] = revision

	    results_section = {"perplexity": {"perplexity": perplexity_score}}

	    record = {}
	    record["config"] = config_section
	    record["results"] = results_section
	    return record