import sys

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def evaluate_perplexity(model_name, revision="main", test_text=None):
    """
    Evaluate perplexity on a fixed piece of text.

    Args:
        model_name: Hugging Face model identifier
        revision: Model revision/commit hash
        test_text: Text to evaluate perplexity on (default if None)

    Returns:
        float: Perplexity score (lower is better)
    """
    try:
        sys.stderr.write(f"Loading model: {model_name} (revision: {revision})\n")
        sys.stderr.flush()

        # Default test text if none provided
        if test_text is None:
            test_text = """Artificial intelligence has transformed the way we live and work, bringing both opportunities and challenges.
From autonomous vehicles to language models that can engage in human-like conversation, AI technologies are becoming increasingly
sophisticated. However, with this advancement comes the responsibility to ensure these systems are developed and deployed ethically,
with careful consideration for privacy, fairness, and transparency. The future of AI will likely depend on how well we balance innovation
with these important social considerations."""

        sys.stderr.write("Loading tokenizer...\n")
        sys.stderr.flush()

        # Load the tokenizer first
        tokenizer = AutoTokenizer.from_pretrained(model_name, revision=revision)
        sys.stderr.write("Tokenizer loaded successfully\n")
        sys.stderr.flush()

        sys.stderr.write("Loading model...\n")
        sys.stderr.flush()

        # Load the model in half precision and let device_map="auto" place it
        # on the available hardware
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            revision=revision,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        sys.stderr.write("Model loaded successfully\n")
        sys.stderr.flush()

        sys.stderr.write("Tokenizing input text...\n")
        sys.stderr.flush()

        # Tokenize the text
        inputs = tokenizer(test_text, return_tensors="pt")
        sys.stderr.write(f"Tokenized input shape: {inputs['input_ids'].shape}\n")
        sys.stderr.flush()

        # Move the inputs to the same device as the model
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        sys.stderr.write(f"Moved inputs to device: {model.device}\n")
        sys.stderr.flush()

        sys.stderr.write("Running forward pass...\n")
        sys.stderr.flush()

        # Passing the input ids as labels makes the model return the mean
        # cross-entropy loss over next-token predictions
        with torch.no_grad():
            outputs = model(**inputs, labels=inputs["input_ids"])
            loss = outputs.loss
        sys.stderr.write(f"Calculated loss: {loss.item()}\n")
        sys.stderr.flush()

        # Perplexity is the exponential of the mean cross-entropy loss
        perplexity = torch.exp(loss).item()
        sys.stderr.write(f"Final perplexity: {perplexity}\n")
        sys.stderr.flush()

        return perplexity
    except Exception as e:
        import traceback

        sys.stderr.write(f"Error in evaluate_perplexity: {e}\n")
        sys.stderr.write(f"Traceback: {traceback.format_exc()}\n")
        sys.stderr.flush()
        raise
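

# For reference: the perplexity returned above is exp() of the mean negative
# log-likelihood of each token given its prefix. The helper below is a minimal
# verification sketch that recomputes the same quantity from raw logits; it is
# an illustrative assumption and is not called by the rest of this script.
def manual_perplexity(model, tokenizer, text):
    import torch.nn.functional as F

    enc = tokenizer(text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        logits = model(**enc).logits
    # Shift so that position i predicts token i + 1
    shift_logits = logits[:, :-1, :].float()
    shift_labels = enc["input_ids"][:, 1:]
    # Mean cross-entropy over all predicted positions; this matches
    # outputs.loss from the labels= path in evaluate_perplexity
    nll = F.cross_entropy(
        shift_logits.reshape(-1, shift_logits.size(-1)),
        shift_labels.reshape(-1),
    )
    return torch.exp(nll).item()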


def create_perplexity_result(model_name, revision, precision, perplexity_score):
    """
    Build a result dictionary in the expected output format.
    """
    return {
        "config": {
            "model_dtype": f"torch.{precision}",
            "model_name": model_name,
            "model_sha": revision,
        },
        "results": {
            "perplexity": {
                "perplexity": perplexity_score,
            }
        },
    }
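

# A minimal usage sketch, assuming the script is run directly. The command-line
# interface, the "gpt2" fallback, and the results.json output path are
# assumptions for illustration, not a documented part of the original pipeline.
if __name__ == "__main__":
    import json

    model_name = sys.argv[1] if len(sys.argv) > 1 else "gpt2"
    revision = sys.argv[2] if len(sys.argv) > 2 else "main"

    score = evaluate_perplexity(model_name, revision=revision)
    result = create_perplexity_result(model_name, revision, "float16", score)

    # Persist and echo the result payload
    with open("results.json", "w") as f:
        json.dump(result, f, indent=2)
    print(json.dumps(result, indent=2))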