Spaces:
Runtime error
Runtime error
import json | |
import os | |
import sys | |
from datetime import datetime | |
from src.evaluation.perplexity_eval import evaluate_perplexity, create_perplexity_result | |
from src.envs import EVAL_RESULTS_PATH, API, RESULTS_REPO | |
def run_dynamic_perplexity_eval(model_name, revision="main", precision="float16"):
    """Run a perplexity evaluation, save the result locally, and upload it.

    The result is written as JSON under ``EVAL_RESULTS_PATH`` (inside an
    organisation sub-directory when ``model_name`` has the form
    ``org/model``) and then uploaded to the ``RESULTS_REPO`` dataset.
    Progress and errors are reported on ``sys.stderr``.

    Args:
        model_name: Model identifier, optionally prefixed with ``org/``.
        revision: Revision passed to ``evaluate_perplexity`` and recorded in
            the result.
        precision: Precision label recorded in the result; it is not passed
            to ``evaluate_perplexity``.

    Returns:
        ``(True, perplexity_score)`` when evaluation and the local save
        succeed (an upload failure is logged but does not change this), or
        ``(False, error_message)`` when any other step raises.
    """

    def log(message):
        # Flush after every message so output is not held back by buffering.
        sys.stderr.write(message)
        sys.stderr.flush()

    try:
        log(f"Starting dynamic evaluation for {model_name}\n")

        # Run evaluation
        log("Running perplexity evaluation...\n")
        perplexity_score = evaluate_perplexity(model_name, revision)
        log(f"Perplexity evaluation completed: {perplexity_score}\n")

        # Create result structure
        result = create_perplexity_result(model_name, revision, precision, perplexity_score)
        log(f"Created result structure: {result}\n")

        # Build a unique file name; slashes are replaced so the model name
        # cannot introduce extra path components.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        result_filename = f"results_{model_name.replace('/', '_')}_{timestamp}.json"

        # Results for "org/model" go into an "org" sub-directory. Splitting at
        # most once avoids an unpacking error on names with several slashes.
        org = model_name.split("/", 1)[0] if "/" in model_name else ""
        result_dir = os.path.join(EVAL_RESULTS_PATH, org) if org else EVAL_RESULTS_PATH
        os.makedirs(result_dir, exist_ok=True)
        result_path = os.path.join(result_dir, result_filename)

        log(f"Saving result to: {result_path}\n")
        with open(result_path, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2)
        log("Result file saved locally\n")

        # Upload to Hugging Face dataset
        try:
            log(f"Uploading to HF dataset: {RESULTS_REPO}\n")
            # Derive the in-repo path from the configured results root rather
            # than a hard-coded "eval-results/" substring, and use forward
            # slashes regardless of the local path separator.
            path_in_repo = os.path.relpath(result_path, EVAL_RESULTS_PATH).replace(os.sep, "/")
            API.upload_file(
                path_or_fileobj=result_path,
                path_in_repo=path_in_repo,
                repo_id=RESULTS_REPO,
                repo_type="dataset",
                commit_message=f"Add perplexity results for {model_name}",
            )
            log("Upload completed successfully\n")
        except Exception as upload_error:
            # Don't fail the whole process if upload fails: the result file
            # has already been saved locally.
            log(f"Upload failed: {upload_error}\n")

        return True, perplexity_score
    except Exception as e:
        import traceback

        # Top-level boundary: report the failure to the caller instead of raising.
        log(
            f"Error in run_dynamic_perplexity_eval: {e}\n"
            f"Traceback: {traceback.format_exc()}\n"
        )
        return False, str(e)