# Page header captured with this file (not part of the module): "Spaces: Runtime error"
from dataclasses import dataclass
from enum import Enum
@dataclass
class Task:
    """Description of one leaderboard task.

    Instances are built positionally, e.g. ``Task("perplexity", "perplexity",
    "Perplexity")``, so the ``@dataclass`` decorator is required to generate
    the matching ``__init__``.

    Attributes:
        benchmark: task_key in the json file.
        metric: metric_key in the json file.
        col_name: name to display in the leaderboard.
    """

    benchmark: str
    metric: str
    col_name: str
# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Enumeration of the selected tasks; each member's value is a ``Task``."""

    # Task arguments, in order: task_key in the json file, metric_key in the
    # json file, name to display in the leaderboard.
    task0 = Task("perplexity", "perplexity", "Perplexity")
# Number of few-shot examples; not used for perplexity.
NUM_FEWSHOT = 0
# ---------------------------------------------------
# Your leaderboard name (an HTML heading snippet).
TITLE = """<h1 align="center" id="space-title">Model Perplexity Leaderboard</h1>"""
# What does your leaderboard evaluate?
# Short introduction text; the string deliberately starts and ends with a newline.
INTRODUCTION_TEXT = """
This leaderboard evaluates language models based on their perplexity scores on a fixed test passage.
Lower perplexity scores indicate better performance - it means the model is better at predicting the next token in the text.
"""
# Which evaluations are you running?
# Markdown describing how the evaluation works, including the fixed test
# passage quoted verbatim inside a fenced code block.
LLM_BENCHMARKS_TEXT = """
## How it works
The evaluation runs perplexity tests on language models using a fixed test passage about artificial intelligence.
Perplexity measures how well a model predicts text - lower scores mean better predictions.
## Test Text
The evaluation uses the following passage:
```
Artificial intelligence has transformed the way we live and work, bringing both opportunities and challenges.
From autonomous vehicles to language models that can engage in human-like conversation, AI technologies are becoming increasingly
sophisticated. However, with this advancement comes the responsibility to ensure these systems are developed and deployed ethically,
with careful consideration for privacy, fairness, and transparency. The future of AI will likely depend on how well we balance innovation
with these important social considerations.
```
"""
# Markdown checklist of requirements to meet before submitting a model.
EVALUATION_QUEUE_TEXT = """
## Before submitting a model
1. Make sure your model is public on the Hugging Face Hub
2. The model should be loadable with AutoModelForCausalLM
3. The model should support text generation tasks
"""
# Label for the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# Citation snippet itself; currently empty (no citation is provided).
CITATION_BUTTON_TEXT = ""