Spaces:
Runtime error
Runtime error
File size: 4,069 Bytes
359f755 77c0f20 359f755 de8f813 24c8512 359f755 1b2d49a 359f755 24c8512 77c0f20 70ea05e 359f755 24c8512 77c0f20 359f755 1b2d49a 359f755 70ea05e 24c8512 77c0f20 24c8512 de8f813 24c8512 77c0f20 24c8512 70ea05e 77c0f20 359f755 77c0f20 359f755 77c0f20 359f755 77c0f20 359f755 77c0f20 359f755 70ea05e 77c0f20 70ea05e 77c0f20 70ea05e 77c0f20 359f755 77c0f20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
import gradio as gr
from gradio_leaderboard import Leaderboard
import pandas as pd
from huggingface_hub import snapshot_download, create_repo
from huggingface_hub.utils import RepositoryNotFoundError
import os
from src.about import (
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
BENCHMARK_COLS,
COLS,
AutoEvalColumn,
fields,
)
from src.envs import API, EVAL_RESULTS_PATH, RESULTS_REPO, TOKEN, OWNER
from src.populate import get_leaderboard_df
from src.evaluation.dynamic_eval import run_dynamic_perplexity_eval
def init_leaderboard(dataframe):
    """Build the ``Leaderboard`` component shown in the leaderboard tab.

    Args:
        dataframe: Table passed to the component as its ``value``.

    Returns:
        A ``Leaderboard`` configured from the ``AutoEvalColumn`` fields:
        non-hidden columns are selectable, hidden ones are hidden, search runs
        on the model column, and filters are offered for model type and
        precision.

    Raises:
        ValueError: If ``dataframe`` is ``None``.
    """
    if dataframe is None:
        raise ValueError("Leaderboard DataFrame is None.")

    # Materialise once so both partitions below see the same column objects.
    all_columns = list(fields(AutoEvalColumn))
    visible_names = [col.name for col in all_columns if not col.hidden]
    hidden_names = [col.name for col in all_columns if col.hidden]
    filterable_names = [
        AutoEvalColumn.model_type.name,
        AutoEvalColumn.precision.name,
    ]

    return Leaderboard(
        value=dataframe,
        select_columns=visible_names,
        search_columns=[AutoEvalColumn.model.name],
        hide_columns=hidden_names,
        filter_columns=filterable_names,
    )
def run_perplexity_test(model_name, revision, precision):
    """Run a perplexity evaluation on demand and describe the outcome.

    Args:
        model_name: Identifier of the model to evaluate. ``None``, empty and
            whitespace-only values are rejected before any evaluation starts;
            surrounding whitespace is stripped before the name is forwarded.
        revision: Model revision, forwarded unchanged to the evaluator.
        precision: Precision label, forwarded unchanged to the evaluator.

    Returns:
        A message string: a prompt to enter a model name, a success message
        containing the perplexity formatted to four decimals, or a failure
        message containing whatever the evaluator returned as its result.
    """
    # A whitespace-only name is as unusable as an empty one, so treat both the
    # same instead of handing a blank identifier to the evaluator.
    cleaned_name = (model_name or "").strip()
    if not cleaned_name:
        return "Please enter a model name."

    success, result = run_dynamic_perplexity_eval(cleaned_name, revision, precision)
    if success:
        # The leading emoji was mojibake in the scraped source and split this
        # literal across two lines; it is restored here as a single string.
        return (
            f"✅ Perplexity evaluation completed!\nPerplexity: {result:.4f}\n\n"
            "Results have been saved and will appear in the leaderboard shortly."
        )
    return f"❌ Evaluation failed: {result}"
# Prepare the local results directory from the results dataset repo.
# Failures are reported on stdout and never abort start-up: in every failure
# path the local directory is still created so later code can read from it.
try:
    try:
        # Mirror the existing results dataset into EVAL_RESULTS_PATH.
        snapshot_download(
            repo_id=RESULTS_REPO,
            local_dir=EVAL_RESULTS_PATH,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            token=TOKEN,
        )
    except RepositoryNotFoundError:
        # No results repo yet: create a public dataset repo and start with an
        # empty local directory. Errors raised here fall through to the outer
        # handler, which is why the two `try` statements stay nested.
        print(f"Creating new results repository: {RESULTS_REPO}")
        create_repo(
            repo_id=RESULTS_REPO,
            repo_type="dataset",
            private=False,
            token=TOKEN,
        )
        os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
except Exception as init_error:
    print(f"Error initializing results: {init_error}")
    # Keep the app usable even when the repo operations fail.
    os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)

# Load the table displayed when the app first renders.
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)
# Build the Gradio interface: a title and introduction above three tabs
# (leaderboard, about text, on-demand perplexity test).
# NOTE(review): the scraped source lost all indentation, so the nesting below
# is reconstructed from statement order - confirm against the deployed file.
# The emoji in the labels were mojibake (one of them split a string literal
# across two lines) and are restored as UTF-8 characters.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="leaderboard-tab", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

        with gr.TabItem("📝 About", elem_id="about-tab", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🧪 Test Model", elem_id="test-model-tab", id=2):
            with gr.Row():
                # Left column: inputs describing the model to evaluate.
                with gr.Column():
                    model_name = gr.Textbox(label="Model name", placeholder="org/model-name")
                    revision = gr.Textbox(label="Revision", placeholder="main", value="main")
                    precision = gr.Dropdown(
                        choices=["float16", "bfloat16"],
                        label="Precision",
                        value="float16",
                    )

                # Right column: trigger button and the rendered result message.
                with gr.Column():
                    test_button = gr.Button("🚀 Run Perplexity Test", variant="primary")
                    result = gr.Markdown()

            # Event wiring must happen inside the Blocks context.
            test_button.click(
                run_perplexity_test,
                [model_name, revision, precision],
                result,
            )

# Enable the request queue (default of 5 concurrent runs per event) and serve.
demo.queue(default_concurrency_limit=5).launch()