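"""Gradio app for the model_trace leaderboard space.

Displays evaluation results in a leaderboard and lets users run an
on-demand perplexity evaluation for a Hugging Face model.
"""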
import gradio as gr
from gradio_leaderboard import Leaderboard
import pandas as pd
from huggingface_hub import snapshot_download
from src.about import (
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
BENCHMARK_COLS,
COLS,
AutoEvalColumn,
)
from src.envs import API, EVAL_RESULTS_PATH, RESULTS_REPO, TOKEN
from src.populate import get_leaderboard_df
from src.evaluation.dynamic_eval import run_dynamic_perplexity_eval


def init_leaderboard(dataframe):
    """Build the leaderboard component from the results DataFrame."""
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        dataframe,
        headers=COLS,
        column_config={
            AutoEvalColumn.model.name: "markdown",
        },
    )


def run_perplexity_test(model_name, revision, precision):
    """Run perplexity evaluation on demand."""
    if not model_name:
        return "Please enter a model name."
    success, result = run_dynamic_perplexity_eval(model_name, revision, precision)
    if success:
        return (
            f"✅ Perplexity evaluation completed!\nPerplexity: {result:.4f}\n\n"
            "Results have been saved and will appear in the leaderboard shortly."
        )
    else:
        return f"❌ Evaluation failed: {result}"


# Initialize results directory
try:
    print(EVAL_RESULTS_PATH)
    snapshot_download(
        repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
        tqdm_class=None, etag_timeout=30, token=TOKEN,
    )
except Exception as e:
    print(f"Error initializing results: {e}")

# Get initial leaderboard data
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, COLS, BENCHMARK_COLS)

# Create the Gradio interface
demo = gr.Blocks(css=custom_css)

with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="leaderboard-tab", id=0):
            leaderboard = init_leaderboard(LEADERBOARD_DF)

        with gr.TabItem("📝 About", elem_id="about-tab", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🧪 Test Model", elem_id="test-model-tab", id=2):
            with gr.Row():
                with gr.Column():
                    model_name = gr.Textbox(label="Model name", placeholder="org/model-name")
                    revision = gr.Textbox(label="Revision", placeholder="main", value="main")
                    precision = gr.Dropdown(
                        choices=["float16", "bfloat16"],
                        label="Precision",
                        value="float16",
                    )
                with gr.Column():
                    test_button = gr.Button("🚀 Run Perplexity Test", variant="primary")
                    result = gr.Markdown()

            # Wire the button to the on-demand perplexity evaluation
            test_button.click(
                run_perplexity_test,
                [model_name, revision, precision],
                result,
            )

# Enable request queuing (up to 5 concurrent requests) and start the app
demo.queue(default_concurrency_limit=5).launch()