AdaDTur committed
Commit · 2a9bd05
Parent(s): d690449
leaderboard

- leaderboard.py +79 -0
- results.csv +6 -0
leaderboard.py
ADDED
@@ -0,0 +1,79 @@
import gradio as gr
import pandas as pd

LEADERBOARD_FILE = "results.csv"

def load_leaderboard():
    return pd.read_csv(LEADERBOARD_FILE)

def display_leaderboard(score_type="Overall", open_filter="Both"):
    df = load_leaderboard()

    # Restrict to open- or closed-weight models unless "Both" is selected.
    if open_filter == "Open":
        df = df[df["Open"] == True]
    elif open_filter == "Closed":
        df = df[df["Open"] == False]

    # Select the columns for the chosen data split and sort by its safe-task score.
    if score_type == "Human-Generated Split":
        df = df[["Model", "Human-Generated Safe Tasks Score", "Human-Generated Harm Tasks Score"]]
        sort_col = "Human-Generated Safe Tasks Score"
    elif score_type == "HITL Split":
        df = df[["Model", "HITL Safe Tasks Score", "HITL Harm Tasks Score"]]
        sort_col = "HITL Safe Tasks Score"
    else:
        df = df[["Model", "Overall Safe Tasks Score", "Overall Harm Tasks Score", "Refusal Rate", "Normalized Safety Score"]]
        sort_col = "Overall Safe Tasks Score"

    return df.sort_values(by=sort_col, ascending=False)

def display_category_scores(open_filter="Both"):
    df = load_leaderboard()

    if open_filter == "Open":
        df = df[df["Open"] == True]
    elif open_filter == "Closed":
        df = df[df["Open"] == False]

    df = df[["Model", "Bias", "Cybercrime", "Harassment", "Misinformation", "Illegal Activity"]]
    return df

with gr.Blocks() as demo:
    gr.Markdown("# SafeArena Leaderboard")

    # Default to the full split; the dropdown's value must be one of its choices.
    score_filter = gr.Dropdown(label="Data Split", choices=["Full Split", "Human-Generated Split", "HITL Split"], value="Full Split")
    open_state = gr.State("Both")

    with gr.Row():
        gr.Markdown("### Filter Models")
        open_both = gr.Button("Both", variant="primary")
        open_open = gr.Button("Open")
        open_closed = gr.Button("Closed")

    table_overall = gr.Dataframe(value=display_leaderboard("Full Split", "Both"), interactive=False)
    table_categories = gr.Dataframe(value=display_category_scores("Both"), interactive=False)

    # Each filter button refreshes both tables and records the chosen filter in open_state.
    open_both.click(
        lambda score: (display_leaderboard(score, "Both"), display_category_scores("Both"), "Both"),
        inputs=[score_filter],
        outputs=[table_overall, table_categories, open_state]
    )
    open_open.click(
        lambda score: (display_leaderboard(score, "Open"), display_category_scores("Open"), "Open"),
        inputs=[score_filter],
        outputs=[table_overall, table_categories, open_state]
    )
    open_closed.click(
        lambda score: (display_leaderboard(score, "Closed"), display_category_scores("Closed"), "Closed"),
        inputs=[score_filter],
        outputs=[table_overall, table_categories, open_state]
    )

    # Changing the split re-renders both tables with the currently selected model filter.
    score_filter.change(
        lambda score, open_val: (display_leaderboard(score, open_val), display_category_scores(open_val)),
        inputs=[score_filter, open_state],
        outputs=[table_overall, table_categories]
    )

demo.launch(share=True)
results.csv
ADDED
@@ -0,0 +1,6 @@
Model,Overall Safe Tasks Score,Overall Harm Tasks Score,Human-Generated Safe Tasks Score,Human-Generated Harm Tasks Score,HITL Safe Tasks Score,HITL Harm Tasks Score,Refusal Rate,Normalized Safety Score,Open,Bias,Cybercrime,Harassment,Misinformation,Illegal Activity
GPT-4o,34.4,22.8,44.0,26.0,28.0,20.7,30.2,31.7,False,14.0,16.0,16.0,28.0,40.0
GPT-4o-Mini,18.4,14.0,22.0,14.0,16.0,14.0,36.5,35.7,False,6.0,8.0,14.0,24.0,18.0
Claude-3.5-Sonnet,21.2,7.6,24.0,12.0,19.3,4.7,57.7,55.0,False,4.0,6.0,5.0,12.0,12.0
Llama-3.2-90B,8.4,11.2,10.1,10.0,7.3,12.0,14.0,34.0,True,22.0,8.0,10.0,14.0,2.0
Qwen-2-VL-72B,24.4,26.0,30.0,29.0,20.7,24.0,0.8,21.5,True,34.0,18.0,18.0,30.0,30.0
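
The column names hard-coded in leaderboard.py must match this CSV header exactly, or the df[[...]] selections above raise a KeyError. A minimal sanity-check sketch (illustrative only, not part of this commit; the column list is copied from the code above):

    import pandas as pd

    # Columns referenced by leaderboard.py's table views.
    REQUIRED_COLUMNS = {
        "Model", "Open",
        "Overall Safe Tasks Score", "Overall Harm Tasks Score",
        "Human-Generated Safe Tasks Score", "Human-Generated Harm Tasks Score",
        "HITL Safe Tasks Score", "HITL Harm Tasks Score",
        "Refusal Rate", "Normalized Safety Score",
        "Bias", "Cybercrime", "Harassment", "Misinformation", "Illegal Activity",
    }

    df = pd.read_csv("results.csv")
    missing = REQUIRED_COLUMNS - set(df.columns)
    assert not missing, f"results.csv is missing columns: {missing}"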