AdaDTur committed on
Commit
2a9bd05
·
1 Parent(s): d690449

leaderboard

Browse files
Files changed (2) hide show
  1. leaderboard.py +79 -0
  2. results.csv +6 -0
leaderboard.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ LEADERBOARD_FILE = "results.csv"
5
+
6
def load_leaderboard():
    """Read the CSV named by ``LEADERBOARD_FILE`` and return it as a DataFrame.

    Nothing is cached here: every call reads the file again.
    """
    leaderboard = pd.read_csv(LEADERBOARD_FILE)
    return leaderboard
8
+
9
def display_leaderboard(score_type="Overall", open_filter="Both"):
    """Return the score table for one data split, best safe-task score first.

    Args:
        score_type: ``"Human-Generated Split"`` or ``"HITL Split"`` select the
            matching pair of score columns; any other value selects the
            overall columns (including refusal rate and normalized safety).
        open_filter: ``"Open"`` keeps rows whose ``Open`` column is true,
            ``"Closed"`` keeps rows where it is false; any other value
            (e.g. ``"Both"``) keeps every row.

    Returns:
        A DataFrame with ``Model`` plus the selected score columns, sorted in
        descending order of the split's "Safe Tasks Score" column.
    """
    board = load_leaderboard()

    # Row filter on the boolean "Open" column; unrecognised values are a no-op.
    if open_filter == "Open":
        board = board[board["Open"] == True]
    elif open_filter == "Closed":
        board = board[board["Open"] == False]

    # Columns shown per split. In every list the entry right after "Model" is
    # the safe-tasks score, which is also the sort key.
    split_columns = {
        "Human-Generated Split": [
            "Model",
            "Human-Generated Safe Tasks Score",
            "Human-Generated Harm Tasks Score",
        ],
        "HITL Split": [
            "Model",
            "HITL Safe Tasks Score",
            "HITL Harm Tasks Score",
        ],
    }
    overall_columns = [
        "Model",
        "Overall Safe Tasks Score",
        "Overall Harm Tasks Score",
        "Refusal Rate",
        "Normalized Safety Score",
    ]

    columns = split_columns.get(score_type, overall_columns)
    sort_key = columns[1]

    selected = board[columns]
    return selected.sort_values(by=sort_key, ascending=False)
29
+
30
def display_category_scores(open_filter="Both"):
    """Return the per-category score table, optionally filtered by openness.

    Args:
        open_filter: ``"Open"`` keeps rows whose ``Open`` column is true,
            ``"Closed"`` keeps rows where it is false; any other value
            (e.g. ``"Both"``) keeps every row.

    Returns:
        A DataFrame with ``Model`` and one column per harm category, in the
        row order of the loaded leaderboard (no sorting is applied).
    """
    category_columns = [
        "Model",
        "Bias",
        "Cybercrime",
        "Harassment",
        "Misinformation",
        "Illegal Activity",
    ]

    scores = load_leaderboard()

    # Row filter on the boolean "Open" column; unrecognised values are a no-op.
    if open_filter == "Open":
        scores = scores[scores["Open"] == True]
    elif open_filter == "Closed":
        scores = scores[scores["Open"] == False]

    return scores[category_columns]
41
+
42
# Build the leaderboard UI: a data-split dropdown, three buttons that filter
# models by openness, and two read-only tables (split scores and per-category
# scores) that are refreshed whenever either control changes.
with gr.Blocks() as demo:
    gr.Markdown("# SafeArena Leaderboard")

    # The dropdown's initial value must be one of its choices. "Full Split" is
    # handled by display_leaderboard's fallback branch, i.e. the overall view.
    initial_split = "Full Split"
    score_filter = gr.Dropdown(
        label="Data Split",
        choices=["Full Split", "Human-Generated Split", "HITL Split"],
        value=initial_split,
    )
    # Remembers the last openness filter so that changing the data split
    # keeps the current Open/Closed/Both selection.
    open_state = gr.State("Both")

    with gr.Row():
        gr.Markdown("### Filter Models")
        open_both = gr.Button("Both", variant="primary")
        open_open = gr.Button("Open")
        open_closed = gr.Button("Closed")

    table_overall = gr.Dataframe(
        value=display_leaderboard(initial_split, "Both"),
        interactive=False,
    )
    table_categories = gr.Dataframe(
        value=display_category_scores("Both"),
        interactive=False,
    )

    def _make_open_filter_handler(open_val):
        """Return a click handler that applies ``open_val`` as the openness filter."""

        def _handler(score):
            # Refresh both tables and store the chosen filter in open_state.
            return (
                display_leaderboard(score, open_val),
                display_category_scores(open_val),
                open_val,
            )

        return _handler

    # The factory binds each filter value per button, avoiding the
    # late-binding closure pitfall of defining lambdas inside the loop.
    for button, open_val in (
        (open_both, "Both"),
        (open_open, "Open"),
        (open_closed, "Closed"),
    ):
        button.click(
            _make_open_filter_handler(open_val),
            inputs=[score_filter],
            outputs=[table_overall, table_categories, open_state],
        )

    # Changing the data split re-renders both tables with the stored filter.
    score_filter.change(
        lambda score, open_val: (
            display_leaderboard(score, open_val),
            display_category_scores(open_val),
        ),
        inputs=[score_filter, open_state],
        outputs=[table_overall, table_categories],
    )

demo.launch(share=True)
results.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ Model,Overall Safe Tasks Score,Overall Harm Tasks Score,Human-Generated Safe Tasks Score,Human-Generated Harm Tasks Score,HITL Safe Tasks Score,HITL Harm Tasks Score,Refusal Rate,Normalized Safety Score,Open,Bias,Cybercrime,Harassment,Misinformation,Illegal Activity
2
+ GPT-4o,34.4,22.8,44.0,26.0,28.0,20.7,30.2,31.7,False,14.0,16.0,16.0,28.0,40.0
3
+ GPT-4o-Mini,18.4,14.0,22.0,14.0,16.0,14.0,36.5,35.7,False,6.0,8.0,14.0,24.0,18.0
4
+ Claude-3.5-Sonnet,21.2,7.6,24.0,12.0,19.3,4.7,57.7,55.0,False,4.0,6.0,5.0,12.0,12.0
5
+ Llama-3.2-90B,8.4,11.2,10.1,10.0,7.3,12.0,14.0,34.0,True,22.0,8.0,10.0,14.0,2.0
6
+ Qwen-2-VL-72B,24.4,26.0,30.0,29.0,20.7,24.0,0.8,21.5,True,34.0,18.0,18.0,30.0,30.0