evijit HF staff committed on
Commit
9959b2f
·
verified ·
1 Parent(s): c3e6774

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +219 -64
app.py CHANGED
@@ -348,34 +348,82 @@ def create_source_html(sources):
348
  html += "</div>"
349
  return html
350
 
351
- def create_leaderboard():
352
  scores = []
353
  for model, data in models.items():
354
  total_score = 0
355
  total_questions = 0
 
356
 
357
- for category in data['scores'].values():
 
 
 
 
358
  for section in category.values():
359
  if section['status'] != 'N/A':
360
  questions = section.get('questions', {})
361
- total_score += sum(1 for q in questions.values() if q)
362
- total_questions += len(questions)
 
 
 
 
 
363
 
 
364
  score_percentage = (total_score / total_questions * 100) if total_questions > 0 else 0
365
- scores.append((model, score_percentage))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
- df = pd.DataFrame(scores, columns=['Model', 'Score Percentage'])
368
- df = df.sort_values('Score Percentage', ascending=False).reset_index(drop=True)
369
 
370
- html = "<div class='card leaderboard-card'>"
371
- html += "<div class='card-title'>AI Model Social Impact Leaderboard</div>"
372
- html += "<table class='leaderboard-table'>"
373
- html += "<tr><th>Rank</th><th>Model</th><th>Score Percentage</th></tr>"
374
- for i, (_, row) in enumerate(df.iterrows(), 1):
375
- html += f"<tr><td>{i}</td><td>{row['Model']}</td><td>{row['Score Percentage']:.2f}%</td></tr>"
376
- html += "</table></div>"
377
 
378
- return html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
 
380
  def create_category_chart(selected_models, selected_categories):
381
  if not selected_models:
@@ -1070,6 +1118,98 @@ css = """
1070
  .dark .completion-bar-container.na .completion-bar {
1071
  background-color: #666;
1072
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1073
  """
1074
 
1075
  first_model = next(iter(models.values()))
@@ -1080,7 +1220,7 @@ with gr.Blocks(css=css) as demo:
1080
 
1081
  with gr.Row():
1082
  tab_selection = gr.Radio(["Leaderboard", "Category Analysis", "Detailed Scorecard"],
1083
- label="Select Tab", value="Leaderboard")
1084
 
1085
  with gr.Row():
1086
  model_chooser = gr.Dropdown(choices=[""] + list(models.keys()),
@@ -1088,15 +1228,25 @@ with gr.Blocks(css=css) as demo:
1088
  value="",
1089
  interactive=True, visible=False)
1090
  model_multi_chooser = gr.Dropdown(choices=list(models.keys()),
1091
- label="Select Models for Comparison",
1092
- multiselect=True, interactive=True, visible=False)
1093
- category_filter = gr.CheckboxGroup(choices=category_choices,
1094
- label="Filter Categories",
1095
- value=category_choices,
1096
- visible=False)
 
 
 
 
 
1097
 
1098
  with gr.Column(visible=True) as leaderboard_tab:
1099
- leaderboard_output = gr.HTML()
 
 
 
 
 
1100
 
1101
  with gr.Column(visible=False) as category_analysis_tab:
1102
  category_chart = gr.Plot()
@@ -1106,55 +1256,60 @@ with gr.Blocks(css=css) as demo:
1106
  all_category_cards = gr.HTML()
1107
  total_score = gr.Markdown()
1108
 
1109
- # Initialize the dashboard with the leaderboard
1110
- leaderboard_output.value = create_leaderboard()
 
 
 
 
1111
 
 
1112
  def update_dashboard(tab, selected_models, selected_model, selected_categories):
1113
- leaderboard_visibility = gr.update(visible=False)
1114
- category_chart_visibility = gr.update(visible=False)
1115
- detailed_scorecard_visibility = gr.update(visible=False)
1116
- model_chooser_visibility = gr.update(visible=False)
1117
- model_multi_chooser_visibility = gr.update(visible=False)
1118
- category_filter_visibility = gr.update(visible=False)
1119
-
1120
- if tab == "Leaderboard":
1121
- leaderboard_visibility = gr.update(visible=True)
1122
- leaderboard_html = create_leaderboard()
1123
- return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
1124
- model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
1125
- gr.update(value=leaderboard_html), gr.update(), gr.update(), gr.update(), gr.update()]
1126
-
1127
- elif tab == "Category Analysis":
1128
- category_chart_visibility = gr.update(visible=True)
1129
- model_multi_chooser_visibility = gr.update(visible=True)
1130
- category_filter_visibility = gr.update(visible=True)
1131
- category_plot = create_category_chart(selected_models or [], selected_categories)
1132
- return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
1133
- model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
1134
- gr.update(), gr.update(value=category_plot), gr.update(), gr.update(), gr.update()]
1135
 
1136
- elif tab == "Detailed Scorecard":
1137
- detailed_scorecard_visibility = gr.update(visible=True)
1138
- model_chooser_visibility = gr.update(visible=True)
1139
- category_filter_visibility = gr.update(visible=True)
1140
- if selected_model:
1141
- scorecard_updates = update_detailed_scorecard(selected_model, selected_categories)
1142
- else:
1143
- scorecard_updates = [
1144
- gr.update(value="Please select a model to view details.", visible=True),
1145
- gr.update(visible=False),
1146
- gr.update(visible=False)
1147
- ]
1148
- return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
1149
- model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
1150
- gr.update(), gr.update()] + scorecard_updates
 
 
 
 
 
 
 
 
 
1151
 
1152
  # Set up event handlers
1153
  tab_selection.change(
1154
  fn=update_dashboard,
1155
  inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
1156
  outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
1157
- model_chooser, model_multi_chooser, category_filter,
1158
  leaderboard_output, category_chart, model_metadata,
1159
  all_category_cards, total_score]
1160
  )
@@ -1181,7 +1336,7 @@ with gr.Blocks(css=css) as demo:
1181
  fn=update_dashboard,
1182
  inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
1183
  outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
1184
- model_chooser, model_multi_chooser, category_filter,
1185
  leaderboard_output, category_chart, model_metadata,
1186
  all_category_cards, total_score]
1187
  )
 
348
  html += "</div>"
349
  return html
350
 
351
def create_leaderboard(selected_categories):
    """Build the leaderboard table for every entry in the module-level ``models``.

    For each model, a question counts as completed when its value is truthy.
    Sections whose ``status`` is ``'N/A'`` are ignored.  The overall rate is
    completed / total questions across all categories; each per-category rate
    is computed the same way within that category.

    Args:
        selected_categories: Iterable of full category names (the keys used in
            each model's ``scores`` mapping) to show as extra columns.
            ``None`` is treated as "no category selected".

    Returns:
        pandas.DataFrame with columns ``Rank``, ``Model``, ``Type``,
        ``Overall Completion Rate`` and one column per selected known category,
        sorted by overall rate (highest first).  Rate columns are rendered as
        strings such as ``"87.5%"``.  With no models the frame has the same
        columns and zero rows.
    """
    # Full category name (as stored under ``scores``) -> display column label.
    # Built once here rather than once per model.
    category_map = {
        '1. Bias, Stereotypes, and Representational Harms Evaluation': '⚖️ Bias and Fairness',
        '2. Cultural Values and Sensitive Content Evaluation': '🌍 Cultural Values',
        '3. Disparate Performance Evaluation': '📊 Disparate Performance',
        '4. Environmental Costs and Carbon Emissions Evaluation': '🌱 Environmental Impact',
        '5. Privacy and Data Protection Evaluation': '🔒 Privacy',
        '6. Financial Costs Evaluation': '💰 Financial Costs',
        '7. Data and Content Moderation Labor Evaluation': '👥 Labor Practices',
    }

    # A cleared selection may arrive as None; treat it as an empty selection
    # instead of failing on the membership test below.
    selected = selected_categories if selected_categories is not None else ()
    visible_categories = [
        (full_name, label)
        for full_name, label in category_map.items()
        if full_name in selected
    ]
    score_columns = ['Overall Completion Rate'] + [label for _, label in visible_categories]

    rows = []
    for model, data in models.items():
        total_score = 0
        total_questions = 0
        score_by_category = {}

        for category_name, category in data['scores'].items():
            category_score = 0
            category_total = 0
            for section in category.values():
                if section['status'] != 'N/A':
                    questions = section.get('questions', {})
                    category_score += sum(1 for q in questions.values() if q)
                    category_total += len(questions)

            # Categories without any answerable question contribute nothing
            # and fall back to 0 when displayed.
            if category_total > 0:
                score_by_category[category_name] = (category_score / category_total) * 100
                total_score += category_score
                total_questions += category_total

        score_percentage = (total_score / total_questions * 100) if total_questions > 0 else 0

        row = {
            'Model': model,
            # Tolerate entries that carry no metadata at all.
            'Type': (data.get('metadata') or {}).get('Type', 'Unknown'),
            'Overall Completion Rate': score_percentage,
        }
        for full_name, label in visible_categories:
            row[label] = score_by_category.get(full_name, 0)
        rows.append(row)

    # Passing the columns explicitly keeps the schema intact when there are no
    # models, so the sort below cannot fail on a missing column.
    df = pd.DataFrame(rows, columns=['Model', 'Type'] + score_columns)

    # Stable sort keeps tied models in their original order; the fresh index
    # makes row labels agree with the displayed rank.
    df = df.sort_values(
        'Overall Completion Rate', ascending=False, kind='stable'
    ).reset_index(drop=True)
    df.insert(0, 'Rank', range(1, len(df) + 1))

    # Format only after sorting so ordering uses the numeric values.
    for col in score_columns:
        df[col] = df[col].map("{:.1f}%".format)

    return df
421
+
422
+ with gr.Column(visible=True) as leaderboard_tab:
423
+ leaderboard_output = gr.DataFrame(
424
+ interactive=True, # Allow sorting
425
+ wrap=True
426
+ )
427
 
428
  def create_category_chart(selected_models, selected_categories):
429
  if not selected_models:
 
1118
  .dark .completion-bar-container.na .completion-bar {
1119
  background-color: #666;
1120
  }
1121
+ .leaderboard-filters {
1122
+ margin-bottom: 20px;
1123
+ padding: 15px;
1124
+ background-color: #f8f9fa;
1125
+ border-radius: 8px;
1126
+ }
1127
+
1128
+ .dark .leaderboard-filters {
1129
+ background-color: #2a2a2a;
1130
+ }
1131
+
1132
+ .filter-group {
1133
+ margin-bottom: 10px;
1134
+ }
1135
+
1136
+ .filter-label {
1137
+ font-weight: 600;
1138
+ margin-bottom: 5px;
1139
+ display: block;
1140
+ }
1141
+
1142
+ .score-column {
1143
+ background-color: #f0f7ff;
1144
+ }
1145
+
1146
+ .dark .score-column {
1147
+ background-color: #1a2733;
1148
+ }
1149
+
1150
+ .metric-header {
1151
+ font-size: 0.9em;
1152
+ color: #666;
1153
+ text-align: center;
1154
+ }
1155
+
1156
+ .dark .metric-header {
1157
+ color: #aaa;
1158
+ }
1159
+
1160
+ .table-container {
1161
+ overflow-x: auto;
1162
+ }
1163
+
1164
+ .leaderboard-table td {
1165
+ white-space: nowrap;
1166
+ }
1167
+
1168
+ .score-cell {
1169
+ text-align: right;
1170
+ padding-right: 15px !important;
1171
+ }
1172
+
1173
+ .model-cell {
1174
+ max-width: 300px;
1175
+ overflow: hidden;
1176
+ text-overflow: ellipsis;
1177
+ white-space: nowrap;
1178
+ }
1179
+
1180
+ .leaderboard-table {
1181
+ width: 100%;
1182
+ border-collapse: collapse;
1183
+ }
1184
+
1185
+ .leaderboard-table th,
1186
+ .leaderboard-table td {
1187
+ padding: 10px;
1188
+ text-align: left;
1189
+ border: 1px solid #e0e0e0;
1190
+ }
1191
+
1192
+ .dark .leaderboard-table th,
1193
+ .dark .leaderboard-table td {
1194
+ border-color: #444;
1195
+ }
1196
+
1197
+ .leaderboard-table th {
1198
+ background-color: #f2f2f2;
1199
+ font-weight: bold;
1200
+ }
1201
+
1202
+ .dark .leaderboard-table th {
1203
+ background-color: #2c3e50;
1204
+ }
1205
+
1206
+ .leaderboard-table tr:hover {
1207
+ background-color: #f5f5f5;
1208
+ }
1209
+
1210
+ .dark .leaderboard-table tr:hover {
1211
+ background-color: #2d2d2d;
1212
+ }
1213
  """
1214
 
1215
  first_model = next(iter(models.values()))
 
1220
 
1221
  with gr.Row():
1222
  tab_selection = gr.Radio(["Leaderboard", "Category Analysis", "Detailed Scorecard"],
1223
+ label="Select Tab", value="Leaderboard")
1224
 
1225
  with gr.Row():
1226
  model_chooser = gr.Dropdown(choices=[""] + list(models.keys()),
 
1228
  value="",
1229
  interactive=True, visible=False)
1230
  model_multi_chooser = gr.Dropdown(choices=list(models.keys()),
1231
+ label="Select Models for Comparison",
1232
+ value=[],
1233
+ multiselect=True,
1234
+ interactive=True,
1235
+ visible=False,
1236
+ info="Select one or more models")
1237
+
1238
+ # Category filter now visible for all tabs
1239
+ category_filter = gr.CheckboxGroup(choices=category_choices,
1240
+ label="Filter Categories",
1241
+ value=category_choices)
1242
 
1243
  with gr.Column(visible=True) as leaderboard_tab:
1244
+ leaderboard_output = gr.DataFrame(
1245
+ headers=["Rank", "Model", "Type", "Overall Score"],
1246
+ datatype=["number", "str", "str", "str"],
1247
+ interactive=False,
1248
+ wrap=True
1249
+ )
1250
 
1251
  with gr.Column(visible=False) as category_analysis_tab:
1252
  category_chart = gr.Plot()
 
1256
  all_category_cards = gr.HTML()
1257
  total_score = gr.Markdown()
1258
 
1259
+ # Initialize the dashboard
1260
def init_leaderboard():
    """Return the leaderboard table for the category filter's current value."""
    initial_categories = category_filter.value
    return create_leaderboard(initial_categories)
1263
+
1264
+ leaderboard_output.value = init_leaderboard()
1265
 
1266
+ # Update handlers
1267
  def update_dashboard(tab, selected_models, selected_model, selected_categories):
1268
+ leaderboard_visibility = gr.update(visible=False)
1269
+ category_chart_visibility = gr.update(visible=False)
1270
+ detailed_scorecard_visibility = gr.update(visible=False)
1271
+ model_chooser_visibility = gr.update(visible=False)
1272
+ model_multi_chooser_visibility = gr.update(visible=False)
1273
+
1274
+ if tab == "Leaderboard":
1275
+ leaderboard_visibility = gr.update(visible=True)
1276
+ df = create_leaderboard(selected_categories)
1277
+ return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
1278
+ model_chooser_visibility, model_multi_chooser_visibility,
1279
+ gr.update(value=df), gr.update(), gr.update(), gr.update(), gr.update()]
1280
+
 
 
 
 
 
 
 
 
 
1281
 
1282
+ elif tab == "Category Analysis":
1283
+ category_chart_visibility = gr.update(visible=True)
1284
+ model_multi_chooser_visibility = gr.update(visible=True)
1285
+ category_filter_visibility = gr.update(visible=True)
1286
+ category_plot = create_category_chart(selected_models or [], selected_categories)
1287
+ return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
1288
+ model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
1289
+ None, gr.update(value=category_plot), gr.update(), gr.update(), gr.update()]
1290
+
1291
+ elif tab == "Detailed Scorecard":
1292
+ detailed_scorecard_visibility = gr.update(visible=True)
1293
+ model_chooser_visibility = gr.update(visible=True)
1294
+ category_filter_visibility = gr.update(visible=True)
1295
+ if selected_model:
1296
+ scorecard_updates = update_detailed_scorecard(selected_model, selected_categories)
1297
+ else:
1298
+ scorecard_updates = [
1299
+ gr.update(value="Please select a model to view details.", visible=True),
1300
+ gr.update(visible=False),
1301
+ gr.update(visible=False)
1302
+ ]
1303
+ return [leaderboard_visibility, category_chart_visibility, detailed_scorecard_visibility,
1304
+ model_chooser_visibility, model_multi_chooser_visibility, category_filter_visibility,
1305
+ None, None] + scorecard_updates
1306
 
1307
  # Set up event handlers
1308
  tab_selection.change(
1309
  fn=update_dashboard,
1310
  inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
1311
  outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
1312
+ model_chooser, model_multi_chooser,
1313
  leaderboard_output, category_chart, model_metadata,
1314
  all_category_cards, total_score]
1315
  )
 
1336
  fn=update_dashboard,
1337
  inputs=[tab_selection, model_multi_chooser, model_chooser, category_filter],
1338
  outputs=[leaderboard_tab, category_analysis_tab, detailed_scorecard_tab,
1339
+ model_chooser, model_multi_chooser,
1340
  leaderboard_output, category_chart, model_metadata,
1341
  all_category_cards, total_score]
1342
  )