Spaces:

allenai
/

reward-bench

Running

App Files Files Community

natolambert committed on Feb 14, 2024

Commit

0b8c16d

1 Parent(s): ab74236

upload plot

Browse files

Files changed (3) hide show

app.py +6 -1
src/plt.py +53 -0
src/utils.py +12 -0

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
 from datasets import load_dataset
 from src.utils import load_all_data
 from src.md import ABOUT_TEXT, TOP_TEXT
 import numpy as np
 api = HfApi()
@@ -210,7 +211,11 @@ with gr.Blocks() as app:
                 sample_display = gr.Markdown("{sampled data loads here}")
             button.click(fn=random_sample, inputs=[subset_selector], outputs=[sample_display])
 # Load data when app starts, TODO make this used somewhere...
 # def load_data_on_start():

 from datasets import load_dataset
 from src.utils import load_all_data
 from src.md import ABOUT_TEXT, TOP_TEXT
+from src.plt import plot_avg_correlation
 import numpy as np
 api = HfApi()
                 sample_display = gr.Markdown("{sampled data loads here}")
             button.click(fn=random_sample, inputs=[subset_selector], outputs=[sample_display])
+        # removed plot because not pretty enough
+        # with gr.TabItem("Model Correlation"):
+        #     with gr.Row():
+        #         plot = plot_avg_correlation(herm_data_avg, prefs_data)
+        #         gr.Plot(plot)
 # Load data when app starts, TODO make this used somewhere...
 # def load_data_on_start():

src/plt.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import matplotlib.pyplot as plt
+import pandas as pd
+from .utils import undo_hyperlink
def plot_avg_correlation(df1, df2):
    """
    Scatter-plot the "average" score of every model that appears in both dataframes.

    Each shared model becomes one scatter series (x taken from df1, y from df2) and
    its display name is written just to the left of each of its points.

    Parameters:
    - df1: pandas DataFrame containing columns "model" and "average" (x-axis values).
    - df2: pandas DataFrame containing columns "model" and "average" (y-axis values).

    Returns:
    - the `matplotlib.pyplot` module; the plot lives on its current figure.
    """
    # Models present in both frames, kept in df1 order (first occurrence).
    # Iterating a set of strings is not order-stable across runs, which would
    # shuffle the series order and therefore the colour given to each model.
    models_in_df2 = set(df2['model'])
    common_models = [m for m in dict.fromkeys(df1['model']) if m in models_in_df2]

    # Set up the plot
    plt.figure(figsize=(13, 6), constrained_layout=True)
    # Fixed 0.475-0.8 window on both axes
    plt.xlim(0.475, 0.8)
    plt.ylim(0.475, 0.8)
    # Base font size 12, axis label/title size 14.
    # NOTE: this updates pyplot's global rcParams, so it persists after this call.
    plt.rcParams.update({'font.size': 12, 'axes.labelsize': 14, 'axes.titlesize': 14})

    for model in common_models:
        # Filter data for the current model
        df1_model_data = df1[df1['model'] == model]['average'].values
        df2_model_data = df2[df2['model'] == model]['average'].values

        # One scatter series per model
        plt.scatter(df1_model_data, df2_model_data, label=model)

        # The "model" cell holds an HTML link; recover the plain name for the label.
        m_name = undo_hyperlink(model)
        if m_name == "No text found":
            # Entries without link markup are labelled "Random".
            m_name = "Random"

        # Label every point individually with scalar coordinates: plt.text takes
        # a single (x, y) position, not arrays, so a model with several rows (or
        # a 1-element array) must not be passed through as-is.
        for x_val, y_val in zip(df1_model_data, df2_model_data):
            plt.text(
                float(x_val) - .005,
                float(y_val),
                m_name,
                horizontalalignment='right',
                verticalalignment='center',
            )

    plt.xlabel('HERM Eval. Set Avg.', fontsize=16)
    plt.ylabel('Pref. Test Sets Avg.', fontsize=16)
    return plt

src/utils.py CHANGED Viewed

@@ -3,6 +3,7 @@ from pathlib import Path
 from datasets import load_dataset
 import numpy as np
 import os
 # From Open LLM Leaderboard
 def model_hyperlink(link, model_name):
@@ -10,6 +11,17 @@ def model_hyperlink(link, model_name):
         return "random"
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
 # Define a function to fetch and process data
 def load_all_data(data_repo, subdir:str, subsubsets=False):    # use HF api to pull the git repo
     dir = Path(data_repo)

 from datasets import load_dataset
 import numpy as np
 import os
+import re
 # From Open LLM Leaderboard
 def model_hyperlink(link, model_name):
         return "random"
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
def undo_hyperlink(html_string):
    """
    Recover the visible text from an HTML snippet such as an ``<a ...>name</a>`` link.

    Parameters:
    - html_string: string that may contain HTML tags.

    Returns:
    - the first non-blank run of text found between a '>' and the next '<',
      with surrounding whitespace removed; or the literal "No text found" when
      there is no such run (for example, a plain string with no tags).
    """
    # Candidate spans: anything sitting between a '>' and the following '<'.
    for match in re.finditer(r'>([^<]+)<', html_string):
        text = match.group(1).strip()
        # Skip whitespace-only spans (e.g. a newline/indent between nested
        # tags) so formatting is never returned as the name.
        if text:
            return text
    return "No text found"
 # Define a function to fetch and process data
 def load_all_data(data_repo, subdir:str, subsubsets=False):    # use HF api to pull the git repo
     dir = Path(data_repo)