# Olas Predict Actual Performance

import gradio as gr
import pandas as pd

# Request log loaded once at import time. The functions below read its
# 'tool', 'request_month_year_week', 'error' and 'prompt_response' columns.
tools = pd.read_csv("./data/tools.csv")
# all_trades = pd.read_csv('./data/all_trades_profitability.csv')

# Root Gradio container; the layout is declared later inside `with demo:`.
demo = gr.Blocks()

# Tool names included in the error statistics. Rows of `tools` whose 'tool'
# value is not listed here are ignored, and the same list feeds the tool
# dropdown of the dashboard.
INC_TOOLS = [
    "prediction-online",
    "prediction-offline",
    "claude-prediction-online",
    "claude-prediction-offline",
    "prediction-offline-sme",
    "prediction-online-sme",
    "prediction-request-rag",
    "prediction-request-reasoning",
    "prediction-url-cot-claude",
    "prediction-request-rag-claude",
    "prediction-request-reasoning-claude",
]

def set_error(row):
    """Return the error flag for one request row.

    An explicit flag (anything equal to ``True`` or ``False``) is returned
    unchanged. When the flag is absent, the request counts as an error exactly
    when it has no prompt response.

    Args:
        row: Object exposing ``error`` and ``prompt_response`` attributes,
            e.g. a row passed by ``DataFrame.apply(..., axis=1)``.
            ``prompt_response`` is assumed to be a scalar.

    Returns:
        The original flag if one was present, otherwise a ``bool``.
    """
    if row.error in [True, False]:
        return row.error

    response = row.prompt_response
    # Empty CSV fields are loaded as NaN, and NaN is truthy, so a plain
    # `not response` would treat a missing response as present.
    if pd.isna(response):
        return True
    return not response

def get_error_data():
    """Build the per-tool, per-week error table from the module-level ``tools``.

    Only rows whose tool is listed in ``INC_TOOLS`` are counted; each row's
    error flag is resolved with ``set_error``.

    Returns:
        DataFrame with one row per (tool, week) and the columns ``tool``,
        ``request_month_year_week``, ``False`` (non-error count), ``True``
        (error count), ``error_perc`` (errors as a percentage of all
        requests) and ``total_requests``.
    """
    # Work on a copy: adding a column to a filtered slice of `tools` is a
    # chained assignment and triggers SettingWithCopyWarning.
    tools_inc = tools[tools['tool'].isin(INC_TOOLS)].copy()
    # Normalise to real booleans so the pivoted columns are exactly
    # False/True even if the source column stores 0/1.
    tools_inc['error'] = tools_inc.apply(set_error, axis=1).astype(bool)

    error = (
        tools_inc.groupby(['tool', 'request_month_year_week', 'error'])
        .size()
        .unstack()
        .fillna(0)
        # Guarantee both outcome columns; without this, data that contains
        # only errors (or none at all) raises KeyError below.
        .reindex(columns=[False, True], fill_value=0)
        .reset_index()
    )
    error['error_perc'] = (error[True] / (error[False] + error[True]))*100
    error['total_requests'] = error[False] + error[True]

    return error

def get_error_data_all(error):
    """Collapse the per-tool table into one row per week.

    Args:
        error: DataFrame with the columns ``request_month_year_week``,
            ``total_requests``, ``False`` and ``True`` (the latter two being
            the boolean-named count columns).

    Returns:
        DataFrame with one row per week holding the summed counts and
        ``error_perc`` rounded to 4 decimal places. All column names are
        strings in the result (``'False'`` / ``'True'``).
    """
    summed_columns = dict.fromkeys(['total_requests', False, True], 'sum')
    weekly = error.groupby('request_month_year_week').agg(summed_columns).reset_index()

    # Share of failed requests per week, as a percentage with 4 decimals.
    percentage = (weekly[True] / weekly['total_requests'])*100
    weekly['error_perc'] = percentage.apply(lambda pct: round(pct, 4))

    # Boolean column labels become their string form.
    weekly.columns = weekly.columns.astype(str)
    return weekly

# Tables backing the dashboard, computed once at import time:
# `error` has one row per (tool, week); `error_all` sums it per week.
error = get_error_data()
error_all = get_error_data_all(error)
# Print the first rows of the weekly totals to stdout at startup.
print(error_all.head())

def _plot_ready(rows):
    """Return a modified copy of a slice of ``error`` for use as a plot value.

    The slice is copied first so the shared ``error`` table is never written
    to. Column labels are converted to strings and ``error_perc`` is rounded
    to 4 decimal places.
    """
    frame = rows.copy()
    frame.columns = frame.columns.astype(str)
    frame['error_perc'] = frame['error_perc'].round(4)
    return frame


def update_tool_plot(selected_tool):
    """Return the weekly error rows of ``selected_tool`` as a DataFrame.

    The plot it feeds is configured with ``x``/``y`` column names, so the
    handler returns the data frame itself rather than lists of points.
    """
    return _plot_ready(error[error['tool'] == selected_tool])


def update_week_plot(selected_week):
    """Return the per-tool error rows of ``selected_week`` as a DataFrame."""
    return _plot_ready(error[error['request_month_year_week'] == selected_week])


with demo:
    gr.HTML("<h1>Olas Predict Actual Performance</h1>")
    gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

    with gr.Tabs():
        with gr.TabItem("🔥 Error Dashboard"):
            with gr.Row():
                gr.Markdown("This plot shows the percentage of requests that resulted in an error.")
            with gr.Row():
                with gr.Column():
                    # Error percentage over all tools, one point per week.
                    gr.LinePlot(
                        value=error_all,
                        x="request_month_year_week",
                        y="error_perc",
                        title="Error Percentage",
                        x_title="Week",
                        y_title="Error Percentage",
                        height=400,
                        show_label=True
                    )
            gr.Markdown("This plot shows the percentage of requests that resulted in an error.")

            default_tool = "prediction-online"
            # NOTE(review): this is the week of the last row of `error`, which
            # is presumably meant to be the most recent week; confirm the
            # row order guarantees that.
            default_week = error['request_month_year_week'].iloc[-1]

            # Dropdown for selecting the tool
            sel_tool = gr.Dropdown(
                value=default_tool,
                choices=INC_TOOLS,
                label="Select a tool"
            )

            # Dropdown for selecting the week
            sel_week = gr.Dropdown(
                value=default_week,
                choices=error['request_month_year_week'].unique().tolist(),
                label="Select a week"
            )

            # Both plots name their x/y columns and start with the data for
            # the dropdown defaults; `change` does not fire on page load, so
            # without an initial value they would render empty.
            with gr.Row():
                plot_tool_error = gr.LinePlot(
                    value=update_tool_plot(default_tool),
                    x="request_month_year_week",
                    y="error_perc",
                    title="Error Percentage",
                    x_title="Week",
                    y_title="Error Percentage"
                )
            with gr.Row():
                plot_week_error = gr.BarPlot(
                    value=update_week_plot(default_week),
                    x="tool",
                    y="error_perc",
                    title="Error Percentage",
                    x_title="Tool",
                    y_title="Error Percentage"
                )

            sel_tool.change(fn=update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            sel_week.change(fn=update_week_plot, inputs=sel_week, outputs=plot_week_error)

        with gr.TabItem("ℹ️ About"):
            with gr.Accordion("About the Benchmark", open=False):
                gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

demo.queue(default_concurrency_limit=40).launch()