File size: 4,445 Bytes
977063a
edbe15e
 
 
977063a
edbe15e
 
 
 
 
 
 
 
 
 
 
42f819f
edbe15e
42f819f
 
5051da6
edbe15e
 
 
 
 
 
4babb59
 
edbe15e
4babb59
edbe15e
 
 
 
4babb59
 
 
 
edbe15e
 
 
 
 
 
 
470a9a5
acfff07
5051da6
edbe15e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
470a9a5
 
 
0524e8d
 
 
470a9a5
edbe15e
470a9a5
 
5051da6
49c6a0b
5051da6
edbe15e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ea5642
 
 
 
 
edbe15e
 
 
 
 
 
977063a
b9dc6d6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import gradio as gr
import os
from evaluation_logic import run_evaluation
from eval.predict import PROMPT_FORMATTERS

# Preset prompt templates, rendered once at import time from the registered
# formatter classes. Keys double as the dropdown choice names in the UI.
PROMPT_TEMPLATES = {
    name: PROMPT_FORMATTERS[name]().PROMPT_TEMPLATE
    for name in ("duckdbinstgraniteshort", "duckdbinst")
}

def gradio_run_evaluation(inference_api, model_name, prompt_format, openrouter_token=None, custom_prompt=None):
    """Run the evaluation and stream progress lines back to the UI.

    Args:
        inference_api: Inference backend name (currently only "openrouter").
        model_name: Model identifier; surrounding whitespace is stripped.
        prompt_format: Selected prompt format name (a preset key or "custom").
        openrouter_token: OpenRouter API token; exported as OPENROUTER_API_KEY
            when provided and the openrouter backend is selected.
        custom_prompt: Template content forwarded to the evaluation.

    Yields:
        All output lines produced so far, joined by newlines, so the output
        textbox shows cumulative progress on every update.
    """
    # Only export the token when one was actually supplied. The previous
    # version unconditionally wrote str(openrouter_token), which set the
    # literal string "None" (or "") and clobbered any token already present
    # in the environment.
    if inference_api == "openrouter" and openrouter_token:
        os.environ["OPENROUTER_API_KEY"] = str(openrouter_token)

    # Accumulate and re-yield the full transcript so the Gradio textbox
    # displays everything streamed so far, not just the latest line.
    output = []
    for result in run_evaluation(inference_api, str(model_name).strip(), prompt_format, custom_prompt):
        output.append(result)
        yield "\n".join(output)

def update_token_visibility(api):
    """Show the OpenRouter token field only while the OpenRouter API is selected."""
    is_openrouter = api == "openrouter"
    return gr.update(visible=is_openrouter)

def update_prompt_template(prompt_format):
    """Return the preset template text for *prompt_format*.

    Selecting "custom" is a no-op (the user's text is preserved); any
    unrecognized format clears the textarea.
    """
    if prompt_format == "custom":
        # Skip the update entirely so the textarea keeps the user's edits.
        return gr.update()
    return PROMPT_TEMPLATES.get(prompt_format, "")

def handle_template_edit(prompt_format, new_template):
    """Keep the format dropdown in sync after the user edits the template text."""
    # Already in custom mode: leave the dropdown untouched so this edit does
    # not trigger a second format-change event (feedback loop).
    if prompt_format == "custom":
        return gr.update()

    edited = new_template.strip()
    # If the edited text still matches a preset (modulo surrounding
    # whitespace), keep that preset's name; otherwise flip to "custom".
    matching = (name for name, template in PROMPT_TEMPLATES.items() if template.strip() == edited)
    return next(matching, "custom")

# --- UI layout and event wiring -------------------------------------------
# Declarative Gradio Blocks app: builds the controls, then connects the
# change/click handlers defined above. Statement order matters here because
# components must exist before they are referenced as inputs/outputs.
with gr.Blocks(gr.themes.Soft()) as demo:
    gr.Markdown("# DuckDB SQL Evaluation App")

    with gr.Row():
        with gr.Column():
            # Backend selector; only one choice today, but kept as a dropdown
            # so more APIs can be added without reshaping the UI.
            inference_api = gr.Dropdown(
                label="Inference API",
                choices=['openrouter'],
                value="openrouter"
            )

            # Visible by default because the default API is "openrouter";
            # update_token_visibility toggles this when the API changes.
            openrouter_token = gr.Textbox(
                label="OpenRouter API Token",
                placeholder="Enter your OpenRouter API token",
                type="password",
                visible=True
            )

            model_name = gr.Textbox(
                label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)"
            )

            gr.Markdown("[View OpenRouter Models](https://openrouter.ai/models?order=top-weekly)")

    with gr.Row():
        with gr.Column():
            # Add 'custom' to the choices
            prompt_format = gr.Dropdown(
                label="Prompt Format",
                choices=['duckdbinst', 'duckdbinstgraniteshort', 'custom'],
                value="duckdbinstgraniteshort"
            )

            # Editable template; editing it may flip the dropdown to "custom"
            # via handle_template_edit below.
            custom_prompt = gr.TextArea(
                label="Prompt Template Content",
                placeholder="Enter your custom prompt template here or select a preset format above.",
                lines=10,
                value=PROMPT_TEMPLATES['duckdbinstgraniteshort']  # Set initial value
            )

    # Clickable example rows that pre-fill the inputs (token left blank).
    gr.Examples(
        examples=[
            ["openrouter", "qwen/qwen-2.5-72b-instruct", "duckdbinst", "", ""],
            ["openrouter", "meta-llama/llama-3.2-3b-instruct:free", "duckdbinstgraniteshort", "", ""],
            ["openrouter", "mistralai/mistral-nemo", "duckdbinst", "", ""],
        ],
        inputs=[inference_api, model_name, prompt_format, openrouter_token, custom_prompt],
    )

    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)

    # Update token visibility
    inference_api.change(
        fn=update_token_visibility,
        inputs=[inference_api],
        outputs=[openrouter_token]
    )

    # Update template content when preset is selected
    prompt_format.change(
        fn=update_prompt_template,
        inputs=[prompt_format],
        outputs=[custom_prompt]
    )

    # Update format dropdown when template is edited
    custom_prompt.change(
        fn=handle_template_edit,
        inputs=[prompt_format, custom_prompt],
        outputs=[prompt_format]
    )

    # gradio_run_evaluation is a generator, so the output textbox streams
    # cumulative results as they arrive.
    start_btn.click(
        fn=gradio_run_evaluation,
        inputs=[inference_api, model_name, prompt_format, openrouter_token, custom_prompt],
        outputs=output
    )

# queue() is required for generator (streaming) handlers to work.
demo.queue().launch()