Spaces:

kamil1300
/

agent_course

Runtime error

App Files Files Community

kamil1300 committed on Jul 11

Commit

1c7b749

verified ·

1 Parent(s): 48944d2

Update app.py

Browse files

Files changed (1) hide show

app.py +138 -46

app.py CHANGED Viewed

@@ -1,21 +1,114 @@
-""" Basic Agent Evaluation Runner"""
 import os
-import inspect
 import gradio as gr
 import requests
 import pandas as pd
 from agent.agent import chat_with_agent
-# (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --- Basic Agent Definition ---
-# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 class BasicAgent:
     def __call__(self, question: str) -> dict:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
@@ -28,34 +121,34 @@ class BasicAgent:
             "reasoning_trace": answer  # Using the full response as reasoning trace
         }
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
-    space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
-    if profile:
-        username= f"{profile.username}"
-        print(f"User logged in: {username}")
     else:
-        print("User not logged in.")
-        return "Please Login to Hugging Face with the button.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
-    # 1. Instantiate Agent ( modify this part to create your agent)
     try:
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
-    # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
-    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     print(agent_code)
     # 2. Fetch Questions
@@ -86,6 +179,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
@@ -97,6 +192,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             model_answer = agent_response.get("model_answer", "")
             reasoning_trace = agent_response.get("reasoning_trace", "")
             # Create JSON-line format entry
             json_line_entry = {
                 "task_id": task_id,
@@ -113,7 +213,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             results_log.append({
                 "Task ID": task_id,
                 "Question": display_question,
-                "Model Answer": display_answer
             })
         except Exception as e:
@@ -127,7 +228,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
              results_log.append({
                  "Task ID": task_id,
                  "Question": question_text[:200] + "..." if question_text and len(question_text) > 200 else question_text,
-                 "Model Answer": f"AGENT ERROR: {e}"
              })
     if not answers_payload:
@@ -182,58 +284,48 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
-# --- Build Gradio Interface using Blocks ---
 with gr.Blocks() as demo:
-    gr.Markdown("# Basic Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
-        1.  Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-        2.  Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-        3.  Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-        ---
-        **Disclaimers:**
-        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
         """
     )
-    gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-    # Removed max_rows=10 from DataFrame constructor
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     run_button.click(
         fn=run_and_submit_all,
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
-    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
-    # Print helpful startup info
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
-        print(f"   Runtime URL: https://{space_host_startup}.hf.space")
     else:
-        print("ℹ️  SPACE_HOST not found. Likely running locally.")
     if space_id_startup:
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-        print(f"   Repo Tree: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
-        print("ℹ️  SPACE_ID not found. Repo URL cannot be determined.")
-    print("-" * 70)
-    print("Launching Gradio Interface for Basic Agent Evaluation...")
-    # Do NOT use share=True in Hugging Face Space
-    demo.launch()  # Hugging Face handles URL + SSR internally

+""" Agent Evaluation Runner"""
 import os
 import gradio as gr
 import requests
 import pandas as pd
+import json
+import re
+import string
+import warnings
+import numpy as np
 from agent.agent import chat_with_agent
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# --- Scoring Functions ---
def normalize_number_str(number_str: str) -> float:
    """Convert a numeric answer to ``float``, ignoring ``$``, ``%`` and ``,``.

    Parameters:
    - number_str: the answer text, e.g. ``"$1,234.50"`` or ``"85%"``. Non-string
      values (``int``, ``float``, ``None``) are converted with ``str()`` first
      instead of raising ``AttributeError``.

    Returns:
    - float, the parsed value, or ``float("inf")`` when the cleaned text is
      not a valid number (a message is printed in that case).
    """
    # Strip the common unit/grouping characters in a single pass so that
    # float() can parse the remainder.
    cleaned = str(number_str).translate(str.maketrans("", "", "$%,"))
    try:
        return float(cleaned)
    except ValueError:
        print(f"String {cleaned} cannot be normalized to number str.")
        # Sentinel for "not a number"; callers compare it with ==.
        return float("inf")
+def split_string(
+    s: str,
+    char_list: list[str] = [",", ";"],
+) -> list[str]:
+    pattern = f"[{''.join(char_list)}]"
+    return re.split(pattern, s)
def normalize_str(input_str: str, remove_punct: bool = True) -> str:
    """
    Normalize a string by:
    - Removing all white spaces
    - Optionally removing punctuation (if remove_punct is True)
    - Converting to lowercase
    Parameters:
    - input_str: str, the string to normalize; non-string values are
      converted with str() first instead of raising TypeError
    - remove_punct: bool, whether to remove punctuation (default: True)
    Returns:
    - str, the normalized string
    """
    # Remove all white spaces. Required e.g for seagull vs. sea gull
    collapsed = re.sub(r"\s", "", str(input_str)).lower()
    if not remove_punct:
        return collapsed
    # Drop every ASCII punctuation character in one pass.
    return collapsed.translate(str.maketrans("", "", string.punctuation))
def question_scorer(
    model_answer: str,
    ground_truth: str,
) -> bool:
    """Return True when ``model_answer`` matches ``ground_truth``.

    The comparison mode is chosen from the ground truth:
    - it parses as a float: numeric comparison, after the answer is cleaned
      by ``normalize_number_str``;
    - it contains ``,`` or ``;``: element-wise list comparison. Both lists
      must have the same length; numeric elements are compared as floats,
      the others via ``normalize_str`` with punctuation kept;
    - otherwise: string comparison via ``normalize_str`` (whitespace and
      punctuation removed, case-insensitive).

    Parameters:
    - model_answer: the agent's answer; ``None`` is scored as the text
      ``"None"`` and other non-string values are converted with ``str()``
    - ground_truth: the expected answer; ``None`` always scores False

    Returns:
    - bool, whether the answer is considered correct
    """
    def is_float(element: object) -> bool:
        # float() raises TypeError (not ValueError) for None and containers,
        # so both must be caught for this to be a safe predicate.
        try:
            float(element)
        except (TypeError, ValueError):
            return False
        return True

    if ground_truth is None:
        # Nothing to score against.
        return False
    if model_answer is None:
        model_answer = "None"
    # The splitting / normalizing helpers operate on text; coerce once here
    # so a numeric answer or ground truth cannot crash them.
    model_answer = str(model_answer)
    ground_truth = str(ground_truth)

    # if gt is a number
    if is_float(ground_truth):
        print(f"Evaluating {model_answer} as a number.")
        return normalize_number_str(model_answer) == float(ground_truth)

    # if gt is a list
    if any(sep in ground_truth for sep in (",", ";")):
        print(f"Evaluating {model_answer} as a comma separated list.")
        # question with the fish: normalization removes punct
        gt_elems = split_string(ground_truth)
        ma_elems = split_string(model_answer)
        # check length is the same
        if len(gt_elems) != len(ma_elems):
            warnings.warn(
                "Answer lists have different lengths, returning False.", UserWarning
            )
            return False
        # compare each element as float or str
        comparisons = []
        for ma_elem, gt_elem in zip(ma_elems, gt_elems):
            if is_float(gt_elem):
                comparisons.append(normalize_number_str(ma_elem) == float(gt_elem))
            else:
                # we do not remove punct since comparisons can include punct
                comparisons.append(
                    normalize_str(ma_elem, remove_punct=False)
                    == normalize_str(gt_elem, remove_punct=False)
                )
        return all(comparisons)

    # if gt is a str
    print(f"Evaluating {model_answer} as a string.")
    return normalize_str(model_answer) == normalize_str(ground_truth)
+# --- Agent Definition ---
 class BasicAgent:
     def __call__(self, question: str) -> dict:
         print(f"Agent received question (first 50 chars): {question[:50]}...")
             "reasoning_trace": answer  # Using the full response as reasoning trace
         }
+def run_and_submit_all(username_input=""):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
     """
     # --- Determine HF Space Runtime URL and Repo URL ---
+    space_id = os.getenv("SPACE_ID")
+    # Get username from input
+    if username_input:
+        username = username_input.strip()
+        print(f"Using provided username: {username}")
     else:
+        print("No username provided.")
+        return "Please provide a username.", None
     api_url = DEFAULT_API_URL
     questions_url = f"{api_url}/questions"
     submit_url = f"{api_url}/submit"
+    # 1. Instantiate Agent
     try:
         agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
+    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "https://huggingface.co/spaces/your-space-id/tree/main"
     print(agent_code)
     # 2. Fetch Questions
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        ground_truth = item.get("ground_truth", "")  # Get ground truth if available
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
             model_answer = agent_response.get("model_answer", "")
             reasoning_trace = agent_response.get("reasoning_trace", "")
+            # Score the answer if ground truth is available
+            score = None
+            if ground_truth:
+                score = question_scorer(model_answer, ground_truth)
             # Create JSON-line format entry
             json_line_entry = {
                 "task_id": task_id,
             results_log.append({
                 "Task ID": task_id,
                 "Question": display_question,
+                "Model Answer": display_answer,
+                "Score": "✓" if score else "✗" if score is False else "N/A"
             })
         except Exception as e:
              results_log.append({
                  "Task ID": task_id,
                  "Question": question_text[:200] + "..." if question_text and len(question_text) > 200 else question_text,
+                 "Model Answer": f"AGENT ERROR: {e}",
+                 "Score": "ERROR"
              })
     if not answers_payload:
         results_df = pd.DataFrame(results_log)
         return status_message, results_df
+# --- Build Gradio Interface ---
 # Page layout: title, instructions, username box, run button and the two
 # output widgets, all created inside one Blocks context bound to `demo`.
 with gr.Blocks() as demo:
+    gr.Markdown("# Agent Evaluation Runner")
     gr.Markdown(
         """
         **Instructions:**
+        1. Enter your Hugging Face username in the text box below.
+        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+        **Note:** This will take some time as the agent processes all questions.
         """
     )
     # Free-text username; its value is handed to run_and_submit_all as the
     # username_input argument when the button is clicked.
+    username_input = gr.Textbox(label="Enter your Hugging Face username", placeholder="your_username")
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     # Read-only status text and results table; they receive the two values
     # returned by run_and_submit_all, in this order.
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
     # Wire the button: one input (the username) and two outputs.
     run_button.click(
         fn=run_and_submit_all,
+        inputs=[username_input],
         outputs=[status_output, results_table]
     )
 if __name__ == "__main__":
     # Script entry point: print a short environment report, then start the
     # Gradio server for the `demo` interface.
     print("\n" + "-"*30 + " App Starting " + "-"*30)
     space_host_startup = os.getenv("SPACE_HOST")
     space_id_startup = os.getenv("SPACE_ID")
     if space_host_startup:
         print(f"✅ SPACE_HOST found: {space_host_startup}")
         # SPACE_HOST is already the full host name (it ends in ".hf.space"),
         # so appending the suffix again would print a non-existent URL.
         print(f"   Runtime URL should be: https://{space_host_startup}")
     else:
         print("ℹ️  SPACE_HOST environment variable not found (running locally?).")
     if space_id_startup:
         print(f"✅ SPACE_ID found: {space_id_startup}")
         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
     else:
         print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
     print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Agent Evaluation...")
     # Share links are not supported inside a Hugging Face Space (the Space
     # already has a public URL), so only request one for local runs.
     running_on_space = bool(space_host_startup or space_id_startup)
     demo.launch(debug=True, share=not running_on_space)