kamil1300 committed on
Commit
1c7b749
·
verified ·
1 Parent(s): 48944d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +138 -46
app.py CHANGED
@@ -1,21 +1,114 @@
1
- """ Basic Agent Evaluation Runner"""
2
  import os
3
- import inspect
4
  import gradio as gr
5
  import requests
6
  import pandas as pd
 
 
 
 
 
7
  from agent.agent import chat_with_agent
8
 
9
-
10
-
11
- # (Keep Constants as is)
12
  # --- Constants ---
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
- # --- Basic Agent Definition ---
16
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
 
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  class BasicAgent:
20
  def __call__(self, question: str) -> dict:
21
  print(f"Agent received question (first 50 chars): {question[:50]}...")
@@ -28,34 +121,34 @@ class BasicAgent:
28
  "reasoning_trace": answer # Using the full response as reasoning trace
29
  }
30
 
31
-
32
- def run_and_submit_all( profile: gr.OAuthProfile | None):
33
  """
34
  Fetches all questions, runs the BasicAgent on them, submits all answers,
35
  and displays the results.
36
  """
37
  # --- Determine HF Space Runtime URL and Repo URL ---
38
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
39
 
40
- if profile:
41
- username= f"{profile.username}"
42
- print(f"User logged in: {username}")
 
43
  else:
44
- print("User not logged in.")
45
- return "Please Login to Hugging Face with the button.", None
46
 
47
  api_url = DEFAULT_API_URL
48
  questions_url = f"{api_url}/questions"
49
  submit_url = f"{api_url}/submit"
50
 
51
- # 1. Instantiate Agent ( modify this part to create your agent)
52
  try:
53
  agent = BasicAgent()
54
  except Exception as e:
55
  print(f"Error instantiating agent: {e}")
56
  return f"Error initializing agent: {e}", None
57
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
58
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
59
  print(agent_code)
60
 
61
  # 2. Fetch Questions
@@ -86,6 +179,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
86
  for item in questions_data:
87
  task_id = item.get("task_id")
88
  question_text = item.get("question")
 
 
89
  if not task_id or question_text is None:
90
  print(f"Skipping item with missing task_id or question: {item}")
91
  continue
@@ -97,6 +192,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
97
  model_answer = agent_response.get("model_answer", "")
98
  reasoning_trace = agent_response.get("reasoning_trace", "")
99
 
 
 
 
 
 
100
  # Create JSON-line format entry
101
  json_line_entry = {
102
  "task_id": task_id,
@@ -113,7 +213,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
113
  results_log.append({
114
  "Task ID": task_id,
115
  "Question": display_question,
116
- "Model Answer": display_answer
 
117
  })
118
 
119
  except Exception as e:
@@ -127,7 +228,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
127
  results_log.append({
128
  "Task ID": task_id,
129
  "Question": question_text[:200] + "..." if question_text and len(question_text) > 200 else question_text,
130
- "Model Answer": f"AGENT ERROR: {e}"
 
131
  })
132
 
133
  if not answers_payload:
@@ -182,58 +284,48 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
182
  results_df = pd.DataFrame(results_log)
183
  return status_message, results_df
184
 
185
-
186
- # --- Build Gradio Interface using Blocks ---
187
  with gr.Blocks() as demo:
188
- gr.Markdown("# Basic Agent Evaluation Runner")
189
  gr.Markdown(
190
  """
191
  **Instructions:**
192
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
193
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
194
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
195
- ---
196
- **Disclaimers:**
197
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
198
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
199
  """
200
  )
201
 
202
- gr.LoginButton()
203
-
204
  run_button = gr.Button("Run Evaluation & Submit All Answers")
205
-
206
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
207
- # Removed max_rows=10 from DataFrame constructor
208
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
209
 
210
  run_button.click(
211
  fn=run_and_submit_all,
 
212
  outputs=[status_output, results_table]
213
  )
214
 
215
  if __name__ == "__main__":
216
- print("\n" + "-" * 30 + " App Starting " + "-" * 30)
217
-
218
- # Print helpful startup info
219
  space_host_startup = os.getenv("SPACE_HOST")
220
  space_id_startup = os.getenv("SPACE_ID")
221
 
222
  if space_host_startup:
223
  print(f"✅ SPACE_HOST found: {space_host_startup}")
224
- print(f" Runtime URL: https://{space_host_startup}.hf.space")
225
  else:
226
- print("ℹ️ SPACE_HOST not found. Likely running locally.")
227
 
228
  if space_id_startup:
229
  print(f"✅ SPACE_ID found: {space_id_startup}")
230
  print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
231
- print(f" Repo Tree: https://huggingface.co/spaces/{space_id_startup}/tree/main")
232
  else:
233
- print("ℹ️ SPACE_ID not found. Repo URL cannot be determined.")
234
-
235
- print("-" * 70)
236
- print("Launching Gradio Interface for Basic Agent Evaluation...")
237
 
238
- # Do NOT use share=True in Hugging Face Space
239
- demo.launch() # Hugging Face handles URL + SSR internally
 
 
1
+ """ Agent Evaluation Runner"""
2
  import os
 
3
  import gradio as gr
4
  import requests
5
  import pandas as pd
6
+ import json
7
+ import re
8
+ import string
9
+ import warnings
10
+ import numpy as np
11
  from agent.agent import chat_with_agent
12
 
 
 
 
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
+ # --- Scoring Functions ---
17
def normalize_number_str(number_str: str) -> float:
    """
    Convert a numeric answer to a float, ignoring "$", "%" and ",".
    Parameters:
    - number_str: str, the text to convert. Non-string values (for example
      an int returned by the agent) are converted with str() first.
    Returns:
    - float, the parsed value, or float("inf") when the cleaned text is not
      a valid float literal.
    """
    # Tolerate answers that arrive as numbers instead of text; without this
    # the .replace() calls below raise AttributeError.
    if not isinstance(number_str, str):
        number_str = str(number_str)

    # we replace these common units and commas to allow
    # conversion to float
    for char in ["$", "%", ","]:
        number_str = number_str.replace(char, "")

    try:
        return float(number_str)
    except ValueError:
        print(f"String {number_str} cannot be normalized to number str.")
        # Sentinel meaning "not a number".
        return float("inf")
27
+
28
def split_string(
    s: str,
    char_list: "list[str] | None" = None,
) -> list[str]:
    """
    Split a string on any of the given separator characters.
    Parameters:
    - s: str, the string to split
    - char_list: list of separator characters (default: "," and ";").
      Every character of every entry is treated as a separator.
    Returns:
    - list of str, the pieces between separators (not stripped). When
      char_list is empty the result is [s].
    """
    # A None default avoids sharing one mutable list between calls.
    if char_list is None:
        char_list = [",", ";"]

    # An empty character class "[]" is not a valid pattern; with no
    # separators there is nothing to split on.
    if not char_list:
        return [s]

    # Escape each separator so characters such as "^", "]", "-" or "\" are
    # matched literally instead of altering the character class.
    pattern = "[" + "".join(re.escape(char) for char in char_list) + "]"
    return re.split(pattern, s)
34
+
35
def normalize_str(input_str: str, remove_punct: bool = True) -> str:
    """
    Normalize a string by:
    - Removing all white spaces
    - Optionally removing punctuation (if remove_punct is True)
    - Converting to lowercase
    Parameters:
    - input_str: str, the string to normalize. Non-string values are
      converted with str() first.
    - remove_punct: bool, whether to remove punctuation (default: True)
    Returns:
    - str, the normalized string
    """
    # Tolerate non-string answers; re.sub raises TypeError on them otherwise.
    if not isinstance(input_str, str):
        input_str = str(input_str)

    # Remove all white spaces. Required e.g for seagull vs. sea gull
    lowered = re.sub(r"\s", "", input_str).lower()

    # Remove punctuation, if specified.
    if remove_punct:
        translator = str.maketrans("", "", string.punctuation)
        return lowered.translate(translator)
    return lowered
56
+
57
def question_scorer(
    model_answer: str,
    ground_truth: str,
) -> bool:
    """
    Decide whether a model answer matches the ground truth.

    The comparison mode is chosen from the ground truth:
    - if it parses as a float, the answer is compared as a number;
    - else if it contains "," or ";", both are split into lists and
      compared element by element (numbers as floats, text normalized
      without removing punctuation);
    - otherwise both are compared as normalized strings.
    Parameters:
    - model_answer: the agent's answer; None is treated as the text "None"
      and other non-string values are converted with str().
    - ground_truth: the expected answer; non-string values are converted
      with str().
    Returns:
    - bool, True when the answer matches under the selected mode.
    """
    def is_float(element: object) -> bool:
        # float() raises TypeError (not ValueError) for values such as None
        # or a list, so both are treated as "not a float".
        try:
            float(element)
            return True
        except (TypeError, ValueError):
            return False

    if model_answer is None:
        model_answer = "None"
    elif not isinstance(model_answer, str):
        # The string operations below require text.
        model_answer = str(model_answer)
    if not isinstance(ground_truth, str):
        ground_truth = str(ground_truth)

    # if gt is a number
    if is_float(ground_truth):
        print(f"Evaluating {model_answer} as a number.")
        normalized_answer = normalize_number_str(model_answer)
        return normalized_answer == float(ground_truth)

    # if gt is a list
    if any(char in ground_truth for char in [",", ";"]):
        print(f"Evaluating {model_answer} as a comma separated list.")
        # question with the fish: normalization removes punct

        gt_elems = split_string(ground_truth)
        ma_elems = split_string(model_answer)

        # check length is the same
        if len(gt_elems) != len(ma_elems):
            warnings.warn(
                "Answer lists have different lengths, returning False.", UserWarning
            )
            return False

        # compare each element as float or str
        comparisons = []
        for ma_elem, gt_elem in zip(ma_elems, gt_elems):
            if is_float(gt_elem):
                normalized_ma_elem = normalize_number_str(ma_elem)
                comparisons.append(normalized_ma_elem == float(gt_elem))
            else:
                # we do not remove punct since comparisons can include punct
                comparisons.append(
                    normalize_str(ma_elem, remove_punct=False)
                    == normalize_str(gt_elem, remove_punct=False)
                )
        return all(comparisons)

    # if gt is a str
    print(f"Evaluating {model_answer} as a string.")
    return normalize_str(model_answer) == normalize_str(ground_truth)
110
+
111
+ # --- Agent Definition ---
112
  class BasicAgent:
113
  def __call__(self, question: str) -> dict:
114
  print(f"Agent received question (first 50 chars): {question[:50]}...")
 
121
  "reasoning_trace": answer # Using the full response as reasoning trace
122
  }
123
 
124
+ def run_and_submit_all(username_input=""):
 
125
  """
126
  Fetches all questions, runs the BasicAgent on them, submits all answers,
127
  and displays the results.
128
  """
129
  # --- Determine HF Space Runtime URL and Repo URL ---
130
+ space_id = os.getenv("SPACE_ID")
131
 
132
+ # Get username from input
133
+ if username_input:
134
+ username = username_input.strip()
135
+ print(f"Using provided username: {username}")
136
  else:
137
+ print("No username provided.")
138
+ return "Please provide a username.", None
139
 
140
  api_url = DEFAULT_API_URL
141
  questions_url = f"{api_url}/questions"
142
  submit_url = f"{api_url}/submit"
143
 
144
+ # 1. Instantiate Agent
145
  try:
146
  agent = BasicAgent()
147
  except Exception as e:
148
  print(f"Error instantiating agent: {e}")
149
  return f"Error initializing agent: {e}", None
150
+
151
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "https://huggingface.co/spaces/your-space-id/tree/main"
152
  print(agent_code)
153
 
154
  # 2. Fetch Questions
 
179
  for item in questions_data:
180
  task_id = item.get("task_id")
181
  question_text = item.get("question")
182
+ ground_truth = item.get("ground_truth", "") # Get ground truth if available
183
+
184
  if not task_id or question_text is None:
185
  print(f"Skipping item with missing task_id or question: {item}")
186
  continue
 
192
  model_answer = agent_response.get("model_answer", "")
193
  reasoning_trace = agent_response.get("reasoning_trace", "")
194
 
195
+ # Score the answer if ground truth is available
196
+ score = None
197
+ if ground_truth:
198
+ score = question_scorer(model_answer, ground_truth)
199
+
200
  # Create JSON-line format entry
201
  json_line_entry = {
202
  "task_id": task_id,
 
213
  results_log.append({
214
  "Task ID": task_id,
215
  "Question": display_question,
216
+ "Model Answer": display_answer,
217
+ "Score": "✓" if score else "✗" if score is False else "N/A"
218
  })
219
 
220
  except Exception as e:
 
228
  results_log.append({
229
  "Task ID": task_id,
230
  "Question": question_text[:200] + "..." if question_text and len(question_text) > 200 else question_text,
231
+ "Model Answer": f"AGENT ERROR: {e}",
232
+ "Score": "ERROR"
233
  })
234
 
235
  if not answers_payload:
 
284
  results_df = pd.DataFrame(results_log)
285
  return status_message, results_df
286
 
287
# --- Build Gradio Interface ---
# Page layout: a title, short instructions, a username box, a run button,
# and two outputs (status text and a results table).
with gr.Blocks() as demo:
    gr.Markdown("# Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**
        1. Enter your Hugging Face username in the text box below.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        **Note:** This will take some time as the agent processes all questions.
        """
    )

    username_input = gr.Textbox(label="Enter your Hugging Face username", placeholder="your_username")
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # The text box value is passed to run_and_submit_all; its two return
    # values fill the status box and the results table.
    run_button.click(
        fn=run_and_submit_all,
        inputs=[username_input],
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # NOTE(review): SPACE_HOST may already be a full host name ending in
        # ".hf.space"; confirm whether this line prints a doubled suffix.
        print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("-"*(60 + len(" App Starting ")) + "\n")
    print("Launching Gradio Interface for Agent Evaluation...")
    # Request a public share link only for local runs (SPACE_ID unset).
    # The previous revision of this file warned against share=True on a
    # Hugging Face Space.
    demo.launch(debug=True, share=not space_id_startup)