kamil1300 committed on
Commit
cff4af2
·
verified ·
1 Parent(s): 9cb298e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -160
app.py CHANGED
@@ -4,122 +4,42 @@ import gradio as gr
4
  import requests
5
  import pandas as pd
6
  import json
7
- import re
8
- import string
9
- import warnings
10
- import numpy as np
11
  from agent.agent import chat_with_agent
12
 
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
- # --- Scoring Functions ---
17
def normalize_number_str(number_str: str) -> float:
    """Parse an answer string as a float, ignoring ``$``, ``%`` and ``,``.

    Args:
        number_str: Text expected to contain a number, e.g. ``"$1,234.50"``.
            Values that are not strings (for instance an ``int`` answer)
            are converted with ``str()`` first instead of raising
            ``AttributeError`` on ``.replace``.

    Returns:
        The parsed number, or ``float("inf")`` when the cleaned text cannot
        be converted (a diagnostic message is printed in that case).
    """
    # Drop common unit and thousands-separator characters in a single pass
    # so that "$1,000" or "50%" become something float() accepts.
    cleaned = str(number_str).translate(str.maketrans("", "", "$%,"))
    try:
        return float(cleaned)
    except ValueError:
        print(f"String {cleaned} cannot be normalized to number str.")
        # Sentinel kept for backward compatibility with existing callers.
        return float("inf")
27
-
28
def split_string(
    s: str,
    char_list: "list[str] | None" = None,
) -> list[str]:
    """Split ``s`` on every occurrence of any of the given separator characters.

    Args:
        s: The string to split.
        char_list: Separator characters. Defaults to ``[",", ";"]``. The
            default is created per call rather than shared as a mutable
            default argument.

    Returns:
        The pieces of ``s`` between separators. When no separator characters
        are supplied the result is ``[s]``.
    """
    separators = "".join([",", ";"] if char_list is None else char_list)
    if not separators:
        # An empty character class ("[]") is not a valid regex.
        return [s]
    # Escape so that characters such as "]", "^", "-" or "\" are treated
    # literally inside the character class.
    pattern = f"[{re.escape(separators)}]"
    return re.split(pattern, s)
34
-
35
def normalize_str(input_str, remove_punct=True) -> str:
    """Normalize a string for lenient answer comparison.

    The input is stripped of all whitespace and lower-cased; ASCII
    punctuation (``string.punctuation``) is removed as well unless
    ``remove_punct`` is false.

    Args:
        input_str: The value to normalize. Non-string values are converted
            with ``str()`` first so that, for example, a numeric answer does
            not raise ``TypeError`` inside ``re.sub``.
        remove_punct: Whether to strip punctuation (default: ``True``).

    Returns:
        The normalized string.
    """
    # Remove all whitespace. Required e.g. for "seagull" vs. "sea gull".
    compact = re.sub(r"\s", "", str(input_str)).lower()
    if not remove_punct:
        return compact
    return compact.translate(str.maketrans("", "", string.punctuation))
56
-
57
def question_scorer(
    model_answer: str,
    ground_truth: str,
) -> bool:
    """Decide whether ``model_answer`` matches ``ground_truth``.

    The comparison mode is chosen from the ground truth:

    * if it parses as a float, the answer is compared numerically;
    * else if it contains ``,`` or ``;``, both are split into lists and
      compared element by element (numerically where the ground-truth
      element is a number, otherwise as whitespace-free lower-case text
      with punctuation kept);
    * otherwise both are compared as normalized strings with punctuation
      removed.

    Args:
        model_answer: The answer produced by the model. ``None`` is treated
            as the text ``"None"``; other non-string values are converted
            with ``str()``.
        ground_truth: The expected answer.

    Returns:
        ``True`` when the answer is considered correct, ``False`` otherwise.
        A ``UserWarning`` is issued when list lengths differ.
    """

    def is_float(element: object) -> bool:
        # float() raises TypeError (not ValueError) for None and other
        # non-numeric objects, so both must be caught.
        try:
            float(element)
            return True
        except (TypeError, ValueError):
            return False

    def elements_match(ma_elem: str, gt_elem: str) -> bool:
        # Compare one list element as a number or as text.
        if is_float(gt_elem):
            return normalize_number_str(ma_elem) == float(gt_elem)
        # We do not remove punctuation since comparisons can include it.
        return normalize_str(ma_elem, remove_punct=False) == normalize_str(
            gt_elem, remove_punct=False
        )

    # Work on text from here on so that every branch can rely on str methods.
    model_answer = "None" if model_answer is None else str(model_answer)

    # Ground truth is a number.
    if is_float(ground_truth):
        print(f"Evaluating {model_answer} as a number.")
        return normalize_number_str(model_answer) == float(ground_truth)

    # Ground truth is a list.
    if any(char in ground_truth for char in [",", ";"]):
        print(f"Evaluating {model_answer} as a comma separated list.")
        gt_elems = split_string(ground_truth)
        ma_elems = split_string(model_answer)

        # Lists of different lengths can never match.
        if len(gt_elems) != len(ma_elems):
            warnings.warn(
                "Answer lists have different lengths, returning False.", UserWarning
            )
            return False

        # Evaluate every pair (no short-circuit) before combining results.
        comparisons = [
            elements_match(ma_elem, gt_elem)
            for ma_elem, gt_elem in zip(ma_elems, gt_elems)
        ]
        return all(comparisons)

    # Ground truth is a plain string.
    print(f"Evaluating {model_answer} as a string.")
    return normalize_str(model_answer) == normalize_str(ground_truth)
110
-
111
  # --- Agent Definition ---
112
  class BasicAgent:
113
- def __call__(self, question: str) -> dict:
114
- print(f"Agent received question (first 50 chars): {question[:50]}...")
115
- # Get response from the agent
116
  answer = chat_with_agent(question)
 
 
 
 
 
 
 
 
117
 
118
- # Return in the format expected by the API
119
- return {
120
- "model_answer": answer,
121
- "reasoning_trace": answer # Using the full response as reasoning trace
122
- }
 
 
 
 
 
 
 
 
 
 
123
 
124
  def run_and_submit_all(username_input=""):
125
  """
@@ -148,7 +68,7 @@ def run_and_submit_all(username_input=""):
148
  print(f"Error instantiating agent: {e}")
149
  return f"Error initializing agent: {e}", None
150
 
151
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "https://huggingface.co/spaces/your-space-id/tree/main"
152
  print(agent_code)
153
 
154
  # 2. Fetch Questions
@@ -160,69 +80,96 @@ def run_and_submit_all(username_input=""):
160
  if not questions_data:
161
  print("Fetched questions list is empty.")
162
  return "Fetched questions list is empty or invalid format.", None
163
- print(f"Fetched {len(questions_data)} questions.")
164
- except requests.exceptions.RequestException as e:
 
 
 
165
  print(f"Error fetching questions: {e}")
166
  return f"Error fetching questions: {e}", None
167
- except requests.exceptions.JSONDecodeError as e:
168
- print(f"Error decoding JSON response from questions endpoint: {e}")
169
- print(f"Response text: {response.text[:500]}")
170
- return f"Error decoding server response for questions: {e}", None
171
- except Exception as e:
172
- print(f"An unexpected error occurred fetching questions: {e}")
173
- return f"An unexpected error occurred fetching questions: {e}", None
174
 
175
  # 3. Run your Agent
176
  results_log = []
177
  answers_payload = []
178
  print(f"Running agent on {len(questions_data)} questions...")
 
179
  for item in questions_data:
180
  task_id = item.get("task_id")
181
  question_text = item.get("question")
182
- ground_truth = item.get("ground_truth", "") # Get ground truth if available
183
 
184
  if not task_id or question_text is None:
185
  print(f"Skipping item with missing task_id or question: {item}")
186
  continue
 
187
  try:
188
- # Get structured response from agent
189
- agent_response = agent(question_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
 
191
- # Extract model_answer and reasoning_trace
192
- model_answer = agent_response.get("model_answer", "")
193
- reasoning_trace = agent_response.get("reasoning_trace", "")
194
 
195
- # Score the answer if ground truth is available
196
- score = None
197
- if ground_truth:
198
- score = question_scorer(model_answer, ground_truth)
199
 
200
- # Create JSON-line format entry
201
- json_line_entry = {
202
  "task_id": task_id,
203
- "model_answer": model_answer,
204
- "reasoning_trace": reasoning_trace
205
  }
206
 
207
- answers_payload.append(json_line_entry)
 
 
208
 
209
  # For display in the table, show truncated versions
210
  display_question = question_text[:200] + "..." if len(question_text) > 200 else question_text
211
- display_answer = model_answer[:200] + "..." if len(model_answer) > 200 else model_answer
212
 
213
  results_log.append({
214
  "Task ID": task_id,
215
  "Question": display_question,
216
  "Model Answer": display_answer,
217
- "Score": "" if score else "✗" if score is False else "N/A"
218
  })
219
 
220
  except Exception as e:
221
  print(f"Error running agent on task {task_id}: {e}")
222
  error_response = {
223
  "task_id": task_id,
224
- "model_answer": f"AGENT ERROR: {e}",
225
- "reasoning_trace": f"Agent encountered an error while processing the question: {str(e)}"
226
  }
227
  answers_payload.append(error_response)
228
  results_log.append({
@@ -236,8 +183,20 @@ def run_and_submit_all(username_input=""):
236
  print("Agent did not produce any answers to submit.")
237
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
238
 
239
- # 4. Prepare Submission
240
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
 
 
 
 
 
 
 
 
 
 
 
 
241
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
242
  print(status_update)
243
 
@@ -257,29 +216,8 @@ def run_and_submit_all(username_input=""):
257
  print("Submission successful.")
258
  results_df = pd.DataFrame(results_log)
259
  return final_status, results_df
260
- except requests.exceptions.HTTPError as e:
261
- error_detail = f"Server responded with status {e.response.status_code}."
262
- try:
263
- error_json = e.response.json()
264
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
265
- except requests.exceptions.JSONDecodeError:
266
- error_detail += f" Response: {e.response.text[:500]}"
267
- status_message = f"Submission Failed: {error_detail}"
268
- print(status_message)
269
- results_df = pd.DataFrame(results_log)
270
- return status_message, results_df
271
- except requests.exceptions.Timeout:
272
- status_message = "Submission Failed: The request timed out."
273
- print(status_message)
274
- results_df = pd.DataFrame(results_log)
275
- return status_message, results_df
276
- except requests.exceptions.RequestException as e:
277
- status_message = f"Submission Failed: Network error - {e}"
278
- print(status_message)
279
- results_df = pd.DataFrame(results_log)
280
- return status_message, results_df
281
  except Exception as e:
282
- status_message = f"An unexpected error occurred during submission: {e}"
283
  print(status_message)
284
  results_df = pd.DataFrame(results_log)
285
  return status_message, results_df
 
4
  import requests
5
  import pandas as pd
6
  import json
7
+ import time
 
 
 
8
  from agent.agent import chat_with_agent
9
 
10
  # --- Constants ---
11
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  # --- Agent Definition ---
14
  class BasicAgent:
15
+ def __call__(self, question: str) -> str:
16
+ print(f"Agent received question: {question}")
17
+ # Get response from the agent using your LLM
18
  answer = chat_with_agent(question)
19
+ return answer.strip() # Return just the clean answer
20
+
21
def download_task_file(task_id, api_url, timeout=30, max_chars=50000):
    """Download the file associated with a task ID and return it as text.

    Args:
        task_id: Identifier of the task; used to build ``{api_url}/files/{task_id}``.
        api_url: Base URL of the API.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.
        max_chars: Maximum number of characters of decoded text to return.

    Returns:
        The decoded file content truncated to ``max_chars`` characters, a
        ``"[Binary file content - N bytes]"`` placeholder when the payload
        cannot be decoded as text, or ``None`` when the request fails or the
        server answers with anything other than HTTP 200.
    """
    url = f"{api_url}/files/{task_id}"

    try:
        response = requests.get(url, timeout=timeout)
        payload = response.content
    except requests.exceptions.RequestException as e:
        # Best effort: a missing or unreachable file must not abort the run,
        # but the reason is reported instead of being silently dropped.
        print(f"Error downloading file for task {task_id}: {e}")
        return None

    if response.status_code != 200:
        # 404 means the task has no file; other statuses are treated alike.
        return None

    # Decode the raw bytes strictly. ``response.text`` substitutes
    # undecodable bytes instead of raising, so it cannot detect binary data.
    try:
        content = payload.decode(response.encoding or "utf-8")
    except (UnicodeDecodeError, LookupError):
        return f"[Binary file content - {len(payload)} bytes]"

    return content[:max_chars]
43
 
44
  def run_and_submit_all(username_input=""):
45
  """
 
68
  print(f"Error instantiating agent: {e}")
69
  return f"Error initializing agent: {e}", None
70
 
71
+ agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "https://huggingface.co/spaces/kamil1300/agent_course/tree/main"
72
  print(agent_code)
73
 
74
  # 2. Fetch Questions
 
80
  if not questions_data:
81
  print("Fetched questions list is empty.")
82
  return "Fetched questions list is empty or invalid format.", None
83
+
84
+ # Limit to only 20 questions
85
+ questions_data = questions_data[:20]
86
+ print(f"Fetched {len(questions_data)} questions (limited to 20).")
87
+ except Exception as e:
88
  print(f"Error fetching questions: {e}")
89
  return f"Error fetching questions: {e}", None
 
 
 
 
 
 
 
90
 
91
  # 3. Run your Agent
92
  results_log = []
93
  answers_payload = []
94
  print(f"Running agent on {len(questions_data)} questions...")
95
+
96
  for item in questions_data:
97
  task_id = item.get("task_id")
98
  question_text = item.get("question")
 
99
 
100
  if not task_id or question_text is None:
101
  print(f"Skipping item with missing task_id or question: {item}")
102
  continue
103
+
104
  try:
105
+ # Download task file if available
106
+ task_file_content = download_task_file(task_id, api_url)
107
+
108
+ # Prepare the full context for the agent
109
+ if task_file_content:
110
+ full_context = f"Context/File Content:\n{task_file_content}\n\nQuestion: {question_text}"
111
+ print(f"\n--- Question {task_id} ---")
112
+ print(f"Question: {question_text}")
113
+ print(f"File content length: {len(task_file_content)} characters")
114
+ print(f"File content preview: {task_file_content[:200]}...")
115
+ else:
116
+ full_context = question_text
117
+ print(f"\n--- Question {task_id} ---")
118
+ print(f"Question: {question_text}")
119
+ print("No file content available")
120
+
121
+ # Get answer from your LLM agent with full context
122
+ submitted_answer = agent(full_context)
123
+
124
+ # Clean up the answer - extract only the final answer after "FINAL ANSWER:"
125
+ if "FINAL ANSWER:" in submitted_answer:
126
+ submitted_answer = submitted_answer.split("FINAL ANSWER:")[-1].strip()
127
+
128
+ # Remove any extra explanations or context
129
+ if "\n\n" in submitted_answer:
130
+ submitted_answer = submitted_answer.split("\n\n")[0].strip()
131
+
132
+ # Take only the first sentence if it's still too long
133
+ if len(submitted_answer.split()) > 5:
134
+ submitted_answer = submitted_answer.split('.')[0].strip()
135
+
136
+ # Better answer cleaning
137
+ submitted_answer = submitted_answer.strip()
138
+ submitted_answer = submitted_answer.replace('"', '') # Remove quotes
139
+ submitted_answer = submitted_answer.lower() # Standardize case
140
 
141
+ # Print the answer for debugging
142
+ print(f"Answer: {submitted_answer}")
 
143
 
144
+ # Small delay to avoid overwhelming the API
145
+ time.sleep(1)
 
 
146
 
147
+ # Create answer entry in the required format
148
+ answer_entry = {
149
  "task_id": task_id,
150
+ "submitted_answer": submitted_answer
 
151
  }
152
 
153
+ answers_payload.append(answer_entry)
154
+ print(f"Answer Entry: {answer_entry}")
155
+ print("-" * 50)
156
 
157
  # For display in the table, show truncated versions
158
  display_question = question_text[:200] + "..." if len(question_text) > 200 else question_text
159
+ display_answer = submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
160
 
161
  results_log.append({
162
  "Task ID": task_id,
163
  "Question": display_question,
164
  "Model Answer": display_answer,
165
+ "Score": "N/A" # No scoring since ground truth not available
166
  })
167
 
168
  except Exception as e:
169
  print(f"Error running agent on task {task_id}: {e}")
170
  error_response = {
171
  "task_id": task_id,
172
+ "submitted_answer": f"AGENT ERROR: {e}"
 
173
  }
174
  answers_payload.append(error_response)
175
  results_log.append({
 
183
  print("Agent did not produce any answers to submit.")
184
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
185
 
186
+ # 4. Prepare Submission in the required format
187
+ submission_data = {
188
+ "username": username.strip(),
189
+ "agent_code": agent_code,
190
+ "answers": answers_payload
191
+ }
192
+
193
+ # Print the final submission format
194
+ print("\n" + "="*60)
195
+ print("FINAL SUBMISSION FORMAT:")
196
+ print("="*60)
197
+ print(json.dumps(submission_data, indent=2))
198
+ print("="*60)
199
+
200
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
201
  print(status_update)
202
 
 
216
  print("Submission successful.")
217
  results_df = pd.DataFrame(results_log)
218
  return final_status, results_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  except Exception as e:
220
+ status_message = f"Submission Failed: {e}"
221
  print(status_message)
222
  results_df = pd.DataFrame(results_log)
223
  return status_message, results_df