# agent_course/app.py
""" Agent Evaluation Runner"""
import os
import gradio as gr
import requests
import pandas as pd
import json
import time
from agent.agent import chat_with_agent
# --- Constants ---
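# Scoring API for the Hugging Face Agents Course (Unit 4 final assignment)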
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# --- Agent Definition ---
class BasicAgent:
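    """Thin wrapper that forwards the evaluation question to the underlying LLM agent (chat_with_agent)."""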
def __call__(self, question: str) -> str:
print(f"Agent received question: {question}")
# Get response from the agent using your LLM
answer = chat_with_agent(question)
return answer.strip() # Return just the clean answer
def download_task_file(task_id, api_url):
"""Download file associated with a task ID"""
url = f"{api_url}/files/{task_id}"
    try:
        response = requests.get(url, timeout=15)
        if response.status_code == 200:
            try:
                content = response.content.decode(response.encoding or "utf-8")
                if len(content) > 50000:  # Keep only the first ~50 KB of text
                    content = content[:50000]
                return content
            except UnicodeDecodeError:
                return f"[Binary file content - {len(response.content)} bytes]"
        # A 404 means no file is attached to this task; treat any other status the same way
        return None
    except Exception as e:
        print(f"Error downloading file for task {task_id}: {e}")
        return None
def run_and_submit_all(username_input=""):
"""
Fetches all questions, runs the BasicAgent on them, submits all answers,
and displays the results.
"""
# --- Determine HF Space Runtime URL and Repo URL ---
space_id = os.getenv("SPACE_ID")
# Get username from input
if username_input:
username = username_input.strip()
print(f"Using provided username: {username}")
else:
print("No username provided.")
return "Please provide a username.", None
api_url = DEFAULT_API_URL
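    # The scoring service exposes /questions for the task list, /files/<task_id> for attachments, and /submit for answers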
questions_url = f"{api_url}/questions"
submit_url = f"{api_url}/submit"
# 1. Instantiate Agent
try:
agent = BasicAgent()
except Exception as e:
print(f"Error instantiating agent: {e}")
return f"Error initializing agent: {e}", None
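    # Public link to this Space's source code; included in the submission payload as "agent_code"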
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main" if space_id else "https://huggingface.co/spaces/kamil1300/agent_course/tree/main"
print(agent_code)
# 2. Fetch Questions
print(f"Fetching questions from: {questions_url}")
try:
response = requests.get(questions_url, timeout=15)
response.raise_for_status()
questions_data = response.json()
if not questions_data:
print("Fetched questions list is empty.")
return "Fetched questions list is empty or invalid format.", None
# Limit to only 20 questions
questions_data = questions_data[:20]
print(f"Fetched {len(questions_data)} questions (limited to 20).")
except Exception as e:
print(f"Error fetching questions: {e}")
return f"Error fetching questions: {e}", None
# 3. Run your Agent
results_log = []
answers_payload = []
print(f"Running agent on {len(questions_data)} questions...")
for item in questions_data:
task_id = item.get("task_id")
question_text = item.get("question")
if not task_id or question_text is None:
print(f"Skipping item with missing task_id or question: {item}")
continue
try:
# Download task file if available
task_file_content = download_task_file(task_id, api_url)
# Prepare the full context for the agent
if task_file_content:
full_context = f"Context/File Content:\n{task_file_content}\n\nQuestion: {question_text}"
print(f"\n--- Question {task_id} ---")
print(f"Question: {question_text}")
print(f"File content length: {len(task_file_content)} characters")
print(f"File content preview: {task_file_content[:200]}...")
else:
full_context = question_text
print(f"\n--- Question {task_id} ---")
print(f"Question: {question_text}")
print("No file content available")
# Get answer from your LLM agent with full context
submitted_answer = agent(full_context)
# Clean up the answer - extract only the final answer after "FINAL ANSWER:"
if "FINAL ANSWER:" in submitted_answer:
submitted_answer = submitted_answer.split("FINAL ANSWER:")[-1].strip()
# Remove any extra explanations or context
if "\n\n" in submitted_answer:
submitted_answer = submitted_answer.split("\n\n")[0].strip()
# Take only the first sentence if it's still too long
if len(submitted_answer.split()) > 5:
submitted_answer = submitted_answer.split('.')[0].strip()
# Better answer cleaning
submitted_answer = submitted_answer.strip()
submitted_answer = submitted_answer.replace('"', '') # Remove quotes
submitted_answer = submitted_answer.lower() # Standardize case
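            # NOTE: these cleanup heuristics assume the scorer does a short exact-match comparison; adjust them if your answers need different formatting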
# Print the answer for debugging
print(f"Answer: {submitted_answer}")
# Small delay to avoid overwhelming the API
time.sleep(1)
# Create answer entry in the required format
answer_entry = {
"task_id": task_id,
"submitted_answer": submitted_answer
}
answers_payload.append(answer_entry)
print(f"Answer Entry: {answer_entry}")
print("-" * 50)
# For display in the table, show truncated versions
display_question = question_text[:200] + "..." if len(question_text) > 200 else question_text
display_answer = submitted_answer[:200] + "..." if len(submitted_answer) > 200 else submitted_answer
results_log.append({
"Task ID": task_id,
"Question": display_question,
"Model Answer": display_answer,
"Score": "N/A" # No scoring since ground truth not available
})
except Exception as e:
print(f"Error running agent on task {task_id}: {e}")
error_response = {
"task_id": task_id,
"submitted_answer": f"AGENT ERROR: {e}"
}
answers_payload.append(error_response)
results_log.append({
"Task ID": task_id,
"Question": question_text[:200] + "..." if question_text and len(question_text) > 200 else question_text,
"Model Answer": f"AGENT ERROR: {e}",
"Score": "ERROR"
})
if not answers_payload:
print("Agent did not produce any answers to submit.")
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
# 4. Prepare Submission in the required format
submission_data = {
"username": username.strip(),
"agent_code": agent_code,
"answers": answers_payload
}
# Print the final submission format
print("\n" + "="*60)
print("FINAL SUBMISSION FORMAT:")
print("="*60)
print(json.dumps(submission_data, indent=2))
print("="*60)
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
print(status_update)
# 5. Submit
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
try:
response = requests.post(submit_url, json=submission_data, timeout=60)
response.raise_for_status()
result_data = response.json()
final_status = (
f"Submission Successful!\n"
f"User: {result_data.get('username')}\n"
f"Overall Score: {result_data.get('score', 'N/A')}% "
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
f"Message: {result_data.get('message', 'No message received.')}"
)
print("Submission successful.")
results_df = pd.DataFrame(results_log)
return final_status, results_df
except Exception as e:
status_message = f"Submission Failed: {e}"
print(status_message)
results_df = pd.DataFrame(results_log)
return status_message, results_df
# --- Build Gradio Interface ---
with gr.Blocks() as demo:
gr.Markdown("# Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Enter your Hugging Face username in the text box below.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        **Note:** This will take some time as the agent processes all questions.
        """
    )
username_input = gr.Textbox(label="Enter your Hugging Face username", placeholder="your_username")
run_button = gr.Button("Run Evaluation & Submit All Answers")
status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
run_button.click(
fn=run_and_submit_all,
inputs=[username_input],
outputs=[status_output, results_table]
)
if __name__ == "__main__":
print("\n" + "-"*30 + " App Starting " + "-"*30)
space_host_startup = os.getenv("SPACE_HOST")
space_id_startup = os.getenv("SPACE_ID")
if space_host_startup:
print(f"✅ SPACE_HOST found: {space_host_startup}")
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
else:
print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
if space_id_startup:
print(f"✅ SPACE_ID found: {space_id_startup}")
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
else:
print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
print("-"*(60 + len(" App Starting ")) + "\n")
print("Launching Gradio Interface for Agent Evaluation...")
demo.launch(debug=True, share=True)