cyberosa commited on
Commit
67ca165
·
1 Parent(s): 885d78b

first commit with multiagent proposal

Browse files
Files changed (5) hide show
  1. .gitignore +163 -0
  2. README.md +9 -7
  3. agents.py +44 -0
  4. app.py +210 -0
  5. requirements.txt +9 -0
.gitignore ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ .DS_Store
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ tmp/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ #Pipfile.lock
99
+
100
+ # poetry
101
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
+ #poetry.lock
106
+
107
+ # pdm
108
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
+ #pdm.lock
110
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
+ # in version control.
112
+ # https://pdm.fming.dev/#use-with-ide
113
+ .pdm.toml
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ #.idea/
README.md CHANGED
@@ -1,13 +1,15 @@
1
  ---
2
- title: SkynetAgent
3
- emoji: 📉
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 5.34.0
8
  app_file: app.py
9
  pinned: false
10
- short_description: Agent for the final assignment of the Smolagents course
 
 
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Skynet Agent
3
+ emoji: 🕵🏻‍♂️
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
  sdk: gradio
7
+ sdk_version: 5.25.2
8
  app_file: app.py
9
  pinned: false
10
+ hf_oauth: true
11
+ # optional, default duration is 8 hours/480 minutes. Max duration is 30 days/43200 minutes.
12
+ hf_oauth_expiration_minutes: 480
13
  ---
14
 
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
agents.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import (
2
+ LiteLLMModel,
3
+ ToolCallingAgent,
4
+ CodeAgent,
5
+ VisitWebpageTool,
6
+ DuckDuckGoSearchTool,
7
+ WebSearchTool,
8
+ )
9
+ import os
10
+
11
+ # list of models to test
12
+ # deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
13
+ # Qwen/Qwen2.5-Coder-32B-Instruct
14
+ # together provider
15
+ # deepseek-ai/DeepSeek-V3
16
# The Gemini key is read from the environment; it is None when the
# GEMINI_API_KEY variable is not set.
gemini_api_key = os.getenv("GEMINI_API_KEY")

# Module-level LiteLLM model handle, created once at import time.
model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-lite", api_key=gemini_api_key)
21
+
22
+
23
+ class SkynetMultiAgent:
24
+ def __init__(self):
25
+ web_agent = ToolCallingAgent(
26
+ tools=[WebSearchTool(), VisitWebpageTool()],
27
+ model=model,
28
+ max_steps=10,
29
+ name="web_search_agent",
30
+ add_base_tools=True,
31
+ description="Runs web searches for you.",
32
+ )
33
+ self.agent = CodeAgent(
34
+ tools=[],
35
+ model=model,
36
+ managed_agents=[web_agent],
37
+ additional_authorized_imports=["time", "numpy", "pandas"],
38
+ )
39
+
40
+ def __call__(self, question: str) -> str:
41
+ print(f"Agent received question (first 50 chars): {question[:50]}...")
42
+ fixed_answer = self.agent.run(question, max_steps=10)
43
+ print(f"Agent returning fixed answer: {fixed_answer}")
44
+ return fixed_answer
app.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import requests
4
+ import pandas as pd
5
+ from agents import SkynetMultiAgent
6
+
7
+ # (Keep Constants as is)
8
+ # --- Constants ---
9
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
+
11
+
12
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """
    Fetch all questions, run the SkynetMultiAgent on them, submit the answers,
    and report the outcome.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or None
            when nobody is logged in. The click handler is wired without
            explicit inputs, so this is presumably injected by Gradio from
            the type annotation.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a pandas
        DataFrame with one row per processed question, or None when the run
        stops before any question has been processed.
    """
    # --- Determine HF Space Runtime URL and Repo URL ---
    space_id = os.getenv("SPACE_ID")  # Used to build the link to the code

    if profile:
        username = f"{profile.username}"
        print(f"User logged in: {username}")
    else:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # 1. Instantiate Agent
    try:
        agent = SkynetMultiAgent()
        print(f"Agent instantiated successfully: {agent}")
    except Exception as e:
        print(f"Error instantiating agent: {e}")
        return f"Error initializing agent: {e}", None
    # When running as a Hugging Face Space, this link points to the codebase
    # (useful for others, so please keep it public). If SPACE_ID is unset the
    # link contains the literal text "None".
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=30)
        response.raise_for_status()
        questions_data = response.json()
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    # JSONDecodeError derives from RequestException, so it has to be handled
    # first; otherwise the generic handler below would always catch it.
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        return f"Error decoding server response for questions: {e}", None
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        return f"Error fetching questions: {e}", None
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        return f"An unexpected error occurred fetching questions: {e}", None

    # 3. Run the Agent
    results_log = []
    answers_payload = []
    print(f"Running agent on {len(questions_data)} questions...")
    for item in questions_data:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            print(f"Skipping item with missing task_id or question: {item}")
            continue
        try:
            submitted_answer = agent(question_text)
            answers_payload.append(
                {"task_id": task_id, "submitted_answer": submitted_answer}
            )
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": submitted_answer,
                }
            )
        except Exception as e:
            # A failing question is logged for display but not submitted, so
            # one bad task does not abort the whole run.
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append(
                {
                    "Task ID": task_id,
                    "Question": question_text,
                    "Submitted Answer": f"AGENT ERROR: {e}",
                }
            )

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    # 4. Prepare Submission
    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
    print(status_update)

    # 5. Submit
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        results_df = pd.DataFrame(results_log)
        return final_status, results_df
    except requests.exceptions.HTTPError as e:
        error_detail = f"Server responded with status {e.response.status_code}."
        try:
            error_json = e.response.json()
            error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
        except requests.exceptions.JSONDecodeError:
            # Error body was not JSON; fall back to a truncated raw body.
            error_detail += f" Response: {e.response.text[:500]}"
        status_message = f"Submission Failed: {error_detail}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.Timeout:
        status_message = "Submission Failed: The request timed out."
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except requests.exceptions.RequestException as e:
        status_message = f"Submission Failed: Network error - {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        return status_message, results_df
152
+
153
+
154
+ # --- Build Gradio Interface using Blocks ---
155
with gr.Blocks() as demo:
    # Page header followed by the usage instructions.
    gr.Markdown("# Basic Agent Evaluation Runner")
    gr.Markdown(
        """
        **Instructions:**

        1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
        2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
        3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.

        ---
        **Disclaimers:**
        Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
        This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
        """
    )

    # Login widget; the evaluation callback refuses to run without a profile.
    gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    # Output widgets: a read-only status box and the per-question table.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # No explicit inputs: the callback only receives the OAuth profile.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )
183
+
184
def space_runtime_url(space_host: str) -> str:
    """Return the public HTTPS URL for a Space given its ``SPACE_HOST`` value.

    Hugging Face documents ``SPACE_HOST`` as already ending in ``.hf.space``.
    Appending the suffix unconditionally therefore yields a doubled
    ``.hf.space.hf.space`` host, so the suffix is normalised here. A bare
    host without the suffix is accepted too.

    Args:
        space_host: Value of the ``SPACE_HOST`` environment variable.

    Returns:
        The URL ``https://<host>.hf.space`` with the suffix present once.
    """
    host = space_host.strip().removesuffix(".hf.space")
    return f"https://{host}.hf.space"


if __name__ == "__main__":
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    # Report SPACE_HOST and SPACE_ID at startup, for information only.
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        print(f" Runtime URL should be: {space_runtime_url(space_host_startup)}")
    else:
        print("ℹ️ SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:  # Print repo URLs only when SPACE_ID is set
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(
            f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main"
        )
    else:
        print(
            "ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined."
        )

    # Closing rule matches the width of the opening banner.
    print("-" * (60 + len(" App Starting ")) + "\n")

    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ requests
3
+ markdownify==1.1.0
4
+ smolagents==1.15.0
5
+ duckduckgo_search==8.0.1
6
+ pandas==2.2.3
7
+ gradio_client
8
+ gradio[oauth]
9
+ litellm==1.67.0.post1