lupantech committed on
Commit ac47a36 · 1 Parent(s): 327d072

opentools-->octotools; added remaining tools; polished the ui

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .gitignore +177 -0
  2. README.md +7 -0
  3. app.py +41 -23
  4. app_bak_0215.py +307 -0
  5. {opentools → octotools}/__init__.py +0 -0
  6. {opentools → octotools}/engine/__init__.py +0 -0
  7. {opentools → octotools}/engine/base.py +0 -0
  8. {opentools → octotools}/engine/openai.py +2 -2
  9. {opentools → octotools}/models/__init__.py +0 -0
  10. {opentools → octotools}/models/executor.py +2 -2
  11. {opentools → octotools}/models/formatters.py +0 -0
  12. {opentools → octotools}/models/initializer.py +10 -10
  13. {opentools → octotools}/models/memory.py +0 -0
  14. {opentools → octotools}/models/planner.py +3 -3
  15. {opentools → octotools}/models/utils.py +0 -0
  16. {opentools → octotools}/tools/README.md +1 -1
  17. {opentools → octotools}/tools/__init__.py +0 -0
  18. octotools/tools/advanced_object_detector/__init__.py +0 -0
  19. octotools/tools/advanced_object_detector/examples/baseball.png +0 -0
  20. octotools/tools/advanced_object_detector/test.log +366 -0
  21. octotools/tools/advanced_object_detector/tool.py +236 -0
  22. octotools/tools/arxiv_paper_searcher/__init__.py +0 -0
  23. octotools/tools/arxiv_paper_searcher/test.log +120 -0
  24. octotools/tools/arxiv_paper_searcher/tool.py +165 -0
  25. {opentools → octotools}/tools/base.py +2 -2
  26. {opentools → octotools}/tools/generalist_solution_generator/tool.py +3 -3
  27. octotools/tools/google_search/__init__.py +0 -0
  28. octotools/tools/google_search/test.log +29 -0
  29. octotools/tools/google_search/tool.py +136 -0
  30. octotools/tools/image_captioner/__init__.py +0 -0
  31. octotools/tools/image_captioner/examples/baseball.png +0 -0
  32. octotools/tools/image_captioner/test.log +7 -0
  33. octotools/tools/image_captioner/tool.py +96 -0
  34. octotools/tools/nature_news_fetcher/__init__.py +0 -0
  35. octotools/tools/nature_news_fetcher/test.log +180 -0
  36. octotools/tools/nature_news_fetcher/tool.py +181 -0
  37. octotools/tools/object_detector/__init__.py +0 -0
  38. octotools/tools/object_detector/examples/baseball.png +0 -0
  39. octotools/tools/object_detector/test.log +112 -0
  40. octotools/tools/object_detector/tool.py +179 -0
  41. octotools/tools/pubmed_search/__init__.py +0 -0
  42. octotools/tools/pubmed_search/test.log +3 -0
  43. octotools/tools/pubmed_search/tool.py +112 -0
  44. octotools/tools/python_code_generator/__init__.py +0 -0
  45. octotools/tools/python_code_generator/test.log +13 -0
  46. octotools/tools/python_code_generator/tool.py +243 -0
  47. octotools/tools/relevant_patch_zoomer/__init__.py +0 -0
  48. octotools/tools/relevant_patch_zoomer/examples/car.png +0 -0
  49. octotools/tools/relevant_patch_zoomer/test.log +10 -0
  50. octotools/tools/relevant_patch_zoomer/tool.py +188 -0
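
At its core, this commit renames the `opentools` package to `octotools`, so every import path in the renamed modules changes with it. A minimal sketch of the resulting import update, mirroring the import lines visible in the `app.py` diff below (illustrative only):

```python
# Before this commit, the demo imported from the old package name:
# from opentools.models.initializer import Initializer
# from opentools.models.planner import Planner

# After this commit, the same modules live under `octotools`:
from octotools.models.initializer import Initializer
from octotools.models.planner import Planner
from octotools.models.memory import Memory
from octotools.models.executor import Executor
from octotools.models.utils import make_json_serializable
```

The same substitution appears in the cache directory used by `engine/openai.py` (`platformdirs.user_cache_dir("octotools")`) and in the project-root lookup in `models/initializer.py`.
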
.gitignore ADDED
@@ -0,0 +1,177 @@
1
+
2
+ # Byte-compiled / optimized / DLL files
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+ cover/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ # *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ .pybuilder/
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ # For a library or package, you might want to ignore these files since the code is
88
+ # intended to run in multiple environments; otherwise, check them in:
89
+ # .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ #Pipfile.lock
97
+
98
+ # poetry
99
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
101
+ # commonly ignored for libraries.
102
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103
+ #poetry.lock
104
+
105
+ # pdm
106
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107
+ #pdm.lock
108
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109
+ # in version control.
110
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
111
+ .pdm.toml
112
+ .pdm-python
113
+ .pdm-build/
114
+
115
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
+ __pypackages__/
117
+
118
+ # Celery stuff
119
+ celerybeat-schedule
120
+ celerybeat.pid
121
+
122
+ # SageMath parsed files
123
+ *.sage.py
124
+
125
+ # Environments
126
+ .env
127
+ .venv
128
+ env/
129
+ venv/
130
+ ENV/
131
+ env.bak/
132
+ venv.bak/
133
+
134
+ # Spyder project settings
135
+ .spyderproject
136
+ .spyproject
137
+
138
+ # Rope project settings
139
+ .ropeproject
140
+
141
+ # mkdocs documentation
142
+ /site
143
+
144
+ # mypy
145
+ .mypy_cache/
146
+ .dmypy.json
147
+ dmypy.json
148
+
149
+ # Pyre type checker
150
+ .pyre/
151
+
152
+ # pytype static type analyzer
153
+ .pytype/
154
+
155
+ # Cython debug symbols
156
+ cython_debug/
157
+
158
+ # PyCharm
159
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
162
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
+ #.idea/
164
+
165
+ # [Octotools]
166
+ octotools.egg-info/
167
+ locals/
168
+ results/
169
+ logs/
170
+ *.zip
171
+ *.pt
172
+ cache/
173
+ tool_cache/
174
+ detected_objects/
175
+
176
+ # [Gradio]
177
+ demo_solver_cache/
README.md CHANGED
@@ -15,3 +15,10 @@ pinned: false
 
15
  - https://www.gradio.app/guides/agents-and-tool-usage
16
 
17
  - Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
18
+
19
+
20
+ ### Example
21
+
22
+ ```
23
+ How many baseballs are there?
24
+ ```
app.py CHANGED
@@ -16,17 +16,12 @@ current_dir = os.path.dirname(os.path.abspath(__file__))
16
  project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
17
  sys.path.insert(0, project_root)
18
 
19
- from opentools.models.initializer import Initializer
20
- from opentools.models.planner import Planner
21
- from opentools.models.memory import Memory
22
- from opentools.models.executor import Executor
23
- from opentools.models.utils import make_json_serializable
24
-
25
- # class ChatMessage:
26
- # def __init__(self, role: str, content: str, metadata: dict = None):
27
- # self.role = role
28
- # self.content = content
29
- # self.metadata = metadata or {}
30
 
31
  class Solver:
32
  def __init__(
@@ -59,9 +54,6 @@ class Solver:
59
  self.output_types = output_types.lower().split(',')
60
  assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
61
 
62
- # self.benchmark_data = self.load_benchmark_data()
63
-
64
-
65
 
66
  def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, api_key: str, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
67
  """
@@ -189,8 +181,9 @@ class Solver:
189
  messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process complete."))
190
  yield messages
191
 
 
192
  def parse_arguments():
193
- parser = argparse.ArgumentParser(description="Run the OpenTools demo with specified parameters.")
194
  parser.add_argument("--llm_engine_name", default="gpt-4o", help="LLM engine name.")
195
  parser.add_argument("--max_tokens", type=int, default=2000, help="Maximum tokens for LLM generation.")
196
  parser.add_argument("--run_baseline_only", type=bool, default=False, help="Run only the baseline (no toolbox).")
@@ -215,7 +208,7 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
215
  """
216
 
217
  if api_key is None:
218
- return [["assistant", "⚠️ Error: API Key is required."]]
219
 
220
  # Initialize Tools
221
  enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
@@ -271,16 +264,16 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
271
 
272
 
273
  def main(args):
274
- # ========== Gradio Interface ==========
275
  with gr.Blocks() as demo:
276
- gr.Markdown("# 🧠 OctoTools AI Solver") # Title
277
 
278
  with gr.Row():
279
  with gr.Column(scale=1):
280
  api_key = gr.Textbox(show_label=False, placeholder="Your API key will not be stored in any way.", type="password", container=False)
281
  user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
282
- max_steps = gr.Slider(value=5, minimum=1, maximum=10, step=1)
283
- max_time = gr.Slider(value=180, minimum=60, maximum=300, step=20)
284
  with gr.Column(scale=3):
285
  chatbot_output = gr.Chatbot(type="messages", label="Problem-Solving Output")
286
  # chatbot_output.like(lambda x: print(f"User liked: {x}"))
@@ -296,13 +289,38 @@ def main(args):
296
 
297
  # Link button click to function
298
  run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image, max_steps, max_time, api_key], outputs=chatbot_output)
299
- # ========== Gradio Interface ==========
300
 
301
  # Launch the Gradio app
302
  demo.launch()
303
 
304
 
305
-
306
  if __name__ == "__main__":
307
  args = parse_arguments()
308
- main(args)
 
16
  project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
17
  sys.path.insert(0, project_root)
18
 
19
+ from octotools.models.initializer import Initializer
20
+ from octotools.models.planner import Planner
21
+ from octotools.models.memory import Memory
22
+ from octotools.models.executor import Executor
23
+ from octotools.models.utils import make_json_serializable
24
+
 
 
 
 
 
25
 
26
  class Solver:
27
  def __init__(
 
54
  self.output_types = output_types.lower().split(',')
55
  assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
56
 
 
 
 
57
 
58
  def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, api_key: str, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
59
  """
 
181
  messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process complete."))
182
  yield messages
183
 
184
+
185
  def parse_arguments():
186
+ parser = argparse.ArgumentParser(description="Run the OctoTools demo with specified parameters.")
187
  parser.add_argument("--llm_engine_name", default="gpt-4o", help="LLM engine name.")
188
  parser.add_argument("--max_tokens", type=int, default=2000, help="Maximum tokens for LLM generation.")
189
  parser.add_argument("--run_baseline_only", type=bool, default=False, help="Run only the baseline (no toolbox).")
 
208
  """
209
 
210
  if api_key is None:
211
+ return [["assistant", "⚠️ Error: OpenAI API Key is required."]]
212
 
213
  # Initialize Tools
214
  enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
 
264
 
265
 
266
  def main(args):
267
+ #################### Gradio Interface ####################
268
  with gr.Blocks() as demo:
269
+ gr.Markdown("# 🧠 The OctoTools Agentic Solver") # Title
270
 
271
  with gr.Row():
272
  with gr.Column(scale=1):
273
  api_key = gr.Textbox(show_label=False, placeholder="Your API key will not be stored in any way.", type="password", container=False)
274
  user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
275
+ max_steps = gr.Slider(value=5, minimum=1, maximum=10, step=1, label="Max Steps")
276
+ max_time = gr.Slider(value=150, minimum=60, maximum=300, step=30, label="Max Time (seconds)")
277
  with gr.Column(scale=3):
278
  chatbot_output = gr.Chatbot(type="messages", label="Problem-Solving Output")
279
  # chatbot_output.like(lambda x: print(f"User liked: {x}"))
 
289
 
290
  # Link button click to function
291
  run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image, max_steps, max_time, api_key], outputs=chatbot_output)
292
+ #################### Gradio Interface ####################
293
 
294
  # Launch the Gradio app
295
  demo.launch()
296
 
297
 
 
298
  if __name__ == "__main__":
299
  args = parse_arguments()
300
+
301
+ # Manually set enabled tools
302
+ # args.enabled_tools = "Generalist_Solution_Generator_Tool"
303
+
304
+
305
+ # All tools
306
+ all_tools = [
307
+ "Generalist_Solution_Generator_Tool",
308
+
309
+ "Image_Captioner_Tool",
310
+ "Object_Detector_Tool",
311
+ "Text_Detector_Tool",
312
+ "Relevant_Patch_Zoomer_Tool",
313
+
314
+ "Python_Code_Generator_Tool",
315
+
316
+ "ArXiv_Paper_Searcher_Tool",
317
+ "Google_Search_Tool",
318
+ "Nature_News_Fetcher_Tool",
319
+ "Pubmed_Search_Tool",
320
+ "URL_Text_Extractor_Tool",
321
+ "Wikipedia_Knowledge_Searcher_Tool"
322
+ ]
323
+ args.enabled_tools = ",".join(all_tools)
324
+
325
+ main(args)
326
+
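
The new `__main__` block above enables the full toolbox by joining the tool names into `args.enabled_tools` before calling `main(args)`. As a rough sketch (an illustrative assumption, not part of the commit), the demo can also be launched programmatically with a custom subset of tools by reusing `parse_arguments` and `main` from `app.py`:

```python
# Illustrative launch sketch; run from the repo root so `octotools` and app.py are importable.
import app

args = app.parse_arguments()
# Any comma-separated subset of the tool list above works here.
args.enabled_tools = "Generalist_Solution_Generator_Tool,Image_Captioner_Tool,Object_Detector_Tool"
app.args = args   # solve_problem_gradio reads the module-level `args`
app.main(args)    # launches the Gradio demo; the OpenAI API key is entered in the UI
```
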
app_bak_0215.py ADDED
@@ -0,0 +1,307 @@
1
+ import os
2
+ import sys
3
+ import json
4
+ import argparse
5
+ import time
6
+ import io
7
+ import uuid
8
+ from PIL import Image
9
+ from typing import List, Dict, Any, Iterator
10
+ import gradio as gr
11
+
12
+ # Add the project root to the Python path
13
+ current_dir = os.path.dirname(os.path.abspath(__file__))
14
+ project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
15
+ sys.path.insert(0, project_root)
16
+
17
+ from opentools.models.initializer import Initializer
18
+ from opentools.models.planner import Planner
19
+ from opentools.models.memory import Memory
20
+ from opentools.models.executor import Executor
21
+ from opentools.models.utlis import make_json_serializable
22
+
23
+ solver = None
24
+
25
+ class ChatMessage:
26
+ def __init__(self, role: str, content: str, metadata: dict = None):
27
+ self.role = role
28
+ self.content = content
29
+ self.metadata = metadata or {}
30
+
31
+ class Solver:
32
+ def __init__(
33
+ self,
34
+ planner,
35
+ memory,
36
+ executor,
37
+ task: str,
38
+ task_description: str,
39
+ output_types: str = "base,final,direct",
40
+ index: int = 0,
41
+ verbose: bool = True,
42
+ max_steps: int = 10,
43
+ max_time: int = 60,
44
+ output_json_dir: str = "results",
45
+ root_cache_dir: str = "cache"
46
+ ):
47
+ self.planner = planner
48
+ self.memory = memory
49
+ self.executor = executor
50
+ self.task = task
51
+ self.task_description = task_description
52
+ self.index = index
53
+ self.verbose = verbose
54
+ self.max_steps = max_steps
55
+ self.max_time = max_time
56
+ self.output_json_dir = output_json_dir
57
+ self.root_cache_dir = root_cache_dir
58
+
59
+ self.output_types = output_types.lower().split(',')
60
+ assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
61
+
62
+ # self.benchmark_data = self.load_benchmark_data()
63
+
64
+
65
+
66
+ def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
67
+ """
68
+ Streams intermediate thoughts and final responses for the problem-solving process based on user input.
69
+
70
+ Args:
71
+ user_query (str): The text query input from the user.
72
+ user_image (Image.Image): The uploaded image from the user (PIL Image object).
73
+ messages (list): A list of ChatMessage objects to store the streamed responses.
74
+ """
75
+
76
+ if user_image:
77
+ # # Convert PIL Image to bytes (for processing)
78
+ # img_bytes_io = io.BytesIO()
79
+ # user_image.save(img_bytes_io, format="PNG") # Convert image to PNG bytes
80
+ # img_bytes = img_bytes_io.getvalue() # Get bytes
81
+
82
+ # Use image paths instead of bytes,
83
+ os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
84
+ img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
85
+ user_image.save(img_path)
86
+ else:
87
+ img_path = None
88
+
89
+ # Set query cache
90
+ _cache_dir = os.path.join(self.root_cache_dir)
91
+ self.executor.set_query_cache_dir(_cache_dir)
92
+
93
+ # Step 1: Display the received inputs
94
+ if user_image:
95
+ messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}\n🖼️ Image Uploaded"))
96
+ else:
97
+ messages.append(ChatMessage(role="assistant", content=f"📝 Received Query: {user_query}"))
98
+ yield messages
99
+
100
+ # Step 2: Add "thinking" status while processing
101
+ messages.append(ChatMessage(
102
+ role="assistant",
103
+ content="",
104
+ metadata={"title": "⏳ Thinking: Processing input..."}
105
+ ))
106
+
107
+ # Step 3: Initialize problem-solving state
108
+ start_time = time.time()
109
+ step_count = 0
110
+ json_data = {"query": user_query, "image": "Image received as bytes"}
111
+
112
+ # Step 4: Query Analysis
113
+ query_analysis = self.planner.analyze_query(user_query, img_path)
114
+ json_data["query_analysis"] = query_analysis
115
+ messages.append(ChatMessage(role="assistant", content=f"🔍 Query Analysis:\n{query_analysis}"))
116
+ yield messages
117
+
118
+ # Step 5: Execution loop (similar to your step-by-step solver)
119
+ while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
120
+ step_count += 1
121
+ messages.append(ChatMessage(role="assistant", content=f"🔄 Step {step_count}: Generating next step..."))
122
+ yield messages
123
+
124
+ # Generate the next step
125
+ next_step = self.planner.generate_next_step(
126
+ user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
127
+ )
128
+ context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)
129
+
130
+ # Display the step information
131
+ messages.append(ChatMessage(
132
+ role="assistant",
133
+ content=f"📌 Step {step_count} Details:\n- Context: {context}\n- Sub-goal: {sub_goal}\n- Tool: {tool_name}"
134
+ ))
135
+ yield messages
136
+
137
+ # Handle tool execution or errors
138
+ if tool_name not in self.planner.available_tools:
139
+ messages.append(ChatMessage(role="assistant", content=f"⚠️ Error: Tool '{tool_name}' is not available."))
140
+ yield messages
141
+ continue
142
+
143
+ # Execute the tool command
144
+ tool_command = self.executor.generate_tool_command(
145
+ user_query, img_path, context, sub_goal, tool_name, self.planner.toolbox_metadata[tool_name]
146
+ )
147
+ explanation, command = self.executor.extract_explanation_and_command(tool_command)
148
+ result = self.executor.execute_tool_command(tool_name, command)
149
+ result = make_json_serializable(result)
150
+
151
+ messages.append(ChatMessage(role="assistant", content=f"✅ Step {step_count} Result:\n{json.dumps(result, indent=4)}"))
152
+ yield messages
153
+
154
+ # Step 6: Memory update and stopping condition
155
+ self.memory.add_action(step_count, tool_name, sub_goal, tool_command, result)
156
+ stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
157
+ conclusion = self.planner.extract_conclusion(stop_verification)
158
+
159
+ messages.append(ChatMessage(role="assistant", content=f"🛑 Step {step_count} Conclusion: {conclusion}"))
160
+ yield messages
161
+
162
+ if conclusion == 'STOP':
163
+ break
164
+
165
+ # Step 7: Generate Final Output (if needed)
166
+ if 'final' in self.output_types:
167
+ final_output = self.planner.generate_final_output(user_query, img_path, self.memory)
168
+ messages.append(ChatMessage(role="assistant", content=f"🎯 Final Output:\n{final_output}"))
169
+ yield messages
170
+
171
+ if 'direct' in self.output_types:
172
+ direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
173
+ messages.append(ChatMessage(role="assistant", content=f"🔹 Direct Output:\n{direct_output}"))
174
+ yield messages
175
+
176
+ # Step 8: Completion Message
177
+ messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process complete."))
178
+ yield messages
179
+
180
+ def parse_arguments():
181
+ parser = argparse.ArgumentParser(description="Run the OpenTools demo with specified parameters.")
182
+ parser.add_argument("--llm_engine_name", default="gpt-4o", help="LLM engine name.")
183
+ parser.add_argument("--max_tokens", type=int, default=2000, help="Maximum tokens for LLM generation.")
184
+ parser.add_argument("--run_baseline_only", type=bool, default=False, help="Run only the baseline (no toolbox).")
185
+ parser.add_argument("--task", default="minitoolbench", help="Task to run.")
186
+ parser.add_argument("--task_description", default="", help="Task description.")
187
+ parser.add_argument(
188
+ "--output_types",
189
+ default="base,final,direct",
190
+ help="Comma-separated list of required outputs (base,final,direct)"
191
+ )
192
+ parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
193
+ parser.add_argument("--root_cache_dir", default="demo_solver_cache", help="Path to solver cache directory.")
194
+ parser.add_argument("--output_json_dir", default="demo_results", help="Path to output JSON directory.")
195
+ parser.add_argument("--max_steps", type=int, default=10, help="Maximum number of steps to execute.")
196
+ parser.add_argument("--max_time", type=int, default=60, help="Maximum time allowed in seconds.")
197
+ parser.add_argument("--verbose", type=bool, default=True, help="Enable verbose output.")
198
+ return parser.parse_args()
199
+
200
+
201
+ def solve_problem_gradio(user_query, user_image):
202
+ """
203
+ Wrapper function to connect the solver to Gradio.
204
+ Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
205
+ """
206
+ global solver # Ensure we're using the globally defined solver
207
+
208
+ if solver is None:
209
+ return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
210
+
211
+ messages = [] # Initialize message list
212
+ for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
213
+ yield [[msg.role, msg.content] for msg in message_batch] # Ensure correct format for Gradio Chatbot
214
+
215
+
216
+
217
+ def main(args):
218
+ global solver
219
+ # Initialize Tools
220
+ enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
221
+
222
+
223
+ # Instantiate Initializer
224
+ initializer = Initializer(
225
+ enabled_tools=enabled_tools,
226
+ model_string=args.llm_engine_name
227
+ )
228
+
229
+ # Instantiate Planner
230
+ planner = Planner(
231
+ llm_engine_name=args.llm_engine_name,
232
+ toolbox_metadata=initializer.toolbox_metadata,
233
+ available_tools=initializer.available_tools
234
+ )
235
+
236
+ # Instantiate Memory
237
+ memory = Memory()
238
+
239
+ # Instantiate Executor
240
+ executor = Executor(
241
+ llm_engine_name=args.llm_engine_name,
242
+ root_cache_dir=args.root_cache_dir,
243
+ enable_signal=False
244
+ )
245
+
246
+ # Instantiate Solver
247
+ solver = Solver(
248
+ planner=planner,
249
+ memory=memory,
250
+ executor=executor,
251
+ task=args.task,
252
+ task_description=args.task_description,
253
+ output_types=args.output_types, # Add new parameter
254
+ verbose=args.verbose,
255
+ max_steps=args.max_steps,
256
+ max_time=args.max_time,
257
+ output_json_dir=args.output_json_dir,
258
+ root_cache_dir=args.root_cache_dir
259
+ )
260
+
261
+ # Test Inputs
262
+ # user_query = "How many balls are there in the image?"
263
+ # user_image_path = "/home/sheng/toolbox-agent/mathvista_113.png" # Replace with your actual image path
264
+
265
+ # # Load the image as a PIL object
266
+ # user_image = Image.open(user_image_path).convert("RGB") # Ensure it's in RGB mode
267
+
268
+ # print("\n=== Starting Problem Solving ===\n")
269
+ # messages = []
270
+ # for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
271
+ # for message in message_batch:
272
+ # print(f"{message.role}: {message.content}")
273
+
274
+ # messages = []
275
+ # solver.stream_solve_user_problem(user_query, user_image, messages)
276
+
277
+
278
+ # def solve_problem_stream(user_query, user_image):
279
+ # messages = [] # Ensure it's a list of [role, content] pairs
280
+
281
+ # for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
282
+ # yield message_batch # Stream messages correctly in tuple format
283
+
284
+ # solve_problem_stream(user_query, user_image)
285
+
286
+ # ========== Gradio Interface ==========
287
+ with gr.Blocks() as demo:
288
+ gr.Markdown("# 🧠 OctoTools AI Solver") # Title
289
+
290
+ with gr.Row():
291
+ user_query = gr.Textbox(label="Enter your query", placeholder="Type your question here...")
292
+ user_image = gr.Image(type="pil", label="Upload an image") # Accepts multiple formats
293
+
294
+ run_button = gr.Button("Run") # Run button
295
+ chatbot_output = gr.Chatbot(label="Problem-Solving Output")
296
+
297
+ # Link button click to function
298
+ run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image], outputs=chatbot_output)
299
+
300
+ # Launch the Gradio app
301
+ demo.launch()
302
+
303
+
304
+
305
+ if __name__ == "__main__":
306
+ args = parse_arguments()
307
+ main(args)
{opentools → octotools}/__init__.py RENAMED
File without changes
{opentools → octotools}/engine/__init__.py RENAMED
File without changes
{opentools → octotools}/engine/base.py RENAMED
File without changes
{opentools → octotools}/engine/openai.py RENAMED
@@ -51,9 +51,9 @@ class ChatOpenAI(EngineLM, CachedEngine):
51
  :param is_multimodal:
52
  """
53
  if enable_cache:
54
- root = platformdirs.user_cache_dir("opentools")
55
  cache_path = os.path.join(root, f"cache_openai_{model_string}.db")
56
- # For example, cache_path = /root/.cache/opentools/cache_openai_gpt-4o-mini.db
57
  # print(f"Cache path: {cache_path}")
58
 
59
  self.image_cache_dir = os.path.join(root, "image_cache")
 
51
  :param is_multimodal:
52
  """
53
  if enable_cache:
54
+ root = platformdirs.user_cache_dir("octotools")
55
  cache_path = os.path.join(root, f"cache_openai_{model_string}.db")
56
+ # For example, cache_path = /root/.cache/octotools/cache_openai_gpt-4o-mini.db
57
  # print(f"Cache path: {cache_path}")
58
 
59
  self.image_cache_dir = os.path.join(root, "image_cache")
{opentools → octotools}/models/__init__.py RENAMED
File without changes
{opentools → octotools}/models/executor.py RENAMED
@@ -5,8 +5,8 @@ import re
5
  from typing import Dict, Any, List
6
  from datetime import datetime
7
 
8
- from opentools.engine.openai import ChatOpenAI
9
- from opentools.models.formatters import ToolCommand
10
 
11
  import signal
12
  from typing import Dict, Any, List, Optional
 
5
  from typing import Dict, Any, List
6
  from datetime import datetime
7
 
8
+ from octotools.engine.openai import ChatOpenAI
9
+ from octotools.models.formatters import ToolCommand
10
 
11
  import signal
12
  from typing import Dict, Any, List, Optional
{opentools → octotools}/models/formatters.py RENAMED
File without changes
{opentools → octotools}/models/initializer.py RENAMED
@@ -14,7 +14,7 @@ class Initializer:
14
  self.model_string = model_string # llm model string
15
  self.api_key = api_key
16
 
17
- print("\nInitializing OpenTools...")
18
  print(f"Enabled tools: {self.enabled_tools}")
19
  print(f"LLM model string: {self.model_string}")
20
  self._set_up_tools()
@@ -22,8 +22,8 @@ class Initializer:
22
  def get_project_root(self):
23
  current_dir = os.path.dirname(os.path.abspath(__file__))
24
  while current_dir != '/':
25
- if os.path.exists(os.path.join(current_dir, 'opentools')):
26
- return os.path.join(current_dir, 'opentools')
27
  current_dir = os.path.dirname(current_dir)
28
  raise Exception("Could not find project root")
29
 
@@ -31,15 +31,15 @@ class Initializer:
31
  # Implementation of load_tools_and_get_metadata function
32
  print("Loading tools and getting metadata...")
33
  self.toolbox_metadata = {}
34
- opentools_dir = self.get_project_root()
35
- tools_dir = os.path.join(opentools_dir, 'tools')
36
 
37
- print(f"OpenTools directory: {opentools_dir}")
38
  print(f"Tools directory: {tools_dir}")
39
 
40
- # Add the OpenTools directory and its parent to the Python path
41
- sys.path.insert(0, opentools_dir)
42
- sys.path.insert(0, os.path.dirname(opentools_dir))
43
  print(f"Updated Python path: {sys.path}")
44
 
45
  if not os.path.exists(tools_dir):
@@ -52,7 +52,7 @@ class Initializer:
52
  file = 'tool.py'
53
  module_path = os.path.join(root, file)
54
  module_name = os.path.splitext(file)[0]
55
- relative_path = os.path.relpath(module_path, opentools_dir)
56
  import_path = '.'.join(os.path.split(relative_path)).replace(os.sep, '.')[:-3]
57
 
58
  print(f"\nAttempting to import: {import_path}")
 
14
  self.model_string = model_string # llm model string
15
  self.api_key = api_key
16
 
17
+ print("\nInitializing OctoTools...")
18
  print(f"Enabled tools: {self.enabled_tools}")
19
  print(f"LLM model string: {self.model_string}")
20
  self._set_up_tools()
 
22
  def get_project_root(self):
23
  current_dir = os.path.dirname(os.path.abspath(__file__))
24
  while current_dir != '/':
25
+ if os.path.exists(os.path.join(current_dir, 'octotools')):
26
+ return os.path.join(current_dir, 'octotools')
27
  current_dir = os.path.dirname(current_dir)
28
  raise Exception("Could not find project root")
29
 
 
31
  # Implementation of load_tools_and_get_metadata function
32
  print("Loading tools and getting metadata...")
33
  self.toolbox_metadata = {}
34
+ octotools_dir = self.get_project_root()
35
+ tools_dir = os.path.join(octotools_dir, 'tools')
36
 
37
+ print(f"OctoTools directory: {octotools_dir}")
38
  print(f"Tools directory: {tools_dir}")
39
 
40
+ # Add the OctoTools directory and its parent to the Python path
41
+ sys.path.insert(0, octotools_dir)
42
+ sys.path.insert(0, os.path.dirname(octotools_dir))
43
  print(f"Updated Python path: {sys.path}")
44
 
45
  if not os.path.exists(tools_dir):
 
52
  file = 'tool.py'
53
  module_path = os.path.join(root, file)
54
  module_name = os.path.splitext(file)[0]
55
+ relative_path = os.path.relpath(module_path, octotools_dir)
56
  import_path = '.'.join(os.path.split(relative_path)).replace(os.sep, '.')[:-3]
57
 
58
  print(f"\nAttempting to import: {import_path}")
{opentools → octotools}/models/memory.py RENAMED
File without changes
{opentools → octotools}/models/planner.py RENAMED
@@ -4,9 +4,9 @@ from PIL import Image
4
  from io import BytesIO
5
  from typing import Dict, Any, List, Tuple
6
 
7
- from opentools.engine.openai import ChatOpenAI
8
- from opentools.models.memory import Memory
9
- from opentools.models.formatters import QueryAnalysis, NextStep, MemoryVerification
10
 
11
  class Planner:
12
  def __init__(self, llm_engine_name: str, toolbox_metadata: dict = None, available_tools: List = None, api_key: str = None):
 
4
  from io import BytesIO
5
  from typing import Dict, Any, List, Tuple
6
 
7
+ from octotools.engine.openai import ChatOpenAI
8
+ from octotools.models.memory import Memory
9
+ from octotools.models.formatters import QueryAnalysis, NextStep, MemoryVerification
10
 
11
  class Planner:
12
  def __init__(self, llm_engine_name: str, toolbox_metadata: dict = None, available_tools: List = None, api_key: str = None):
{opentools → octotools}/models/utils.py RENAMED
File without changes
{opentools → octotools}/tools/README.md RENAMED
@@ -8,7 +8,7 @@ To test the text detection tool, follow these steps:
8
  Change your current directory to where the tools are located. Replace `your_path` with the actual path to your project directory.
9
 
10
  ```sh
11
- cd your_path/toolbox-agent/opentools
12
  ```
13
 
14
  2. **Run the Text Detection Tool:**
 
8
  Change your current directory to where the tools are located. Replace `your_path` with the actual path to your project directory.
9
 
10
  ```sh
11
+ cd your_path/toolbox-agent/octotools
12
  ```
13
 
14
  2. **Run the Text Detection Tool:**
{opentools → octotools}/tools/__init__.py RENAMED
File without changes
octotools/tools/advanced_object_detector/__init__.py ADDED
File without changes
octotools/tools/advanced_object_detector/examples/baseball.png ADDED
octotools/tools/advanced_object_detector/test.log ADDED
@@ -0,0 +1,366 @@
1
+ {'code': 0, 'msg': 'ok', 'data': {'task_uuid': '2d4337f5-403d-419b-9832-64b0f888f146'}}
2
+ task_uuid:2d4337f5-403d-419b-9832-64b0f888f146
3
+ [
4
+ {
5
+ "label": "baseball",
6
+ "confidence score": 0.73,
7
+ "box": [
8
+ 614,
9
+ 137,
10
+ 671,
11
+ 191
12
+ ],
13
+ "saved_image_path": "detected_objects/baseball_baseball_1.png"
14
+ },
15
+ {
16
+ "label": "baseball",
17
+ "confidence score": 0.73,
18
+ "box": [
19
+ 114,
20
+ 377,
21
+ 171,
22
+ 431
23
+ ],
24
+ "saved_image_path": "detected_objects/baseball_baseball_2.png"
25
+ },
26
+ {
27
+ "label": "baseball",
28
+ "confidence score": 0.72,
29
+ "box": [
30
+ 632,
31
+ 67,
32
+ 689,
33
+ 126
34
+ ],
35
+ "saved_image_path": "detected_objects/baseball_baseball_3.png"
36
+ },
37
+ {
38
+ "label": "baseball",
39
+ "confidence score": 0.72,
40
+ "box": [
41
+ 132,
42
+ 67,
43
+ 189,
44
+ 126
45
+ ],
46
+ "saved_image_path": "detected_objects/baseball_baseball_4.png"
47
+ },
48
+ {
49
+ "label": "baseball",
50
+ "confidence score": 0.71,
51
+ "box": [
52
+ 382,
53
+ 67,
54
+ 439,
55
+ 126
56
+ ],
57
+ "saved_image_path": "detected_objects/baseball_baseball_5.png"
58
+ },
59
+ {
60
+ "label": "baseball",
61
+ "confidence score": 0.71,
62
+ "box": [
63
+ 364,
64
+ 137,
65
+ 421,
66
+ 191
67
+ ],
68
+ "saved_image_path": "detected_objects/baseball_baseball_6.png"
69
+ },
70
+ {
71
+ "label": "baseball",
72
+ "confidence score": 0.71,
73
+ "box": [
74
+ 132,
75
+ 307,
76
+ 189,
77
+ 366
78
+ ],
79
+ "saved_image_path": "detected_objects/baseball_baseball_7.png"
80
+ },
81
+ {
82
+ "label": "baseball",
83
+ "confidence score": 0.71,
84
+ "box": [
85
+ 114,
86
+ 136,
87
+ 171,
88
+ 191
89
+ ],
90
+ "saved_image_path": "detected_objects/baseball_baseball_8.png"
91
+ },
92
+ {
93
+ "label": "baseball",
94
+ "confidence score": 0.7,
95
+ "box": [
96
+ 57,
97
+ 49,
98
+ 115,
99
+ 107
100
+ ],
101
+ "saved_image_path": "detected_objects/baseball_baseball_9.png"
102
+ },
103
+ {
104
+ "label": "baseball",
105
+ "confidence score": 0.69,
106
+ "box": [
107
+ 307,
108
+ 49,
109
+ 365,
110
+ 106
111
+ ],
112
+ "saved_image_path": "detected_objects/baseball_baseball_10.png"
113
+ },
114
+ {
115
+ "label": "baseball",
116
+ "confidence score": 0.68,
117
+ "box": [
118
+ 57,
119
+ 289,
120
+ 115,
121
+ 346
122
+ ],
123
+ "saved_image_path": "detected_objects/baseball_baseball_11.png"
124
+ },
125
+ {
126
+ "label": "baseball",
127
+ "confidence score": 0.68,
128
+ "box": [
129
+ 86,
130
+ 335,
131
+ 143,
132
+ 393
133
+ ],
134
+ "saved_image_path": "detected_objects/baseball_baseball_12.png"
135
+ },
136
+ {
137
+ "label": "baseball",
138
+ "confidence score": 0.68,
139
+ "box": [
140
+ 557,
141
+ 49,
142
+ 615,
143
+ 107
144
+ ],
145
+ "saved_image_path": "detected_objects/baseball_baseball_13.png"
146
+ },
147
+ {
148
+ "label": "baseball",
149
+ "confidence score": 0.68,
150
+ "box": [
151
+ 35,
152
+ 352,
153
+ 92,
154
+ 410
155
+ ],
156
+ "saved_image_path": "detected_objects/baseball_baseball_14.png"
157
+ },
158
+ {
159
+ "label": "baseball",
160
+ "confidence score": 0.68,
161
+ "box": [
162
+ 86,
163
+ 95,
164
+ 143,
165
+ 153
166
+ ],
167
+ "saved_image_path": "detected_objects/baseball_baseball_15.png"
168
+ },
169
+ {
170
+ "label": "baseball",
171
+ "confidence score": 0.67,
172
+ "box": [
173
+ 586,
174
+ 95,
175
+ 643,
176
+ 153
177
+ ],
178
+ "saved_image_path": "detected_objects/baseball_baseball_16.png"
179
+ },
180
+ {
181
+ "label": "baseball",
182
+ "confidence score": 0.66,
183
+ "box": [
184
+ 285,
185
+ 111,
186
+ 342,
187
+ 169
188
+ ],
189
+ "saved_image_path": "detected_objects/baseball_baseball_17.png"
190
+ },
191
+ {
192
+ "label": "baseball",
193
+ "confidence score": 0.66,
194
+ "box": [
195
+ 35,
196
+ 111,
197
+ 91,
198
+ 170
199
+ ],
200
+ "saved_image_path": "detected_objects/baseball_baseball_18.png"
201
+ },
202
+ {
203
+ "label": "baseball",
204
+ "confidence score": 0.66,
205
+ "box": [
206
+ 535,
207
+ 111,
208
+ 592,
209
+ 169
210
+ ],
211
+ "saved_image_path": "detected_objects/baseball_baseball_19.png"
212
+ },
213
+ {
214
+ "label": "baseball",
215
+ "confidence score": 0.66,
216
+ "box": [
217
+ 337,
218
+ 95,
219
+ 393,
220
+ 153
221
+ ],
222
+ "saved_image_path": "detected_objects/baseball_baseball_20.png"
223
+ },
224
+ {
225
+ "label": "basket",
226
+ "confidence score": 0.41,
227
+ "box": [
228
+ 1,
229
+ 2,
230
+ 218,
231
+ 216
232
+ ],
233
+ "saved_image_path": "detected_objects/baseball_basket_1.png"
234
+ },
235
+ {
236
+ "label": "basket",
237
+ "confidence score": 0.39,
238
+ "box": [
239
+ 501,
240
+ 2,
241
+ 718,
242
+ 216
243
+ ],
244
+ "saved_image_path": "detected_objects/baseball_basket_2.png"
245
+ },
246
+ {
247
+ "label": "basket",
248
+ "confidence score": 0.38,
249
+ "box": [
250
+ 2,
251
+ 242,
252
+ 218,
253
+ 456
254
+ ],
255
+ "saved_image_path": "detected_objects/baseball_basket_3.png"
256
+ },
257
+ {
258
+ "label": "basket",
259
+ "confidence score": 0.38,
260
+ "box": [
261
+ 251,
262
+ 2,
263
+ 468,
264
+ 216
265
+ ],
266
+ "saved_image_path": "detected_objects/baseball_basket_4.png"
267
+ }
268
+ ]
269
+ Detected Objects:
270
+ Detected baseball with confidence 0.73
271
+ Bounding box: [614, 137, 671, 191]
272
+ Saved image (with padding): detected_objects/baseball_baseball_1.png
273
+
274
+ Detected baseball with confidence 0.73
275
+ Bounding box: [114, 377, 171, 431]
276
+ Saved image (with padding): detected_objects/baseball_baseball_2.png
277
+
278
+ Detected baseball with confidence 0.72
279
+ Bounding box: [632, 67, 689, 126]
280
+ Saved image (with padding): detected_objects/baseball_baseball_3.png
281
+
282
+ Detected baseball with confidence 0.72
283
+ Bounding box: [132, 67, 189, 126]
284
+ Saved image (with padding): detected_objects/baseball_baseball_4.png
285
+
286
+ Detected baseball with confidence 0.71
287
+ Bounding box: [382, 67, 439, 126]
288
+ Saved image (with padding): detected_objects/baseball_baseball_5.png
289
+
290
+ Detected baseball with confidence 0.71
291
+ Bounding box: [364, 137, 421, 191]
292
+ Saved image (with padding): detected_objects/baseball_baseball_6.png
293
+
294
+ Detected baseball with confidence 0.71
295
+ Bounding box: [132, 307, 189, 366]
296
+ Saved image (with padding): detected_objects/baseball_baseball_7.png
297
+
298
+ Detected baseball with confidence 0.71
299
+ Bounding box: [114, 136, 171, 191]
300
+ Saved image (with padding): detected_objects/baseball_baseball_8.png
301
+
302
+ Detected baseball with confidence 0.7
303
+ Bounding box: [57, 49, 115, 107]
304
+ Saved image (with padding): detected_objects/baseball_baseball_9.png
305
+
306
+ Detected baseball with confidence 0.69
307
+ Bounding box: [307, 49, 365, 106]
308
+ Saved image (with padding): detected_objects/baseball_baseball_10.png
309
+
310
+ Detected baseball with confidence 0.68
311
+ Bounding box: [57, 289, 115, 346]
312
+ Saved image (with padding): detected_objects/baseball_baseball_11.png
313
+
314
+ Detected baseball with confidence 0.68
315
+ Bounding box: [86, 335, 143, 393]
316
+ Saved image (with padding): detected_objects/baseball_baseball_12.png
317
+
318
+ Detected baseball with confidence 0.68
319
+ Bounding box: [557, 49, 615, 107]
320
+ Saved image (with padding): detected_objects/baseball_baseball_13.png
321
+
322
+ Detected baseball with confidence 0.68
323
+ Bounding box: [35, 352, 92, 410]
324
+ Saved image (with padding): detected_objects/baseball_baseball_14.png
325
+
326
+ Detected baseball with confidence 0.68
327
+ Bounding box: [86, 95, 143, 153]
328
+ Saved image (with padding): detected_objects/baseball_baseball_15.png
329
+
330
+ Detected baseball with confidence 0.67
331
+ Bounding box: [586, 95, 643, 153]
332
+ Saved image (with padding): detected_objects/baseball_baseball_16.png
333
+
334
+ Detected baseball with confidence 0.66
335
+ Bounding box: [285, 111, 342, 169]
336
+ Saved image (with padding): detected_objects/baseball_baseball_17.png
337
+
338
+ Detected baseball with confidence 0.66
339
+ Bounding box: [35, 111, 91, 170]
340
+ Saved image (with padding): detected_objects/baseball_baseball_18.png
341
+
342
+ Detected baseball with confidence 0.66
343
+ Bounding box: [535, 111, 592, 169]
344
+ Saved image (with padding): detected_objects/baseball_baseball_19.png
345
+
346
+ Detected baseball with confidence 0.66
347
+ Bounding box: [337, 95, 393, 153]
348
+ Saved image (with padding): detected_objects/baseball_baseball_20.png
349
+
350
+ Detected basket with confidence 0.41
351
+ Bounding box: [1, 2, 218, 216]
352
+ Saved image (with padding): detected_objects/baseball_basket_1.png
353
+
354
+ Detected basket with confidence 0.39
355
+ Bounding box: [501, 2, 718, 216]
356
+ Saved image (with padding): detected_objects/baseball_basket_2.png
357
+
358
+ Detected basket with confidence 0.38
359
+ Bounding box: [2, 242, 218, 456]
360
+ Saved image (with padding): detected_objects/baseball_basket_3.png
361
+
362
+ Detected basket with confidence 0.38
363
+ Bounding box: [251, 2, 468, 216]
364
+ Saved image (with padding): detected_objects/baseball_basket_4.png
365
+
366
+ Done!
octotools/tools/advanced_object_detector/tool.py ADDED
@@ -0,0 +1,236 @@
1
+ # Grounding DINO Object Detection Tool
2
+ # https://huggingface.co/IDEA-Research/grounding-dino
3
+
4
+ import os
5
+ import time
6
+
7
+ from octotools.tools.base import BaseTool
8
+ from PIL import Image, ImageOps
9
+
10
+ import os
11
+ # Suppress stderr by redirecting it to /dev/null
12
+ import sys
13
+ import re
14
+ import base64
15
+ import requests
16
+ sys.stderr = open(os.devnull, 'w')
17
+
18
+
19
+ class Advanced_Object_Detector_Tool(BaseTool):
20
+ def __init__(self):
21
+ super().__init__(
22
+ tool_name="Advanced_Object_Detector_Tool",
23
+ tool_description="A tool that detects objects in an image using the Grounding DINO-X model and saves individual object images with empty padding.",
24
+ tool_version="1.0.0",
25
+ input_types={
26
+ "image": "str - The path to the image file.",
27
+ "labels": "list - A list of object labels to detect.",
28
+ "threshold": "float - The confidence threshold for detection (default: 0.35).",
29
+ "padding": "int - The number of pixels to add as empty padding around detected objects (default: 20)."
30
+ },
31
+ output_type="list - A list of detected objects with their scores, bounding boxes, and saved image paths.",
32
+ demo_commands=[
33
+ {
34
+ "command": 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])',
35
+ "description": "Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths."
36
+ },
37
+ {
38
+ "command": 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)',
39
+ "description": "Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths."
40
+ }
41
+ ],
42
+ user_metadata={
43
+ "limitation": "The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification."
44
+ }
45
+ )
46
+ self.DINO_KEY = os.environ.get("DINO_KEY")
47
+
48
+ def preprocess_caption(self, caption):
49
+ result = caption.lower().strip()
50
+ if result.endswith("."):
51
+ return result
52
+ return result + "."
53
+
54
+ def build_tool(self, threshold=0.35):
55
+
56
+ params_dict = {
57
+ 'headers': {
58
+ "Content-Type": "application/json",
59
+ "Token" : self.DINO_KEY
60
+ },
61
+ 'body':{
62
+ "image" : None,
63
+ "prompts": [
64
+ {"type": "text", "text": None},
65
+ ],
66
+ "bbox_threshold": threshold
67
+ }
68
+
69
+ }
70
+ return params_dict
71
+
72
+
73
+ def save_detected_object(self, image, box, image_name, label, index, padding):
74
+ object_image = image.crop(box)
75
+ padded_image = ImageOps.expand(object_image, border=padding, fill='white')
76
+
77
+ filename = f"{image_name}_{label}_{index}.png"
78
+ os.makedirs(self.output_dir, exist_ok=True)
79
+ save_path = os.path.join(self.output_dir, filename)
80
+
81
+ padded_image.save(save_path)
82
+ return save_path
83
+
84
+ def execute(self, image, labels, threshold=0.35, padding=20, max_retries=10, retry_delay=5):
85
+ retry_count = 0
86
+ params = self.build_tool(threshold)
87
+
88
+ def process_image(input_str):
89
+
90
+ def image_to_base64(image_path):
91
+ with open(image_path, "rb") as image_file:
92
+ return base64.b64encode(image_file.read()).decode('utf-8')
93
+ # Define common image file extensions
94
+ image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.tiff', '.webp'}
95
+
96
+ # Check if it is a URL
97
+ url_pattern = re.compile(r'^(http|https|ftp)://')
98
+ if url_pattern.match(input_str):
99
+ if input_str.lower().endswith(tuple(image_extensions)):
100
+ return input_str
101
+ return input_str
102
+
103
+ # Check if it is a file path
104
+ _, ext = os.path.splitext(input_str)
105
+ if ext.lower() in image_extensions:
106
+ image_base64 = image_to_base64(input_str)
107
+ return f'data:image/png;base64,{image_base64}'
108
+ return None
109
+
110
+ if len(labels) < 1:
111
+ preprocessed_prompt = '<prompt_free>'
112
+ else:
113
+ preprocessed_prompt = ''
114
+ for label in labels:
115
+ preprocessed_prompt += self.preprocess_caption(label)
116
+
117
+
118
+ body = params['body']
119
+ body['image'] = process_image(image)
120
+ body['prompts'] = [{"type": "text", "text": preprocessed_prompt}]
121
+
122
+ # send request
123
+ resp = requests.post(
124
+ 'https://api.deepdataspace.com/tasks/dinox',
125
+ json=body,
126
+ headers=params['headers']
127
+ )
128
+
129
+ if resp.status_code == 200:
130
+ json_resp = resp.json()
131
+ print(json_resp)
132
+
133
+ # get task_uuid
134
+ task_uuid = json_resp["data"]["task_uuid"]
135
+ print(f'task_uuid:{task_uuid}')
136
+
137
+ # poll get task result
138
+ while retry_count < max_retries:
139
+ resp = requests.get(f'https://api.deepdataspace.com/task_statuses/{task_uuid}', headers=params['headers'])
140
+
141
+
142
+ if resp.status_code != 200:
143
+ break
144
+ json_resp = resp.json()
145
+
146
+ if json_resp["data"]["status"] not in ["waiting", "running"]:
147
+ break
148
+ time.sleep(1)#retry_delay)
149
+ retry_count += 1
150
+
151
+ if json_resp["data"]["status"] == "failed":
152
+ print(f'failed resp: {json_resp}')
153
+ elif json_resp["data"]["status"] == "success":
154
+ # print(f'success resp: {json_resp}')
155
+ formatted_results = []
156
+ original_image = Image.open(image)
157
+ image_name = os.path.splitext(os.path.basename(image))[0]
158
+
159
+ object_counts = {}
160
+
161
+ for result in json_resp['data']['result']['objects']:
162
+ box = tuple(result["bbox"])
163
+ try:
164
+ box = [int(x) for x in box]
165
+ except:
166
+ continue
167
+ label = result["category"]
168
+ score = round(result["score"], 2)
169
+ if label.endswith("."):
170
+ label = label[:-1]
171
+
172
+ object_counts[label] = object_counts.get(label, 0) + 1
173
+ index = object_counts[label]
174
+
175
+ save_path = self.save_detected_object(original_image, box, image_name, label, index, padding)
176
+
177
+ formatted_results.append({
178
+ "label": label,
179
+ "confidence score": score,
180
+ "box": box,
181
+ "saved_image_path": save_path
182
+ })
183
+
184
+ return formatted_results
185
+ else:
186
+ print(f'get task resp: {resp.status_code} - {resp.text}')
187
+ else:
188
+ print(f'Error: {resp.status_code} - {resp.text}')
189
+
190
+ print(f"Failed to detect objects after {max_retries} attempts.")
191
+ return []
192
+
193
+ def get_metadata(self):
194
+ metadata = super().get_metadata()
195
+ return metadata
196
+
197
+ if __name__ == "__main__":
198
+ # Test command:
199
+ """
200
+ Run the following commands in the terminal to test the script:
201
+
202
+ cd octotools/tools/advanced_object_detector
203
+ python tool.py
204
+ """
205
+
206
+ # Get the directory of the current script
207
+ script_dir = os.path.dirname(os.path.abspath(__file__))
208
+
209
+ # Example usage of the Object_Detector_Tool
210
+ tool = Advanced_Object_Detector_Tool()
211
+ tool.set_custom_output_dir("detected_objects")
212
+
213
+ # Get tool metadata
214
+ metadata = tool.get_metadata()
215
+ # print(metadata)
216
+
217
+ # Construct the full path to the image using the script's directory
218
+ relative_image_path = "examples/baseball.png"
219
+ image_path = os.path.join(script_dir, relative_image_path)
220
+
221
+ import json
222
+
223
+ # Execute the tool
224
+ try:
225
+ execution = tool.execute(image=image_path, labels=["baseball", "basket"], padding=20)
226
+ print(json.dumps(execution, indent=4))
227
+ print("Detected Objects:")
228
+ for obj in execution:
229
+ print(f"Detected {obj['label']} with confidence {obj['confidence score']}")
230
+ print(f"Bounding box: {obj['box']}")
231
+ print(f"Saved image (with padding): {obj['saved_image_path']}")
232
+ print()
233
+ except ValueError as e:
234
+ print(f"Execution failed: {e}")
235
+
236
+ print("Done!")
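
For reference, the new `Advanced_Object_Detector_Tool` can be exercised the same way as the `__main__` test above. A hedged usage sketch (assumes the `DINO_KEY` environment variable is set and that the example image path is resolved from the repo root; not part of the commit itself):

```python
# Usage sketch mirroring the tool's own __main__ test (illustrative).
import json
from octotools.tools.advanced_object_detector.tool import Advanced_Object_Detector_Tool

tool = Advanced_Object_Detector_Tool()          # reads DINO_KEY from the environment
tool.set_custom_output_dir("detected_objects")  # where cropped detections are written

results = tool.execute(
    image="octotools/tools/advanced_object_detector/examples/baseball.png",  # assumed path
    labels=["baseball", "basket"],
    padding=20,
)
print(json.dumps(results, indent=4))
```
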
octotools/tools/arxiv_paper_searcher/__init__.py ADDED
File without changes
octotools/tools/arxiv_paper_searcher/test.log ADDED
@@ -0,0 +1,120 @@
1
+ ArXiv Search Tool Test
2
+ Tool Metadata:
3
+ {'tool_name': 'ArXiv_Paper_Searcher_Tool', 'tool_description': 'A tool that searches arXiv for papers based on a given query.', 'tool_version': '1.0.0', 'input_types': {'query': 'str - The search query for arXiv papers.', 'size': 'int - The number of results per page (25, 50, 100, or 200). If None, use 25.', 'max_results': 'int - The maximum number of papers to return (default: 25). Should be less than or equal to 100.'}, 'output_type': 'list - A list of dictionaries containing paper information.', 'demo_commands': [{'command': 'execution = tool.execute(query="tool agents with large language models")', 'description': 'Search for papers about tool agents with large language models.'}, {'command': 'execution = tool.execute(query="quantum computing", size=100, max_results=50)', 'description': 'Search for quantum computing papers, with 100 results per page, returning a maximum of 50 papers.'}, {'command': 'execution = tool.execute(query="machine learning", max_results=75)', 'description': 'Search for machine learning papers, returning a maximum of 75 papers.'}], 'require_llm_engine': False, 'user_metadata': {'valid_sizes': [25, 50, 100, 200], 'base_url': 'https://arxiv.org/search/'}}
4
+
5
+ ==>> Execution:
6
+ [
7
+ {
8
+ "title": "Position: Multimodal Large Language Models Can Significantly Advance Scientific Reasoning",
9
+ "authors": "Yibo Yan, Shen Wang, Jiahao Huo, Jingheng Ye, Zhendong Chu, Xuming Hu, Philip S. Yu, Carla Gomes, Bart Selman, Qingsong Wen",
10
+ "abstract": "Scientific reasoning, the process through which humans apply logic, evidence, and critical thinking to explore and interpret scientific phenomena, is essential in advancing knowledge reasoning across diverse fields. However, despite significant progress, current scientific reasoning models still struggle with generalization across domains and often fall short of multimodal perception. Multimodal Large Language Models (MLLMs), which integrate text, images, and other modalities, present an exciting opportunity to overcome these limitations and enhance scientific reasoning. Therefore, this position paper argues that MLLMs can significantly advance scientific reasoning across disciplines such as mathematics, physics, chemistry, and biology. First, we propose a four-stage research roadmap of scientific reasoning capabilities, and highlight the current state of MLLM applications in scientific reasoning, noting their ability to integrate and reason over diverse data types. Second, we summarize the key challenges that remain obstacles to achieving MLLM's full potential. To address these challenges, we propose actionable insights and suggestions for the future. Overall, our work offers a novel perspective on MLLM integration with scientific reasoning, providing the LLM community with a valuable vision for achieving Artificial General Intelligence (AGI).",
11
+ "link": "https://arxiv.org/abs/2502.02871"
12
+ },
13
+ {
14
+ "title": "Adaptive Graph of Thoughts: Test-Time Adaptive Reasoning Unifying Chain, Tree, and Graph Structures",
15
+ "authors": "Tushar Pandey, Ara Ghukasyan, Oktay Goktas, Santosh Kumar Radha",
16
+ "abstract": "Large Language Models (LLMs) have demonstrated impressive reasoning capabilities, yet their performance is highly dependent on the prompting strategy and model scale. While reinforcement learning and fine-tuning have been deployed to boost reasoning, these approaches incur substantial computational and data overhead. In this work, we introduce Adaptive Graph of Thoughts (AGoT), a dynamic, graph-based inference framework that enhances LLM reasoning solely at test time. Rather than relying on fixed-step methods like Chain of Thought (CoT) or Tree of Thoughts (ToT), AGoT recursively decomposes complex queries into structured subproblems, forming an dynamic directed acyclic graph (DAG) of interdependent reasoning steps. By selectively expanding only those subproblems that require further analysis, AGoT unifies the strengths of chain, tree, and graph paradigms into a cohesive framework that allocates computation where it is most needed. We validate our approach on diverse benchmarks spanning multi-hop retrieval, scientific reasoning, and mathematical problem-solving, achieving up to 46.2% improvement on scientific reasoning tasks (GPQA) - comparable to gains achieved through computationally intensive reinforcement learning approaches and outperforming state-of-the-art iterative approaches. These results suggest that dynamic decomposition and structured recursion offer a scalable, cost-effective alternative to post-training modifications, paving the way for more robust, general-purpose reasoning in LLMs.",
17
+ "link": "https://arxiv.org/abs/2502.05078"
18
+ },
19
+ {
20
+ "title": "VersaPRM: Multi-Domain Process Reward Model via Synthetic Reasoning Data",
21
+ "authors": "Thomas Zeng, Shuibai Zhang, Shutong Wu, Christian Classen, Daewon Chae, Ethan Ewer, Minjae Lee, Heeju Kim, Wonjun Kang, Jackson Kunde, Ying Fan, Jungtaek Kim, Hyung Il Koo, Kannan Ramchandran, Dimitris Papailiopoulos, Kangwook Lee",
22
+ "abstract": "Process Reward Models (PRMs) have proven effective at enhancing mathematical reasoning for Large Language Models (LLMs) by leveraging increased inference-time computation. However, they are predominantly trained on mathematical data and their generalizability to non-mathematical domains has not been rigorously studied. In response, this work first shows that current PRMs have poor performance in other domains. To address this limitation, we introduce VersaPRM, a multi-domain PRM trained on synthetic reasoning data generated using our novel data generation and annotation method. VersaPRM achieves consistent performance gains across diverse domains. For instance, in the MMLU-Pro category of Law, VersaPRM via weighted majority voting, achieves a 7.9% performance gain over the majority voting baseline -- surpassing Qwen2.5-Math-PRM's gain of 1.3%. We further contribute to the community by open-sourcing all data, code and models for VersaPRM.",
23
+ "link": "https://arxiv.org/abs/2502.06737"
24
+ },
25
+ {
26
+ "title": "Large Language Models for Multi-Robot Systems: A Survey",
27
+ "authors": "Peihan Li, Zijian An, Shams Abrar, Lifeng Zhou",
28
+ "abstract": "The rapid advancement of Large Language Models (LLMs) has opened new possibilities in Multi-Robot Systems (MRS), enabling enhanced communication, task planning, and human-robot interaction. Unlike traditional single-robot and multi-agent systems, MRS poses unique challenges, including coordination, scalability, and real-world adaptability. This survey provides the first comprehensive exploration of LLM integration into MRS. It systematically categorizes their applications across high-level task allocation, mid-level motion planning, low-level action generation, and human intervention. We highlight key applications in diverse domains, such as household robotics, construction, formation control, target tracking, and robot games, showcasing the versatility and transformative potential of LLMs in MRS. Furthermore, we examine the challenges that limit adapting LLMs in MRS, including mathematical reasoning limitations, hallucination, latency issues, and the need for robust benchmarking systems. Finally, we outline opportunities for future research, emphasizing advancements in fine-tuning, reasoning techniques, and task-specific models. This survey aims to guide researchers in the intelligence and real-world deployment of MRS powered by LLMs. Based on the fast-evolving nature of research in the field, we keep updating the papers in the open-source Github repository.",
29
+ "link": "https://arxiv.org/abs/2502.03814"
30
+ },
31
+ {
32
+ "title": "MergeME: Model Merging Techniques for Homogeneous and Heterogeneous MoEs",
33
+ "authors": "Yuhang Zhou, Giannis Karamanolakis, Victor Soto, Anna Rumshisky, Mayank Kulkarni, Furong Huang, Wei Ai, Jianhua Lu",
34
+ "abstract": "The recent success of specialized Large Language Models (LLMs) in domains such as mathematical reasoning and coding has led to growing interest in methods for merging these expert LLMs into a unified Mixture-of-Experts (MoE) model, with the goal of enhancing performance in each domain while retaining effectiveness on general tasks. However, the effective merging of expert models remains an open challenge, especially for models with highly divergent weight parameters or different architectures. State-of-the-art MoE merging methods only work with homogeneous model architectures and rely on simple unweighted averaging to merge expert layers, which does not address parameter interference and requires extensive fine-tuning of the merged MoE to restore performance. To address these limitations, this paper introduces new MoE merging techniques, including strategies to mitigate parameter interference, routing heuristics to reduce the need for MoE fine-tuning, and a novel method for merging experts with different architectures. Extensive experiments across multiple domains demonstrate the effectiveness of our proposed methods, reducing fine-tuning costs, improving performance over state-of-the-art methods, and expanding the applicability of MoE merging.",
35
+ "link": "https://arxiv.org/abs/2502.00997"
36
+ },
37
+ {
38
+ "title": "Satori: Reinforcement Learning with Chain-of-Action-Thought Enhances LLM Reasoning via Autoregressive Search",
39
+ "authors": "Maohao Shen, Guangtao Zeng, Zhenting Qi, Zhang-Wei Hong, Zhenfang Chen, Wei Lu, Gregory Wornell, Subhro Das, David Cox, Chuang Gan",
40
+ "abstract": "Large language models (LLMs) have demonstrated remarkable reasoning capabilities across diverse domains. Recent studies have shown that increasing test-time computation enhances LLMs' reasoning capabilities. This typically involves extensive sampling at inference time guided by an external LLM verifier, resulting in a two-player system. Despite external guidance, the effectiveness of this system demonstrates the potential of a single LLM to tackle complex tasks. Thus, we pose a new research problem: Can we internalize the searching capabilities to fundamentally enhance the reasoning abilities of a single LLM? This work explores an orthogonal direction focusing on post-training LLMs for autoregressive searching (i.e., an extended reasoning process with self-reflection and self-exploration of new strategies). To achieve this, we propose the Chain-of-Action-Thought (COAT) reasoning and a two-stage training paradigm: 1) a small-scale format tuning stage to internalize the COAT reasoning format and 2) a large-scale self-improvement stage leveraging reinforcement learning. Our approach results in Satori, a 7B LLM trained on open-source models and data. Extensive empirical evaluations demonstrate that Satori achieves state-of-the-art performance on mathematical reasoning benchmarks while exhibits strong generalization to out-of-domain tasks. Code, data, and models will be fully open-sourced.",
41
+ "link": "https://arxiv.org/abs/2502.02508"
42
+ },
43
+ {
44
+ "title": "Reasoning-as-Logic-Units: Scaling Test-Time Reasoning in Large Language Models Through Logic Unit Alignment",
45
+ "authors": "Cheryl Li, Tianyuan Xu, Yiwen Guo",
46
+ "abstract": "Chain-of-Thought (CoT) prompting has shown promise in enhancing the reasoning capabilities of large language models (LLMs) by generating natural language (NL) rationales that lead to the final answer. However, it struggles with numerical computation, which has somehow led to the development of program-aided techniques. Despite their potential, a persistent challenge remains: inconsistencies between LLM-reported reasoning steps and the logic in generated programs, which we term ``reasoning hallucinations.\" This stems from the inherent ambiguities of NL and the statistical nature of LLMs, which often lack rigorous logical coherence. To address this challenge, we propose a novel test-time scaling framework, Reasoning-as-Logic-Units (RaLU), which constructs a more reliable reasoning path by aligning logical units between the generated program and their corresponding NL descriptions. By decomposing the initially generated program into discrete units using static analysis, RaLU engages in an iterative dialogue with the LLM to judge, refine, and explain each unit. A rewind-and-correct mechanism ensures alignment between code statements and task requirements in each unit, ultimately forming a cohesive reasoning path under the program's logic, from which the model reaches a final solution. Our experiments demonstrate that RaLU significantly outperforms existing baselines in mathematical reasoning (GSM8K, MATH) and algorithmic reasoning (HumanEval+, MBPP+), underscoring its potential to advance LLM reasoning and programming by offering enhanced accuracy and interpretability.",
47
+ "link": "https://arxiv.org/abs/2502.07803"
48
+ },
49
+ {
50
+ "title": "Premise-Augmented Reasoning Chains Improve Error Identification in Math reasoning with LLMs",
51
+ "authors": "Sagnik Mukherjee, Abhinav Chinta, Takyoung Kim, Tarun Anoop Sharma, Dilek Hakkani-T\u00fcr",
52
+ "abstract": "Chain-of-Thought (CoT) prompting enhances mathematical reasoning in large language models (LLMs) by enabling detailed step-by-step solutions. However, due to the verbosity of LLMs, the resulting reasoning chains can be long, making it harder to verify the reasoning steps and trace issues resulting from dependencies between the steps that may be farther away in the sequence of steps. Importantly, mathematical reasoning allows each step to be derived from a small set of premises, which are a subset of the preceding steps in the reasoning chain. In this paper, we present a framework that identifies the premises for each step, to improve the evaluation of reasoning. We restructure conventional linear reasoning chains into Premise Augmented Reasoning Chains (PARC) by introducing premise links, resulting in a directed acyclic graph where the nodes are the steps and the edges are the premise links. Through experiments with a PARC-based dataset that we built, namely PERL (Premises and ERrors identification in LLMs), we demonstrate that LLMs can reliably identify premises within complex reasoning chains. In particular, even open-source LLMs achieve 90% recall in premise identification. We also show that PARC helps to identify errors in reasoning chains more reliably. The accuracy of error identification improves by 6% to 16% absolute when step-by-step verification is carried out in PARC under the premises. Our findings highlight the utility of premise-centric representations in addressing complex problem-solving tasks and open new avenues for improving the reliability of LLM-based reasoning evaluations.",
53
+ "link": "https://arxiv.org/abs/2502.02362"
54
+ },
55
+ {
56
+ "title": "Advanced Weakly-Supervised Formula Exploration for Neuro-Symbolic Mathematical Reasoning",
57
+ "authors": "Yuxuan Wu, Hideki Nakayama",
58
+ "abstract": "In recent years, neuro-symbolic methods have become a popular and powerful approach that augments artificial intelligence systems with the capability to perform abstract, logical, and quantitative deductions with enhanced precision and controllability. Recent studies successfully performed symbolic reasoning by leveraging various machine learning models to explicitly or implicitly predict intermediate labels that provide symbolic instructions. However, these intermediate labels are not always prepared for every task as a part of training data, and pre-trained models, represented by Large Language Models (LLMs), also do not consistently generate valid symbolic instructions with their intrinsic knowledge. On the other hand, existing work developed alternative learning techniques that allow the learning system to autonomously uncover optimal symbolic instructions. Nevertheless, their performance also exhibits limitations when faced with relatively huge search spaces or more challenging reasoning problems. In view of this, in this work, we put forward an advanced practice for neuro-symbolic reasoning systems to explore the intermediate labels with weak supervision from problem inputs and final outputs. Our experiments on the Mathematics dataset illustrated the effectiveness of our proposals from multiple aspects.",
59
+ "link": "https://arxiv.org/abs/2502.00629"
60
+ },
61
+ {
62
+ "title": "ARIES: Stimulating Self-Refinement of Large Language Models by Iterative Preference Optimization",
63
+ "authors": "Yongcheng Zeng, Xinyu Cui, Xuanfa Jin, Guoqing Liu, Zexu Sun, Quan He, Dong Li, Ning Yang, Jianye Hao, Haifeng Zhang, Jun Wang",
64
+ "abstract": "A truly intelligent Large Language Model (LLM) should be capable of correcting errors in its responses through external interactions. However, even the most advanced models often face challenges in improving their outputs. In this paper, we explore how to cultivate LLMs with the self-refinement capability through iterative preference training, and how this ability can be leveraged to improve model performance during inference. To this end, we introduce a novel post-training and inference framework, called ARIES: Adaptive Refinement and Iterative Enhancement Structure. This method iteratively performs preference training and self-refinement-based data collection. During training, ARIES strengthen the model's direct question-answering capability while simultaneously unlocking its self-refinement potential. During inference, ARIES harnesses this self-refinement capability to generate a series of progressively refined responses, which are then filtered using either the Reward Model Scoring or a simple yet effective Rule-Based Selection mechanism, specifically tailored to our approach, to construct a dataset for the next round of preference training. Experimental results demonstrate the remarkable performance of ARIES. When applied to the Llama-3.1-8B model and under the self-refinement setting, ARIES surpasses powerful models such as GPT-4o, achieving 62.3% length-controlled (LC) and a 63.3% raw win rates on AlpacaEval 2, outperforming Iterative DPO by 27.8% and 35.5% respectively, as well as a 50.3% win rate on Arena-Hard, surpassing Iterative DPO by 26.6%. Furthermore, ARIES consistently enhances performance on mathematical reasoning tasks like GSM8K and MATH.",
65
+ "link": "https://arxiv.org/abs/2502.05605"
66
+ }
67
+ ]
68
+
69
+ ==>> Search Results:
70
+ 1. Position: Multimodal Large Language Models Can Significantly Advance Scientific Reasoning
71
+ Authors: Yibo Yan, Shen Wang, Jiahao Huo, Jingheng Ye, Zhendong Chu, Xuming Hu, Philip S. Yu, Carla Gomes, Bart Selman, Qingsong Wen
72
+ Abstract: Scientific reasoning, the process through which humans apply logic, evidence, and critical thinking to explore and interpret scientific phenomena, is essential in advancing knowledge reasoning across diverse fields. However, despite significant progress, current scientific reasoning models still struggle with generalization across domains and often fall short of multimodal perception. Multimodal Large Language Models (MLLMs), which integrate text, images, and other modalities, present an exciting opportunity to overcome these limitations and enhance scientific reasoning. Therefore, this position paper argues that MLLMs can significantly advance scientific reasoning across disciplines such as mathematics, physics, chemistry, and biology. First, we propose a four-stage research roadmap of scientific reasoning capabilities, and highlight the current state of MLLM applications in scientific reasoning, noting their ability to integrate and reason over diverse data types. Second, we summarize the key challenges that remain obstacles to achieving MLLM's full potential. To address these challenges, we propose actionable insights and suggestions for the future. Overall, our work offers a novel perspective on MLLM integration with scientific reasoning, providing the LLM community with a valuable vision for achieving Artificial General Intelligence (AGI).
73
+ Link: https://arxiv.org/abs/2502.02871
74
+
75
+ 2. Adaptive Graph of Thoughts: Test-Time Adaptive Reasoning Unifying Chain, Tree, and Graph Structures
76
+ Authors: Tushar Pandey, Ara Ghukasyan, Oktay Goktas, Santosh Kumar Radha
77
+ Abstract: Large Language Models (LLMs) have demonstrated impressive reasoning capabilities, yet their performance is highly dependent on the prompting strategy and model scale. While reinforcement learning and fine-tuning have been deployed to boost reasoning, these approaches incur substantial computational and data overhead. In this work, we introduce Adaptive Graph of Thoughts (AGoT), a dynamic, graph-based inference framework that enhances LLM reasoning solely at test time. Rather than relying on fixed-step methods like Chain of Thought (CoT) or Tree of Thoughts (ToT), AGoT recursively decomposes complex queries into structured subproblems, forming an dynamic directed acyclic graph (DAG) of interdependent reasoning steps. By selectively expanding only those subproblems that require further analysis, AGoT unifies the strengths of chain, tree, and graph paradigms into a cohesive framework that allocates computation where it is most needed. We validate our approach on diverse benchmarks spanning multi-hop retrieval, scientific reasoning, and mathematical problem-solving, achieving up to 46.2% improvement on scientific reasoning tasks (GPQA) - comparable to gains achieved through computationally intensive reinforcement learning approaches and outperforming state-of-the-art iterative approaches. These results suggest that dynamic decomposition and structured recursion offer a scalable, cost-effective alternative to post-training modifications, paving the way for more robust, general-purpose reasoning in LLMs.
78
+ Link: https://arxiv.org/abs/2502.05078
79
+
80
+ 3. VersaPRM: Multi-Domain Process Reward Model via Synthetic Reasoning Data
81
+ Authors: Thomas Zeng, Shuibai Zhang, Shutong Wu, Christian Classen, Daewon Chae, Ethan Ewer, Minjae Lee, Heeju Kim, Wonjun Kang, Jackson Kunde, Ying Fan, Jungtaek Kim, Hyung Il Koo, Kannan Ramchandran, Dimitris Papailiopoulos, Kangwook Lee
82
+ Abstract: Process Reward Models (PRMs) have proven effective at enhancing mathematical reasoning for Large Language Models (LLMs) by leveraging increased inference-time computation. However, they are predominantly trained on mathematical data and their generalizability to non-mathematical domains has not been rigorously studied. In response, this work first shows that current PRMs have poor performance in other domains. To address this limitation, we introduce VersaPRM, a multi-domain PRM trained on synthetic reasoning data generated using our novel data generation and annotation method. VersaPRM achieves consistent performance gains across diverse domains. For instance, in the MMLU-Pro category of Law, VersaPRM via weighted majority voting, achieves a 7.9% performance gain over the majority voting baseline -- surpassing Qwen2.5-Math-PRM's gain of 1.3%. We further contribute to the community by open-sourcing all data, code and models for VersaPRM.
83
+ Link: https://arxiv.org/abs/2502.06737
84
+
85
+ 4. Large Language Models for Multi-Robot Systems: A Survey
86
+ Authors: Peihan Li, Zijian An, Shams Abrar, Lifeng Zhou
87
+ Abstract: The rapid advancement of Large Language Models (LLMs) has opened new possibilities in Multi-Robot Systems (MRS), enabling enhanced communication, task planning, and human-robot interaction. Unlike traditional single-robot and multi-agent systems, MRS poses unique challenges, including coordination, scalability, and real-world adaptability. This survey provides the first comprehensive exploration of LLM integration into MRS. It systematically categorizes their applications across high-level task allocation, mid-level motion planning, low-level action generation, and human intervention. We highlight key applications in diverse domains, such as household robotics, construction, formation control, target tracking, and robot games, showcasing the versatility and transformative potential of LLMs in MRS. Furthermore, we examine the challenges that limit adapting LLMs in MRS, including mathematical reasoning limitations, hallucination, latency issues, and the need for robust benchmarking systems. Finally, we outline opportunities for future research, emphasizing advancements in fine-tuning, reasoning techniques, and task-specific models. This survey aims to guide researchers in the intelligence and real-world deployment of MRS powered by LLMs. Based on the fast-evolving nature of research in the field, we keep updating the papers in the open-source Github repository.
88
+ Link: https://arxiv.org/abs/2502.03814
89
+
90
+ 5. MergeME: Model Merging Techniques for Homogeneous and Heterogeneous MoEs
91
+ Authors: Yuhang Zhou, Giannis Karamanolakis, Victor Soto, Anna Rumshisky, Mayank Kulkarni, Furong Huang, Wei Ai, Jianhua Lu
92
+ Abstract: The recent success of specialized Large Language Models (LLMs) in domains such as mathematical reasoning and coding has led to growing interest in methods for merging these expert LLMs into a unified Mixture-of-Experts (MoE) model, with the goal of enhancing performance in each domain while retaining effectiveness on general tasks. However, the effective merging of expert models remains an open challenge, especially for models with highly divergent weight parameters or different architectures. State-of-the-art MoE merging methods only work with homogeneous model architectures and rely on simple unweighted averaging to merge expert layers, which does not address parameter interference and requires extensive fine-tuning of the merged MoE to restore performance. To address these limitations, this paper introduces new MoE merging techniques, including strategies to mitigate parameter interference, routing heuristics to reduce the need for MoE fine-tuning, and a novel method for merging experts with different architectures. Extensive experiments across multiple domains demonstrate the effectiveness of our proposed methods, reducing fine-tuning costs, improving performance over state-of-the-art methods, and expanding the applicability of MoE merging.
93
+ Link: https://arxiv.org/abs/2502.00997
94
+
95
+ 6. Satori: Reinforcement Learning with Chain-of-Action-Thought Enhances LLM Reasoning via Autoregressive Search
96
+ Authors: Maohao Shen, Guangtao Zeng, Zhenting Qi, Zhang-Wei Hong, Zhenfang Chen, Wei Lu, Gregory Wornell, Subhro Das, David Cox, Chuang Gan
97
+ Abstract: Large language models (LLMs) have demonstrated remarkable reasoning capabilities across diverse domains. Recent studies have shown that increasing test-time computation enhances LLMs' reasoning capabilities. This typically involves extensive sampling at inference time guided by an external LLM verifier, resulting in a two-player system. Despite external guidance, the effectiveness of this system demonstrates the potential of a single LLM to tackle complex tasks. Thus, we pose a new research problem: Can we internalize the searching capabilities to fundamentally enhance the reasoning abilities of a single LLM? This work explores an orthogonal direction focusing on post-training LLMs for autoregressive searching (i.e., an extended reasoning process with self-reflection and self-exploration of new strategies). To achieve this, we propose the Chain-of-Action-Thought (COAT) reasoning and a two-stage training paradigm: 1) a small-scale format tuning stage to internalize the COAT reasoning format and 2) a large-scale self-improvement stage leveraging reinforcement learning. Our approach results in Satori, a 7B LLM trained on open-source models and data. Extensive empirical evaluations demonstrate that Satori achieves state-of-the-art performance on mathematical reasoning benchmarks while exhibits strong generalization to out-of-domain tasks. Code, data, and models will be fully open-sourced.
98
+ Link: https://arxiv.org/abs/2502.02508
99
+
100
+ 7. Reasoning-as-Logic-Units: Scaling Test-Time Reasoning in Large Language Models Through Logic Unit Alignment
101
+ Authors: Cheryl Li, Tianyuan Xu, Yiwen Guo
102
+ Abstract: Chain-of-Thought (CoT) prompting has shown promise in enhancing the reasoning capabilities of large language models (LLMs) by generating natural language (NL) rationales that lead to the final answer. However, it struggles with numerical computation, which has somehow led to the development of program-aided techniques. Despite their potential, a persistent challenge remains: inconsistencies between LLM-reported reasoning steps and the logic in generated programs, which we term ``reasoning hallucinations." This stems from the inherent ambiguities of NL and the statistical nature of LLMs, which often lack rigorous logical coherence. To address this challenge, we propose a novel test-time scaling framework, Reasoning-as-Logic-Units (RaLU), which constructs a more reliable reasoning path by aligning logical units between the generated program and their corresponding NL descriptions. By decomposing the initially generated program into discrete units using static analysis, RaLU engages in an iterative dialogue with the LLM to judge, refine, and explain each unit. A rewind-and-correct mechanism ensures alignment between code statements and task requirements in each unit, ultimately forming a cohesive reasoning path under the program's logic, from which the model reaches a final solution. Our experiments demonstrate that RaLU significantly outperforms existing baselines in mathematical reasoning (GSM8K, MATH) and algorithmic reasoning (HumanEval+, MBPP+), underscoring its potential to advance LLM reasoning and programming by offering enhanced accuracy and interpretability.
103
+ Link: https://arxiv.org/abs/2502.07803
104
+
105
+ 8. Premise-Augmented Reasoning Chains Improve Error Identification in Math reasoning with LLMs
106
+ Authors: Sagnik Mukherjee, Abhinav Chinta, Takyoung Kim, Tarun Anoop Sharma, Dilek Hakkani-Tür
107
+ Abstract: Chain-of-Thought (CoT) prompting enhances mathematical reasoning in large language models (LLMs) by enabling detailed step-by-step solutions. However, due to the verbosity of LLMs, the resulting reasoning chains can be long, making it harder to verify the reasoning steps and trace issues resulting from dependencies between the steps that may be farther away in the sequence of steps. Importantly, mathematical reasoning allows each step to be derived from a small set of premises, which are a subset of the preceding steps in the reasoning chain. In this paper, we present a framework that identifies the premises for each step, to improve the evaluation of reasoning. We restructure conventional linear reasoning chains into Premise Augmented Reasoning Chains (PARC) by introducing premise links, resulting in a directed acyclic graph where the nodes are the steps and the edges are the premise links. Through experiments with a PARC-based dataset that we built, namely PERL (Premises and ERrors identification in LLMs), we demonstrate that LLMs can reliably identify premises within complex reasoning chains. In particular, even open-source LLMs achieve 90% recall in premise identification. We also show that PARC helps to identify errors in reasoning chains more reliably. The accuracy of error identification improves by 6% to 16% absolute when step-by-step verification is carried out in PARC under the premises. Our findings highlight the utility of premise-centric representations in addressing complex problem-solving tasks and open new avenues for improving the reliability of LLM-based reasoning evaluations.
108
+ Link: https://arxiv.org/abs/2502.02362
109
+
110
+ 9. Advanced Weakly-Supervised Formula Exploration for Neuro-Symbolic Mathematical Reasoning
111
+ Authors: Yuxuan Wu, Hideki Nakayama
112
+ Abstract: In recent years, neuro-symbolic methods have become a popular and powerful approach that augments artificial intelligence systems with the capability to perform abstract, logical, and quantitative deductions with enhanced precision and controllability. Recent studies successfully performed symbolic reasoning by leveraging various machine learning models to explicitly or implicitly predict intermediate labels that provide symbolic instructions. However, these intermediate labels are not always prepared for every task as a part of training data, and pre-trained models, represented by Large Language Models (LLMs), also do not consistently generate valid symbolic instructions with their intrinsic knowledge. On the other hand, existing work developed alternative learning techniques that allow the learning system to autonomously uncover optimal symbolic instructions. Nevertheless, their performance also exhibits limitations when faced with relatively huge search spaces or more challenging reasoning problems. In view of this, in this work, we put forward an advanced practice for neuro-symbolic reasoning systems to explore the intermediate labels with weak supervision from problem inputs and final outputs. Our experiments on the Mathematics dataset illustrated the effectiveness of our proposals from multiple aspects.
113
+ Link: https://arxiv.org/abs/2502.00629
114
+
115
+ 10. ARIES: Stimulating Self-Refinement of Large Language Models by Iterative Preference Optimization
116
+ Authors: Yongcheng Zeng, Xinyu Cui, Xuanfa Jin, Guoqing Liu, Zexu Sun, Quan He, Dong Li, Ning Yang, Jianye Hao, Haifeng Zhang, Jun Wang
117
+ Abstract: A truly intelligent Large Language Model (LLM) should be capable of correcting errors in its responses through external interactions. However, even the most advanced models often face challenges in improving their outputs. In this paper, we explore how to cultivate LLMs with the self-refinement capability through iterative preference training, and how this ability can be leveraged to improve model performance during inference. To this end, we introduce a novel post-training and inference framework, called ARIES: Adaptive Refinement and Iterative Enhancement Structure. This method iteratively performs preference training and self-refinement-based data collection. During training, ARIES strengthen the model's direct question-answering capability while simultaneously unlocking its self-refinement potential. During inference, ARIES harnesses this self-refinement capability to generate a series of progressively refined responses, which are then filtered using either the Reward Model Scoring or a simple yet effective Rule-Based Selection mechanism, specifically tailored to our approach, to construct a dataset for the next round of preference training. Experimental results demonstrate the remarkable performance of ARIES. When applied to the Llama-3.1-8B model and under the self-refinement setting, ARIES surpasses powerful models such as GPT-4o, achieving 62.3% length-controlled (LC) and a 63.3% raw win rates on AlpacaEval 2, outperforming Iterative DPO by 27.8% and 35.5% respectively, as well as a 50.3% win rate on Arena-Hard, surpassing Iterative DPO by 26.6%. Furthermore, ARIES consistently enhances performance on mathematical reasoning tasks like GSM8K and MATH.
118
+ Link: https://arxiv.org/abs/2502.05605
119
+
120
+ Done!
octotools/tools/arxiv_paper_searcher/tool.py ADDED
@@ -0,0 +1,165 @@
1
+ import re
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+ from octotools.tools.base import BaseTool
6
+
7
+ class ArXiv_Paper_Searcher_Tool(BaseTool):
8
+ def __init__(self):
9
+ super().__init__(
10
+ tool_name="ArXiv_Paper_Searcher_Tool",
11
+ tool_description="A tool that searches arXiv for papers based on a given query.",
12
+ tool_version="1.0.0",
13
+ input_types={
14
+ "query": "str - The search query for arXiv papers.",
15
+ "size": "int - The number of results per page (25, 50, 100, or 200). If None, use 25.",
16
+ "max_results": "int - The maximum number of papers to return (default: 25). Should be less than or equal to 100."
17
+ },
18
+ output_type="list - A list of dictionaries containing paper information.",
19
+ demo_commands=[
20
+ {
21
+ "command": 'execution = tool.execute(query="tool agents with large language models")',
22
+ "description": "Search for papers about tool agents with large language models."
23
+ },
24
+ {
25
+ "command": 'execution = tool.execute(query="quantum computing", size=100, max_results=50)',
26
+ "description": "Search for quantum computing papers, with 100 results per page, returning a maximum of 50 papers."
27
+ },
28
+ {
29
+ "command": 'execution = tool.execute(query="machine learning", max_results=75)',
30
+ "description": "Search for machine learning papers, returning a maximum of 75 papers."
31
+ },
32
+ ],
33
+ user_metadata={
34
+ "valid_sizes": [25, 50, 100, 200],
35
+ "base_url": "https://arxiv.org/search/"
36
+ }
37
+ )
38
+
39
+ def build_tool(self):
40
+ """
41
+ No specific build required for this tool.
42
+ """
43
+ pass
44
+
45
+ def execute(self, query, size=None, max_results=25):
46
+ """
47
+ Executes the arXiv search tool to find papers based on the given query.
48
+
49
+ Parameters:
50
+ query (str): The search query for arXiv papers.
51
+ size (int): The number of results per page.
52
+ max_results (int): The maximum number of papers to return.
53
+
54
+ Returns:
55
+ list: A list of dictionaries containing paper information.
56
+ """
57
+ valid_sizes = self.user_metadata["valid_sizes"]
58
+ base_url = self.user_metadata["base_url"]
59
+
60
+ if size is None:
61
+ size = 25
62
+ elif size not in valid_sizes:
63
+ size = min(valid_sizes, key=lambda x: abs(x - size))
64
+
65
+ results = []
66
+ start = 0
67
+
68
+ max_results = min(max_results, 100) # NOTE: For traffic reasons, limit to 100 results
69
+
70
+ while len(results) < max_results:
71
+ params = {
72
+ "searchtype": "all",
73
+ "query": query,
74
+ "abstracts": "show",
75
+ "order": "",
76
+ "size": str(size),
77
+ "start": str(start)
78
+ }
79
+
80
+ try:
81
+ response = requests.get(base_url, params=params)
82
+ soup = BeautifulSoup(response.content, 'html.parser')
83
+
84
+ papers = soup.find_all("li", class_="arxiv-result")
85
+ if not papers:
86
+ break
87
+
88
+ for paper in papers:
89
+ if len(results) >= max_results:
90
+ break
91
+
92
+ title = paper.find("p", class_="title").text.strip()
93
+ authors = paper.find("p", class_="authors").text.strip()
94
+ authors = re.sub(r'^Authors:\s*', '', authors)
95
+ authors = re.sub(r'\s+', ' ', authors).strip()
96
+
97
+ abstract = paper.find("span", class_="abstract-full").text.strip()
98
+ abstract = abstract.replace("△ Less", "").strip()
99
+
100
+ link = paper.find("p", class_="list-title").find("a")["href"]
101
+
102
+ results.append({
103
+ "title": title,
104
+ "authors": authors,
105
+ "abstract": abstract,
106
+ "link": f"{link}"
107
+ })
108
+
109
+ start += size
110
+
111
+ except Exception as e:
112
+ print(f"Error searching arXiv: {e}")
113
+ break
114
+
115
+ return results[:max_results]
116
+
117
+ def get_metadata(self):
118
+ """
119
+ Returns the metadata for the ArXiv_Paper_Searcher_Tool.
120
+
121
+ Returns:
122
+ dict: A dictionary containing the tool's metadata.
123
+ """
124
+ metadata = super().get_metadata()
125
+ return metadata
126
+
127
+ if __name__ == "__main__":
128
+ # Test command:
129
+ """
130
+ Run the following commands in the terminal to test the script:
131
+
132
+ cd octotools/tools/arxiv_paper_searcher
133
+ python tool.py
134
+ """
135
+
136
+ import json
137
+
138
+ print("ArXiv Search Tool Test")
139
+
140
+ # Example usage of the ArXiv_Paper_Searcher_Tool
141
+ tool = ArXiv_Paper_Searcher_Tool()
142
+
143
+ # Get tool metadata
144
+ metadata = tool.get_metadata()
145
+ print("Tool Metadata:")
146
+ print(metadata)
147
+
148
+ # Sample query for searching arXiv
149
+ query = "enhance mathematical reasoning with large language models"
150
+ # Execute the tool
151
+ try:
152
+ execution = tool.execute(query=query, size=50, max_results=10)
153
+ print("\n==>> Execution:")
154
+ print(json.dumps(execution, indent=4)) # Pretty print JSON
155
+ print("\n==>> Search Results:")
156
+ for i, paper in enumerate(execution, 1):
157
+ print(f"{i}. {paper['title']}")
158
+ print(f" Authors: {paper['authors']}")
159
+ print(f" Abstract: {paper['abstract'][:2000]}")
160
+ print(f" Link: {paper['link']}")
161
+ print()
162
+ except Exception as e:
163
+ print(f"Execution failed: {e}")
164
+
165
+ print("Done!")
{opentools β†’ octotools}/tools/base.py RENAMED
@@ -1,6 +1,6 @@
1
- # opentools/tools/base.py
2
 
3
- from opentools.engine.openai import ChatOpenAI
4
 
5
  class BaseTool:
6
  """
 
1
+ # octotools/tools/base.py
2
 
3
+ from octotools.engine.openai import ChatOpenAI
4
 
5
  class BaseTool:
6
  """
{opentools β†’ octotools}/tools/generalist_solution_generator/tool.py RENAMED
@@ -1,6 +1,6 @@
1
  import os
2
- from opentools.tools.base import BaseTool
3
- from opentools.engine.openai import ChatOpenAI
4
 
5
  class Generalist_Solution_Generator_Tool(BaseTool):
6
  require_llm_engine = True
@@ -109,7 +109,7 @@ if __name__ == "__main__":
109
  """
110
  Run the following commands in the terminal to test the script:
111
 
112
- cd opentools
113
  python tools/default/tool.py
114
  """
115
 
 
1
  import os
2
+ from octotools.tools.base import BaseTool
3
+ from octotools.engine.openai import ChatOpenAI
4
 
5
  class Generalist_Solution_Generator_Tool(BaseTool):
6
  require_llm_engine = True
 
109
  """
110
  Run the following commands in the terminal to test the script:
111
 
112
+ cd octotools
113
  python tools/default/tool.py
114
  """
115
 
octotools/tools/google_search/__init__.py ADDED
File without changes
octotools/tools/google_search/test.log ADDED
@@ -0,0 +1,29 @@
1
+ {'tool_name': 'Google_Search_Tool', 'tool_description': 'A tool that performs Google searches based on a given text query.', 'tool_version': '1.0.0', 'input_types': {'query': 'str - The search query to be used for the Google search.', 'num_results': 'int - The number of search results to return (default: 10).'}, 'output_type': 'list - A list of dictionaries containing search result information.', 'demo_commands': [{'command': 'execution = tool.execute(query="Python programming")', 'description': "Perform a Google search for 'Python programming' and return the default number of results."}, {'command': 'execution = tool.execute(query="Machine learning tutorials", num_results=5)', 'description': "Perform a Google search for 'Machine learning tutorials' and return 5 results."}], 'require_llm_engine': False}
2
+ {'kind': 'customsearch#search', 'url': {'type': 'application/json', 'template': 'https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={safe?}&cx={cx?}&sort={sort?}&filter={filter?}&gl={gl?}&cr={cr?}&googlehost={googleHost?}&c2coff={disableCnTwTranslation?}&hq={hq?}&hl={hl?}&siteSearch={siteSearch?}&siteSearchFilter={siteSearchFilter?}&exactTerms={exactTerms?}&excludeTerms={excludeTerms?}&linkSite={linkSite?}&orTerms={orTerms?}&dateRestrict={dateRestrict?}&lowRange={lowRange?}&highRange={highRange?}&searchType={searchType}&fileType={fileType?}&rights={rights?}&imgSize={imgSize?}&imgType={imgType?}&imgColorType={imgColorType?}&imgDominantColor={imgDominantColor?}&alt=json'}, 'queries': {'request': [{'title': 'Google Custom Search - nobel prize winners in chemistry 2024', 'totalResults': '1020000', 'searchTerms': 'nobel prize winners in chemistry 2024', 'count': 5, 'startIndex': 1, 'inputEncoding': 'utf8', 'outputEncoding': 'utf8', 'safe': 'off', 'cx': 'd5bb3fdd4b7fd4cd9'}], 'nextPage': [{'title': 'Google Custom Search - nobel prize winners in chemistry 2024', 'totalResults': '1020000', 'searchTerms': 'nobel prize winners in chemistry 2024', 'count': 5, 'startIndex': 6, 'inputEncoding': 'utf8', 'outputEncoding': 'utf8', 'safe': 'off', 'cx': 'd5bb3fdd4b7fd4cd9'}]}, 'context': {'title': 'toolbox-dev-pan'}, 'searchInformation': {'searchTime': 0.285868, 'formattedSearchTime': '0.29', 'totalResults': '1020000', 'formattedTotalResults': '1,020,000'}, 'items': [{'kind': 'customsearch#result', 'title': 'The Nobel Prize in Chemistry 2024', 'htmlTitle': 'The <b>Nobel Prize</b> in <b>Chemistry 2024</b>', 'link': 'https://www.nobelprize.org/prizes/chemistry/', 'displayLink': 'www.nobelprize.org', 'snippet': "The Nobel Prize in Chemistry 2024 is about proteins, life's ingenious chemical tools. David Baker has succeeded with the almost impossible feat of building\xa0...", 'htmlSnippet': 'The <b>Nobel Prize</b> in <b>Chemistry 2024</b> is about proteins, life&#39;s ingenious <b>chemical</b> tools. 
David Baker has succeeded with the almost impossible feat of building&nbsp;...', 'formattedUrl': 'https://www.nobelprize.org/prizes/chemistry/', 'htmlFormattedUrl': 'https://www.<b>nobelprize</b>.org/<b>prize</b>s/<b>chemistry</b>/', 'pagemap': {'hcard': [{'fn': 'Ernest Rutherford', 'url': 'https://www.nobelprize.org/prizes/chemistry/1908/rutherford/'}, {'fn': 'Marie Curie, nΓ©e SkΕ‚odowska', 'url': 'https://www.nobelprize.org/prizes/physics/1903/marie-curie/'}, {'fn': 'Jacques Dubochet', 'url': 'https://www.nobelprize.org/prizes/chemistry/2017/dubochet/'}, {'fn': 'Dorothy Crowfoot Hodgkin', 'url': 'https://www.nobelprize.org/prizes/chemistry/1964/hodgkin/'}, {'fn': 'Linus Carl Pauling', 'url': 'https://www.nobelprize.org/prizes/chemistry/1954/pauling/'}, {'fn': 'Jean-Pierre Sauvage', 'url': 'https://www.nobelprize.org/prizes/chemistry/2016/sauvage/'}], 'cse_thumbnail': [{'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSOMs3quxSE4q1eWHocdDOr31q-aad13MSHgdUGUqpOG71sdtm6WGQ5BWAX&s', 'width': '276', 'height': '182'}], 'person': [{'name': 'Ernest Rutherford', 'url': 'Ernest Rutherford'}, {'name': 'Marie Curie, nΓ©e SkΕ‚odowska', 'url': 'Marie Curie, nΓ©e SkΕ‚odowska'}, {'name': 'Jacques Dubochet', 'url': 'Jacques Dubochet'}, {'name': 'Dorothy Crowfoot Hodgkin', 'url': 'Dorothy Crowfoot Hodgkin'}, {'name': 'Linus Carl Pauling', 'url': 'Linus Carl Pauling'}, {'name': 'Jean-Pierre Sauvage', 'url': 'Jean-Pierre Sauvage'}], 'organization': [{'url': 'https://www.nobelprize.org/'}, {'logo': 'Nobel Prize', 'url': 'Nobel Prize'}], 'metatags': [{'og:image': 'https://www.nobelprize.org/uploads/2018/09/New-will-fullsize-496x328.jpg', 'og:type': 'website', 'twitter:card': 'summary_large_image', 'twitter:title': 'Chemistry Prize', 'og:site_name': 'NobelPrize.org', 'og:title': 'Chemistry Prize', 'msapplication-tileimage': 'https://www.nobelprize.org/uploads/2018/08/Nobel-favicon.png', 'og:description': 'Chemistry Prize', 'twitter:image': 'https://www.nobelprize.org/uploads/2018/09/New-will-fullsize-496x328.jpg', 'viewport': 'width=device-width, initial-scale=1', 'twitter:description': 'Chemistry Prize', 'og:locale': 'en_US', 'og:url': 'https://www.nobelprize.org/chemistry-prize-2/'}], 'cse_image': [{'src': 'https://www.nobelprize.org/uploads/2018/09/New-will-fullsize-496x328.jpg'}], 'blogposting': [{'headline': 'They cracked the code for proteins’ amazing structures'}, {'headline': 'David Baker'}, {'headline': 'Demis Hassabis'}, {'headline': 'John Jumper'}, {'headline': 'The life of a chemist'}, {'headline': 'How many chemistry laureates can you match?'}, {'headline': 'What are the Nobel Prize categories?'}, {'headline': 'What did they discover?'}, {'headline': 'Who first predicted global warming?'}, {'headline': 'The world’s smallest machines'}, {'headline': 'Interview with a double awardee'}]}}, {'kind': 'customsearch#result', 'title': 'NSF congratulates laureates of the 2024 Nobel Prize in chemistry ...', 'htmlTitle': 'NSF congratulates <b>laureates</b> of the <b>2024 Nobel Prize</b> in <b>chemistry</b> ...', 'link': 'https://www.nsf.gov/news/nsf-congratulates-laureates-2024-nobel-prize-chemistry', 'displayLink': 'www.nsf.gov', 'snippet': 'Oct 9, 2024 ... The U.S. National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the 2024 Nobel Prize in\xa0...', 'htmlSnippet': 'Oct 9, 2024 <b>...</b> The U.S. 
National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the <b>2024 Nobel Prize</b> in&nbsp;...', 'formattedUrl': 'https://www.nsf.gov/.../nsf-congratulates-laureates-2024-nobel-prize-chemi...', 'htmlFormattedUrl': 'https://www.nsf.gov/.../nsf-congratulates-laureates-<b>2024</b>-<b>nobel</b>-<b>prize</b>-<b>chemi</b>...', 'pagemap': {'cse_thumbnail': [{'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRH1B2HTt4h0BsKt4cqcthiUWk5GDU2hX6k7mP1EQh0glR9n13NnR2pLX4&s', 'width': '360', 'height': '140'}], 'metatags': [{'og:image': 'https://nsf-gov-resources.nsf.gov/styles/_inline_image_full_width/s3/media_hub/nobel24-chemistry-news-hero-David-Baker-Demis-Hassabis-John-Jumper.jpg?VersionId=8Wa1DsrhgMw.SQEBdxmUOgtdCntLkWDb&itok=NIPtuWh-', 'og:image:width': '800', 'og:image:alt': 'sketch portraits of three men', 'twitter:card': 'summary_large_image', 'twitter:title': 'NSF congratulates laureates of the 2024 Nobel Prize in chemistry', 'og:site_name': 'NSF - National Science Foundation', 'handheldfriendly': 'true', 'og:title': 'NSF congratulates laureates of the 2024 Nobel Prize in chemistry', 'og:image:height': '312', 'og:description': 'The U.S. National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the 2024 Nobel Prize in chemistry. Baker and his…', 'twitter:image': 'https://nsf-gov-resources.nsf.gov/styles/_inline_image_full_width/s3/media_hub/nobel24-chemistry-news-hero-David-Baker-Demis-Hassabis-John-Jumper.jpg?VersionId=8Wa1DsrhgMw.SQEBdxmUOgtdCntLkWDb&itok=NIPtuWh-', 'twitter:image:alt': 'sketch portraits of three men', 'twitter:site': '@NSF', 'viewport': 'width=device-width, initial-scale=1.0', 'twitter:description': 'The U.S. National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the 2024 Nobel Prize in chemistry. Baker and his…', 'mobileoptimized': 'width', 'og:url': 'https://www.nsf.gov/news/nsf-congratulates-laureates-2024-nobel-prize-chemistry'}], 'cse_image': [{'src': 'https://nsf-gov-resources.nsf.gov/styles/_inline_image_full_width/s3/media_hub/nobel24-chemistry-news-hero-David-Baker-Demis-Hassabis-John-Jumper.jpg?VersionId=8Wa1DsrhgMw.SQEBdxmUOgtdCntLkWDb&itok=NIPtuWh-'}]}}, {'kind': 'customsearch#result', 'title': 'Press release: The Nobel Prize in Chemistry 2024 - NobelPrize.org', 'htmlTitle': 'Press release: The <b>Nobel Prize</b> in <b>Chemistry 2024</b> - <b>NobelPrize</b>.org', 'link': 'https://www.nobelprize.org/prizes/chemistry/2024/press-release/', 'displayLink': 'www.nobelprize.org', 'snippet': 'Oct 9, 2024 ... David Baker has succeeded with the almost impossible feat of building entirely new kinds of proteins. Demis Hassabis and John Jumper have\xa0...', 'htmlSnippet': 'Oct 9, 2024 <b>...</b> David Baker has succeeded with the almost impossible feat of building entirely new kinds of proteins. 
Demis Hassabis and John Jumper have&nbsp;...', 'formattedUrl': 'https://www.nobelprize.org/prizes/chemistry/2024/press-release/', 'htmlFormattedUrl': 'https://www.<b>nobelprize</b>.org/<b>prize</b>s/<b>chemistry</b>/<b>2024</b>/press-release/', 'pagemap': {'cse_thumbnail': [{'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQON_Hf1_dyrEACSiPsjRoTG2R4pjcDpbg7BAzGC9LovMbRHVei6GShrCM&s', 'width': '302', 'height': '167'}], 'organization': [{'url': 'https://www.nobelprize.org/'}, {'logo': 'Nobel Prize', 'url': 'Nobel Prize'}], 'metatags': [{'og:image': 'https://www.nobelprize.org/uploads/2024/10/popular-chemistryprize2024-carrier-1024x567.jpg', 'og:type': 'website', 'twitter:card': 'summary_large_image', 'twitter:title': 'Nobel Prize in Chemistry 2024', 'og:site_name': 'NobelPrize.org', 'og:title': 'Nobel Prize in Chemistry 2024', 'msapplication-tileimage': 'https://www.nobelprize.org/uploads/2018/08/Nobel-favicon.png', 'og:description': 'The Nobel Prize in Chemistry 2024 was divided, one half awarded to David Baker "for computational protein design", the other half jointly to Demis Hassabis and John Jumper "for protein structure prediction"', 'twitter:image': 'https://www.nobelprize.org/uploads/2024/10/popular-chemistryprize2024-carrier-1024x567.jpg', 'viewport': 'width=device-width, initial-scale=1', 'twitter:description': 'The Nobel Prize in Chemistry 2024 was divided, one half awarded to David Baker "for computational protein design", the other half jointly to Demis Hassabis and John Jumper "for protein structure prediction"', 'og:locale': 'en_US', 'og:url': 'https://www.nobelprize.org/prizes/chemistry/2024/press-release/'}], 'cse_image': [{'src': 'https://www.nobelprize.org/uploads/2024/10/popular-chemistryprize2024-carrier-1024x567.jpg'}]}}, {'kind': 'customsearch#result', 'title': 'AIP Congratulates 2024 Nobel Prize Winners in Chemistry - AIP.ORG', 'htmlTitle': 'AIP Congratulates <b>2024 Nobel Prize Winners in Chemistry</b> - AIP.ORG', 'link': 'https://ww2.aip.org/aip/2024-nobel-prize-in-chemistry', 'displayLink': 'ww2.aip.org', 'snippet': 'Oct 9, 2024 ... 
The 2024 Nobel Prize in chemistry was awarded with one half to David Baker β€œfor computational protein design” and the other half jointly to Demis Hassabis and\xa0...', 'htmlSnippet': 'Oct 9, 2024 <b>...</b> The <b>2024 Nobel Prize</b> in <b>chemistry</b> was awarded with one half to David Baker β€œfor computational protein design” and the other half jointly to Demis Hassabis and&nbsp;...', 'formattedUrl': 'https://ww2.aip.org/aip/2024-nobel-prize-in-chemistry', 'htmlFormattedUrl': 'https://ww2.aip.org/aip/<b>2024</b>-<b>nobel</b>-<b>prize</b>-in-<b>chemistry</b>', 'pagemap': {'cse_thumbnail': [{'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTyVGKJaR9X7VE3Y97Z_j8IXIconF0D0zUe88ATVyBuFSXya-CrM1qsveY&s', 'width': '300', 'height': '168'}], 'metatags': [{'og:image': 'https://aip.brightspotcdn.com/dims4/default/079290d/2147483647/strip/true/crop/1200x675+0+0/resize/1440x810!/quality/90/?url=https%3A%2F%2Fk1-prod-aip.s3.us-east-2.amazonaws.com%2Fbrightspot%2F07%2F96%2Fbc7d9a594949bc49f983177d6781%2Fnobel-2024-chem-i01.jpg', 'og:image:width': '1440', 'og:image:alt': 'Nobel-2024-Chem-i01.jpg', 'og:type': 'article', 'article:published_time': '2024-10-09T12:28:31.16', 'article:section': 'AIP', 'twitter:card': 'summary_large_image', 'og:site_name': 'AIP', 'og:image:url': 'https://aip.brightspotcdn.com/dims4/default/079290d/2147483647/strip/true/crop/1200x675+0+0/resize/1440x810!/quality/90/?url=https%3A%2F%2Fk1-prod-aip.s3.us-east-2.amazonaws.com%2Fbrightspot%2F07%2F96%2Fbc7d9a594949bc49f983177d6781%2Fnobel-2024-chem-i01.jpg', 'og:image:height': '810', 'og:image:type': 'image/jpeg', 'twitter:image': 'https://aip.brightspotcdn.com/dims4/default/079290d/2147483647/strip/true/crop/1200x675+0+0/resize/1440x810!/quality/90/?url=https%3A%2F%2Fk1-prod-aip.s3.us-east-2.amazonaws.com%2Fbrightspot%2F07%2F96%2Fbc7d9a594949bc49f983177d6781%2Fnobel-2024-chem-i01.jpg', 'twitter:image:alt': 'Nobel-2024-Chem-i01.jpg', 'fb:app_id': '643005150655973', 'article:modified_time': '2024-10-09T18:23:12.852', 'viewport': 'width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=5', 'brightspot.contentid': '00000192-70e9-da7e-a1fe-fcff80d40000', 'og:url': 'https://ww2.aip.org/aip/2024-nobel-prize-in-chemistry'}], 'cse_image': [{'src': 'https://aip.brightspotcdn.com/dims4/default/079290d/2147483647/strip/true/crop/1200x675+0+0/resize/1440x810!/quality/90/?url=https%3A%2F%2Fk1-prod-aip.s3.us-east-2.amazonaws.com%2Fbrightspot%2F07%2F96%2Fbc7d9a594949bc49f983177d6781%2Fnobel-2024-chem-i01.jpg'}]}}, {'kind': 'customsearch#result', 'title': 'The Nobel Prize in Chemistry 2024 - NobelPrize.org', 'htmlTitle': 'The <b>Nobel Prize</b> in <b>Chemistry 2024</b> - <b>NobelPrize</b>.org', 'link': 'https://www.nobelprize.org/prizes/chemistry/2024/summary/', 'displayLink': 'www.nobelprize.org', 'snippet': 'David Baker Β· Demis Hassabis Β· John Jumper Β· Nobel Prizes and laureates\xa0...', 'htmlSnippet': 'David Baker &middot; Demis Hassabis &middot; John Jumper &middot; <b>Nobel Prizes</b> and <b>laureates</b>&nbsp;...', 'formattedUrl': 'https://www.nobelprize.org/prizes/chemistry/2024/summary/', 'htmlFormattedUrl': 'https://www.<b>nobelprize</b>.org/<b>prize</b>s/<b>chemistry</b>/<b>2024</b>/summary/', 'pagemap': {'hcard': [{'fn': 'David Baker', 'url': 'https://www.nobelprize.org/prizes/chemistry/2024/baker/facts/'}, {'fn': 'Demis Hassabis', 'url': 'https://www.nobelprize.org/prizes/chemistry/2024/hassabis/facts/'}, {'fn': 'John Jumper', 'url': 'https://www.nobelprize.org/prizes/chemistry/2024/jumper/facts/'}], 
'cse_thumbnail': [{'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT4UpOxZHEJhIvhwQsypTDggUJ3pJf_wG3U3jZBDyK8l-Qyqx99fVknWr8Y&s', 'width': '259', 'height': '194'}], 'person': [{'name': 'David Baker', 'description': 'Prize share: 1/2', 'url': 'David Baker'}, {'name': 'Demis Hassabis', 'description': 'Prize share: 1/4', 'url': 'Demis Hassabis'}, {'name': 'John Jumper', 'description': 'Prize share: 1/4', 'url': 'John Jumper'}], 'organization': [{'url': 'https://www.nobelprize.org/'}, {'logo': 'Nobel Prize', 'url': 'Nobel Prize'}], 'metatags': [{'og:image': 'https://www.nobelprize.org/uploads/2024/10/fig_ke_24_4x3-1024x768.jpg', 'og:type': 'website', 'twitter:card': 'summary_large_image', 'twitter:title': 'Nobel Prize in Chemistry 2024', 'og:site_name': 'NobelPrize.org', 'og:title': 'Nobel Prize in Chemistry 2024', 'msapplication-tileimage': 'https://www.nobelprize.org/uploads/2018/08/Nobel-favicon.png', 'og:description': 'The Nobel Prize in Chemistry 2024 was divided, one half awarded to David Baker "for computational protein design", the other half jointly to Demis Hassabis and John Jumper "for protein structure prediction"', 'twitter:image': 'https://www.nobelprize.org/uploads/2024/10/fig_ke_24_4x3-1024x768.jpg', 'viewport': 'width=device-width, initial-scale=1', 'twitter:description': 'The Nobel Prize in Chemistry 2024 was divided, one half awarded to David Baker "for computational protein design", the other half jointly to Demis Hassabis and John Jumper "for protein structure prediction"', 'og:locale': 'en_US', 'og:url': 'https://www.nobelprize.org/prizes/chemistry/2024/summary/'}], 'cse_image': [{'src': 'https://www.nobelprize.org/uploads/2024/10/fig_ke_24_4x3-1024x768.jpg'}]}}]}
3
+
4
+ Execution Result:
5
+ Search query: nobel prize winners in chemistry 2024
6
+ Number of results: 5
7
+
8
+ Search Results:
9
+
10
+ 1. Title: The Nobel Prize in Chemistry 2024
11
+ URL: https://www.nobelprize.org/prizes/chemistry/
12
+ Snippet: The Nobel Prize in Chemistry 2024 is about proteins, life's ingenious chemical tools. David Baker has succeeded with the almost impossible feat of building ...
13
+
14
+ 2. Title: NSF congratulates laureates of the 2024 Nobel Prize in chemistry ...
15
+ URL: https://www.nsf.gov/news/nsf-congratulates-laureates-2024-nobel-prize-chemistry
16
+ Snippet: Oct 9, 2024 ... The U.S. National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the 2024 Nobel Prize in ...
17
+
18
+ 3. Title: Press release: The Nobel Prize in Chemistry 2024 - NobelPrize.org
19
+ URL: https://www.nobelprize.org/prizes/chemistry/2024/press-release/
20
+ Snippet: Oct 9, 2024 ... David Baker has succeeded with the almost impossible feat of building entirely new kinds of proteins. Demis Hassabis and John Jumper haveΒ ...
21
+
22
+ 4. Title: AIP Congratulates 2024 Nobel Prize Winners in Chemistry - AIP.ORG
23
+ URL: https://ww2.aip.org/aip/2024-nobel-prize-in-chemistry
24
+ Snippet: Oct 9, 2024 ... The 2024 Nobel Prize in chemistry was awarded with one half to David Baker β€œfor computational protein design” and the other half jointly to Demis Hassabis andΒ ...
25
+
26
+ 5. Title: The Nobel Prize in Chemistry 2024 - NobelPrize.org
27
+ URL: https://www.nobelprize.org/prizes/chemistry/2024/summary/
28
+ Snippet: David Baker Β· Demis Hassabis Β· John Jumper Β· Nobel Prizes and laureatesΒ ...
29
+ Done!
octotools/tools/google_search/tool.py ADDED
@@ -0,0 +1,136 @@
1
+ import os
2
+ import requests
3
+ from typing import List, Dict, Any
4
+
5
+ from octotools.tools.base import BaseTool
6
+
7
+ from dotenv import load_dotenv
8
+ load_dotenv()
9
+
10
+ class Google_Search_Tool(BaseTool):
11
+ def __init__(self):
12
+ super().__init__(
13
+ tool_name="Google_Search_Tool",
14
+ tool_description="A tool that performs Google searches based on a given text query.",
15
+ tool_version="1.0.0",
16
+ input_types={
17
+ "query": "str - The search query to be used for the Google search.",
18
+ "num_results": "int - The number of search results to return (default: 10).",
19
+ },
20
+ output_type="list - A list of dictionaries containing search result information.",
21
+ demo_commands=[
22
+ {
23
+ "command": 'execution = tool.execute(query="Python programming")',
24
+ "description": "Perform a Google search for 'Python programming' and return the default number of results."
25
+ },
26
+ {
27
+ "command": 'execution = tool.execute(query="Machine learning tutorials", num_results=5)',
28
+ "description": "Perform a Google search for 'Machine learning tutorials' and return 5 results."
29
+ },
30
+ ],
31
+ )
32
+ # self.api_key = os.getenv("GOOGLE_API_KEY")
33
+ self.api_key = os.getenv("GOOGLE_API_KEY") # NOTE: Replace with your own API key (Ref: https://developers.google.com/custom-search/v1/introduction)
34
+ self.cx = os.getenv("GOOGLE_CX") # NOTE: Replace with your own custom search (Ref: https://programmablesearchengine.google.com/controlpanel/all)
35
+ self.base_url = "https://www.googleapis.com/customsearch/v1"
36
+
37
+ def google_search(self, query: str, num_results: int = 10) -> Dict[str, Any]:
38
+ """
39
+ Performs a Google search using the provided query.
40
+
41
+ Parameters:
42
+ query (str): The search query.
43
+ num_results (int): The number of search results to return.
44
+
45
+ Returns:
46
+ Dict[str, Any]: The raw search results from the Google API.
47
+ """
48
+ params = {
49
+ 'q': query,
50
+ 'key': self.api_key,
51
+ 'cx': self.cx,
52
+ 'num': num_results
53
+ }
54
+
55
+ response = requests.get(self.base_url, params=params)
56
+ return response.json()
57
+
58
+ def execute(self, query: str, num_results: int = 10) -> List[Dict[str, Any]]:
59
+ """
60
+ Executes a Google search based on the provided query.
61
+
62
+ Parameters:
63
+ query (str): The search query.
64
+ num_results (int): The number of search results to return (default: 10).
65
+
66
+ Returns:
67
+ List[Dict[str, Any]]: A list of dictionaries containing search result information.
68
+ """
69
+ if not self.api_key:
70
+ return [{"error": "Google API key is not set. Please set the GOOGLE_API_KEY environment variable."}]
71
+
72
+ try:
73
+ results = self.google_search(query, num_results)
74
+ print(results)
75
+
76
+ if 'items' in results:
77
+ return [
78
+ {
79
+ "title": item['title'],
80
+ "link": item['link'],
81
+ "snippet": item['snippet']
82
+ }
83
+ for item in results['items']
84
+ ]
85
+ else:
86
+ return [{"error": "No results found."}]
87
+ except Exception as e:
88
+ return [{"error": f"An error occurred: {str(e)}"}]
89
+
90
+ def get_metadata(self):
91
+ """
92
+ Returns the metadata for the Google_Search_Tool.
93
+
94
+ Returns:
95
+ dict: A dictionary containing the tool's metadata.
96
+ """
97
+ metadata = super().get_metadata()
98
+ return metadata
99
+
100
+
101
+ if __name__ == "__main__":
102
+ # Test command:
103
+ """
104
+ Run the following commands in the terminal to test the script:
105
+
106
+ export GOOGLE_API_KEY=your_api_key_here
107
+ cd octotools/tools/google_search
108
+ python tool.py
109
+ """
110
+
111
+ # Example usage of the Google_Search_Tool
112
+ tool = Google_Search_Tool()
113
+
114
+ # Get tool metadata
115
+ metadata = tool.get_metadata()
116
+ print(metadata)
117
+
118
+ # Execute the tool to perform a Google search
119
+ query = "nobel prize winners in chemistry 2024"
120
+ try:
121
+ execution = tool.execute(query=query, num_results=5)
122
+ print("\nExecution Result:")
123
+ print(f"Search query: {query}")
124
+ print(f"Number of results: {len(execution)}")
125
+ print("\nSearch Results:")
126
+ if "error" in execution[0]:
127
+ print(f"Error: {execution[0]['error']}")
128
+ else:
129
+ for i, item in enumerate(execution, 1):
130
+ print(f"\n{i}. Title: {item['title']}")
131
+ print(f" URL: {item['link']}")
132
+ print(f" Snippet: {item['snippet']}")
133
+ except Exception as e:
134
+ print(f"Execution failed: {e}")
135
+
136
+ print("Done!")
octotools/tools/image_captioner/__init__.py ADDED
File without changes
octotools/tools/image_captioner/examples/baseball.png ADDED
octotools/tools/image_captioner/test.log ADDED
@@ -0,0 +1,7 @@
1
+
2
+ Initializing Image Captioner Tool with model: gpt-4o
3
+ !! Cache enabled for model: gpt-4o
4
+ {'tool_name': 'Image_Captioner_Tool', 'tool_description': "A tool that generates captions for images using OpenAI's multimodal model.", 'tool_version': '1.0.0', 'input_types': {'image': 'str - The path to the image file.', 'prompt': "str - The prompt to guide the image captioning (default: 'Describe this image in detail.')."}, 'output_type': 'str - The generated caption for the image.', 'demo_commands': [{'command': 'execution = tool.execute(image="path/to/image.png")', 'description': 'Generate a caption for an image using the default prompt and model.'}, {'command': 'execution = tool.execute(image="path/to/image.png", prompt="Explain the mood of this scene.")', 'description': 'Generate a caption focusing on the mood using a specific prompt and model.'}], 'require_llm_engine': True, 'user_metadata': {'limitation': 'The Image_Captioner_Tool provides general image descriptions but has limitations: 1) May make mistakes in complex scenes, counting, attribute detection, and understanding object relationships. 2) Might not generate comprehensive captions, especially for images with multiple objects or abstract concepts. 3) Performance varies with image complexity. 4) Struggles with culturally specific or domain-specific content. 5) May overlook details or misinterpret object relationships. For precise descriptions, consider: using it with other tools for context/verification, as an initial step before refinement, or in multi-step processes for ambiguity resolution. Verify critical information with specialized tools or human expertise when necessary.'}}
5
+ Generated Caption:
6
+ "The image shows four blue buckets, each containing five baseballs. The buckets are arranged in a grid pattern with three on the top row and one on the bottom left. Each bucket has a handle on the side, and the baseballs inside are white with red stitching, typical of standard baseballs. The background is plain white, emphasizing the buckets and their contents."
7
+ Done!
octotools/tools/image_captioner/tool.py ADDED
@@ -0,0 +1,96 @@
1
+ import os
2
+ from octotools.tools.base import BaseTool
3
+ from octotools.engine.openai import ChatOpenAI
4
+
5
+ class Image_Captioner_Tool(BaseTool):
6
+ require_llm_engine = True
7
+
8
+ def __init__(self, model_string="gpt-4o-mini"):
9
+ super().__init__(
10
+ tool_name="Image_Captioner_Tool",
11
+ tool_description="A tool that generates captions for images using OpenAI's multimodal model.",
12
+ tool_version="1.0.0",
13
+ input_types={
14
+ "image": "str - The path to the image file.",
15
+ "prompt": "str - The prompt to guide the image captioning (default: 'Describe this image in detail.').",
16
+ },
17
+ output_type="str - The generated caption for the image.",
18
+ demo_commands=[
19
+ {
20
+ "command": 'execution = tool.execute(image="path/to/image.png")',
21
+ "description": "Generate a caption for an image using the default prompt and model."
22
+ },
23
+ {
24
+ "command": 'execution = tool.execute(image="path/to/image.png", prompt="Explain the mood of this scene.")',
25
+ "description": "Generate a caption focusing on the mood using a specific prompt and model."
26
+ }
27
+ ],
28
+ user_metadata = {
29
+ "limitation": "The Image_Captioner_Tool provides general image descriptions but has limitations: 1) May make mistakes in complex scenes, counting, attribute detection, and understanding object relationships. 2) Might not generate comprehensive captions, especially for images with multiple objects or abstract concepts. 3) Performance varies with image complexity. 4) Struggles with culturally specific or domain-specific content. 5) May overlook details or misinterpret object relationships. For precise descriptions, consider: using it with other tools for context/verification, as an initial step before refinement, or in multi-step processes for ambiguity resolution. Verify critical information with specialized tools or human expertise when necessary."
30
+ },
31
+ )
32
+ print(f"\nInitializing Image Captioner Tool with model: {model_string}")
33
+ self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=True) if model_string else None
34
+
35
+ def execute(self, image, prompt="Describe this image in detail."):
36
+ try:
37
+ if not self.llm_engine:
38
+ return "Error: LLM engine not initialized. Please provide a valid model_string."
39
+
40
+ input_data = [prompt]
41
+
42
+ if image and os.path.isfile(image):
43
+ try:
44
+ with open(image, 'rb') as file:
45
+ image_bytes = file.read()
46
+ input_data.append(image_bytes)
47
+ except Exception as e:
48
+ return f"Error reading image file: {str(e)}"
49
+ else:
50
+ return "Error: Invalid image file path."
51
+
52
+ caption = self.llm_engine(input_data)
53
+ return caption
54
+ except Exception as e:
55
+ return f"Error generating caption: {str(e)}"
56
+
57
+ def get_metadata(self):
58
+ metadata = super().get_metadata()
59
+ metadata['require_llm_engine'] = self.require_llm_engine # NOTE: can be removed if not needed
60
+ return metadata
61
+
62
+ if __name__ == "__main__":
63
+ # Test command:
64
+ """
65
+ Run the following commands in the terminal to test the script:
66
+
67
+ cd octotools/tools/image_captioner
68
+ python tool.py
69
+ """
70
+
71
+ import json
72
+
73
+ # Get the directory of the current script
74
+ script_dir = os.path.dirname(os.path.abspath(__file__))
75
+
76
+ # Example usage of the Image_Captioner_Tool
77
+ # tool = Image_Captioner_Tool()
78
+ tool = Image_Captioner_Tool(model_string="gpt-4o")
79
+
80
+ # Get tool metadata
81
+ metadata = tool.get_metadata()
82
+ print(metadata)
83
+
84
+ # Construct the full path to the image using the script's directory
85
+ relative_image_path = "examples/baseball.png"
86
+ image_path = os.path.join(script_dir, relative_image_path)
87
+
88
+ # Execute the tool with default prompt
89
+ try:
90
+ execution = tool.execute(image=image_path)
91
+ print("Generated Caption:")
92
+ print(json.dumps(execution, indent=4))
93
+ except Exception as e:
94
+ print(f"Execution failed: {e}")
95
+
96
+ print("Done!")
octotools/tools/nature_news_fetcher/__init__.py ADDED
File without changes
octotools/tools/nature_news_fetcher/test.log ADDED
@@ -0,0 +1,180 @@
1
+ {'tool_name': 'Nature_News_Fetcher_Tool', 'tool_description': 'A tool that fetches the latest news articles from Nature.', 'tool_version': '1.0.0', 'input_types': {'num_articles': 'int - The number of articles to fetch (default: 100).', 'max_pages': 'int - The maximum number of pages to fetch (default: 5).'}, 'output_type': 'list - A list of dictionaries containing information about the latest Nature news articles.', 'demo_commands': [{'command': 'execution = tool.execute()', 'description': 'Fetch the latest 100 news articles from Nature.'}, {'command': 'execution = tool.execute(num_articles=50, max_pages=3)', 'description': 'Fetch the latest 50 news articles from Nature, searching up to 3 pages.'}], 'require_llm_engine': False}
2
+ [
3
+ {
4
+ "title": "NASA embraced diversity. Trump\u2019s DEI purge is hitting space scientists hard",
5
+ "url": "https://www.nature.com/articles/d41586-025-00480-x",
6
+ "description": "Some researchers at NASA and outside it feel betrayed by the changes at the agency, which was known for promoting inclusion in science.",
7
+ "authors": [
8
+ "Alexandra Witze"
9
+ ],
10
+ "date": "2025-02-14",
11
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00480-x/d41586-025-00480-x_50636314.jpg"
12
+ },
13
+ {
14
+ "title": "Are the Trump team\u2019s actions affecting your research? How to contact Nature",
15
+ "url": "https://www.nature.com/articles/d41586-025-00479-4",
16
+ "description": "Use this form to share information with Nature\u2019s news team, or to make suggestions for future coverage.",
17
+ "authors": [
18
+ "No authors found"
19
+ ],
20
+ "date": "2025-02-13",
21
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00479-4/d41586-025-00479-4_50626000.jpg"
22
+ },
23
+ {
24
+ "title": "Scientists use AI to design life-like enzymes from scratch",
25
+ "url": "https://www.nature.com/articles/d41586-025-00488-3",
26
+ "description": "Combined approach takes AI-engineered enzymes one step closer to practical applications.",
27
+ "authors": [
28
+ "Miryam Naddaf"
29
+ ],
30
+ "date": "2025-02-13",
31
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00488-3/d41586-025-00488-3_50636094.jpg"
32
+ },
33
+ {
34
+ "title": "Royal Society will meet amid campaign to revoke Elon Musk\u2019s fellowship",
35
+ "url": "https://www.nature.com/articles/d41586-025-00486-5",
36
+ "description": "More than 1,300 scientists have signed a letter calling on the world\u2019s oldest science society to reassess the billionaire\u2019s membership following cuts to US science.",
37
+ "authors": [
38
+ "Holly Else"
39
+ ],
40
+ "date": "2025-02-13",
41
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00486-5/d41586-025-00486-5_50635956.jpg"
42
+ },
43
+ {
44
+ "title": "Vaccine sceptic RFK Jr is now a powerful force in US science: what will he do?",
45
+ "url": "https://www.nature.com/articles/d41586-025-00439-y",
46
+ "description": "Kennedy has expressed support for some fields, but has also declared he\u2019d like a \u2018break\u2019 in infectious-disease research.",
47
+ "authors": [
48
+ "Amanda Heidt",
49
+ "Heidi Ledford"
50
+ ],
51
+ "date": "2025-02-13",
52
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00439-y/d41586-025-00439-y_50621992.jpg"
53
+ },
54
+ {
55
+ "title": "Are PhDs losing their lustre? Why fewer students are enrolling in doctoral degrees",
56
+ "url": "https://www.nature.com/articles/d41586-025-00425-4",
57
+ "description": "High living costs paired with stagnant stipends are being blamed for a drop in PhD enrolments in several countries.",
58
+ "authors": [
59
+ "Diana Kwon"
60
+ ],
61
+ "date": "2025-02-13",
62
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00425-4/d41586-025-00425-4_50621644.jpg"
63
+ },
64
+ {
65
+ "title": "Quantum-computing technology that makes qubits from atoms wins mega investment",
66
+ "url": "https://www.nature.com/articles/d41586-025-00451-2",
67
+ "description": "Firms using \u2018neutral atoms\u2019 to create qubits are reentering the race to build useful quantum machines.",
68
+ "authors": [
69
+ "Elizabeth Gibney"
70
+ ],
71
+ "date": "2025-02-13",
72
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00451-2/d41586-025-00451-2_50636260.jpg"
73
+ },
74
+ {
75
+ "title": "Cheap blood test detects pancreatic cancer before it spreads",
76
+ "url": "https://www.nature.com/articles/d41586-025-00438-z",
77
+ "description": "The deadly cancer is often not found until it has spread to other parts of the body.",
78
+ "authors": [
79
+ "Smriti Mallapaty"
80
+ ],
81
+ "date": "2025-02-12",
82
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00438-z/d41586-025-00438-z_50621748.jpg"
83
+ },
84
+ {
85
+ "title": "How centuries of isolation shaped Greenlanders\u2019 unique genetics",
86
+ "url": "https://www.nature.com/articles/d41586-025-00443-2",
87
+ "description": "Centuries of isolation have given Greenlanders a genetic profile that includes Arctic-specific variants.",
88
+ "authors": [
89
+ "Freda Kreier"
90
+ ],
91
+ "date": "2025-02-12",
92
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00443-2/d41586-025-00443-2_50625480.jpg"
93
+ },
94
+ {
95
+ "title": "Record-breaking neutrino is most energetic ever detected",
96
+ "url": "https://www.nature.com/articles/d41586-025-00444-1",
97
+ "description": "Although still under construction, the sea-floor KM3NeT detector spotted a neutrino 20 times more powerful than any previously detected.",
98
+ "authors": [
99
+ "Davide Castelvecchi"
100
+ ],
101
+ "date": "2025-02-12",
102
+ "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00444-1/d41586-025-00444-1_50625568.jpg"
103
+ }
104
+ ]
105
+
106
+ Execution Result:
107
+ Number of articles fetched: 10
108
+
109
+ Sample articles:
110
+
111
+ 1. Title: NASA embraced diversity. Trump’s DEI purge is hitting space scientists hard
112
+ URL: https://www.nature.com/articles/d41586-025-00480-x
113
+ Description: Some researchers at NASA and outside it feel betrayed by the changes at the agency, which was known ...
114
+ Authors: Alexandra Witze
115
+ Date: 2025-02-14
116
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00480-x/d41586-025-00480-x_50636314.jpg
117
+
118
+ 2. Title: Are the Trump team’s actions affecting your research? How to contact Nature
119
+ URL: https://www.nature.com/articles/d41586-025-00479-4
120
+ Description: Use this form to share information with Nature’s news team, or to make suggestions for future covera...
121
+ Authors: No authors found
122
+ Date: 2025-02-13
123
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00479-4/d41586-025-00479-4_50626000.jpg
124
+
125
+ 3. Title: Scientists use AI to design life-like enzymes from scratch
126
+ URL: https://www.nature.com/articles/d41586-025-00488-3
127
+ Description: Combined approach takes AI-engineered enzymes one step closer to practical applications....
128
+ Authors: Miryam Naddaf
129
+ Date: 2025-02-13
130
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00488-3/d41586-025-00488-3_50636094.jpg
131
+
132
+ 4. Title: Royal Society will meet amid campaign to revoke Elon Musk’s fellowship
133
+ URL: https://www.nature.com/articles/d41586-025-00486-5
134
+ Description: More than 1,300 scientists have signed a letter calling on the world’s oldest science society to rea...
135
+ Authors: Holly Else
136
+ Date: 2025-02-13
137
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00486-5/d41586-025-00486-5_50635956.jpg
138
+
139
+ 5. Title: Vaccine sceptic RFK Jr is now a powerful force in US science: what will he do?
140
+ URL: https://www.nature.com/articles/d41586-025-00439-y
141
+ Description: Kennedy has expressed support for some fields, but has also declared he’d like a β€˜break’ in infectio...
142
+ Authors: Amanda Heidt, Heidi Ledford
143
+ Date: 2025-02-13
144
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00439-y/d41586-025-00439-y_50621992.jpg
145
+
146
+ 6. Title: Are PhDs losing their lustre? Why fewer students are enrolling in doctoral degrees
147
+ URL: https://www.nature.com/articles/d41586-025-00425-4
148
+ Description: High living costs paired with stagnant stipends are being blamed for a drop in PhD enrolments in sev...
149
+ Authors: Diana Kwon
150
+ Date: 2025-02-13
151
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00425-4/d41586-025-00425-4_50621644.jpg
152
+
153
+ 7. Title: Quantum-computing technology that makes qubits from atoms wins mega investment
154
+ URL: https://www.nature.com/articles/d41586-025-00451-2
155
+ Description: Firms using β€˜neutral atoms’ to create qubits are reentering the race to build useful quantum machine...
156
+ Authors: Elizabeth Gibney
157
+ Date: 2025-02-13
158
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00451-2/d41586-025-00451-2_50636260.jpg
159
+
160
+ 8. Title: Cheap blood test detects pancreatic cancer before it spreads
161
+ URL: https://www.nature.com/articles/d41586-025-00438-z
162
+ Description: The deadly cancer is often not found until it has spread to other parts of the body....
163
+ Authors: Smriti Mallapaty
164
+ Date: 2025-02-12
165
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00438-z/d41586-025-00438-z_50621748.jpg
166
+
167
+ 9. Title: How centuries of isolation shaped Greenlanders’ unique genetics
168
+ URL: https://www.nature.com/articles/d41586-025-00443-2
169
+ Description: Centuries of isolation have given Greenlanders a genetic profile that includes Arctic-specific varia...
170
+ Authors: Freda Kreier
171
+ Date: 2025-02-12
172
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00443-2/d41586-025-00443-2_50625480.jpg
173
+
174
+ 10. Title: Record-breaking neutrino is most energetic ever detected
175
+ URL: https://www.nature.com/articles/d41586-025-00444-1
176
+ Description: Although still under construction, the sea-floor KM3NeT detector spotted a neutrino 20 times more po...
177
+ Authors: Davide Castelvecchi
178
+ Date: 2025-02-12
179
+ Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00444-1/d41586-025-00444-1_50625568.jpg
180
+ Done!
octotools/tools/nature_news_fetcher/tool.py ADDED
@@ -0,0 +1,181 @@
1
+ import os
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import time
5
+
6
+ from octotools.tools.base import BaseTool
7
+
8
+ class Nature_News_Fetcher_Tool(BaseTool):
9
+ def __init__(self):
10
+ super().__init__(
11
+ tool_name="Nature_News_Fetcher_Tool",
12
+ tool_description="A tool that fetches the latest news articles from Nature.",
13
+ tool_version="1.0.0",
14
+ input_types={
15
+ "num_articles": "int - The number of articles to fetch (default: 100).",
16
+ "max_pages": "int - The maximum number of pages to fetch (default: 5).",
17
+ },
18
+ output_type="list - A list of dictionaries containing information about the latest Nature news articles.",
19
+ demo_commands=[
20
+ {
21
+ "command": 'execution = tool.execute()',
22
+ "description": "Fetch the latest 100 news articles from Nature."
23
+ },
24
+ {
25
+ "command": 'execution = tool.execute(num_articles=50, max_pages=3)',
26
+ "description": "Fetch the latest 50 news articles from Nature, searching up to 3 pages."
27
+ },
28
+ ],
29
+ )
30
+ self.base_url = "https://www.nature.com/nature/articles"
31
+
32
+ def fetch_page(self, page_number):
33
+ """
34
+ Fetches a single page of news articles from Nature's website.
35
+
36
+ Parameters:
37
+ page_number (int): The page number to fetch.
38
+
39
+ Returns:
40
+ str: The HTML content of the page.
41
+ """
42
+ params = {
43
+ "searchType": "journalSearch",
44
+ "sort": "PubDate",
45
+ "type": "news",
46
+ "page": str(page_number)
47
+ }
48
+ headers = {
49
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
50
+ }
51
+ response = requests.get(self.base_url, params=params, headers=headers)
52
+ response.raise_for_status()
53
+ return response.text
54
+
55
+ def parse_articles(self, html_content):
56
+ """
57
+ Parses the HTML content and extracts article information.
58
+
59
+ Parameters:
60
+ html_content (str): The HTML content of the page.
61
+
62
+ Returns:
63
+ list: A list of dictionaries containing article information.
64
+ """
65
+ soup = BeautifulSoup(html_content, 'html.parser')
66
+ articles_section = soup.find('section', id='new-article-list')
67
+ if not articles_section:
68
+ return []
69
+
70
+ articles = []
71
+ for article in articles_section.find_all('article', class_='c-card'):
72
+ title_elem = article.find('h3', class_='c-card__title')
73
+ title = title_elem.text.strip() if title_elem else "No title found"
74
+
75
+ url_elem = title_elem.find('a') if title_elem else None
76
+ url = "https://www.nature.com" + url_elem['href'] if url_elem and 'href' in url_elem.attrs else "No URL found"
77
+
78
+ description_elem = article.find('div', {'data-test': 'article-description'})
79
+ description = description_elem.text.strip() if description_elem else "No description available"
80
+
81
+ authors_elem = article.find('ul', {'data-test': 'author-list'})
82
+ authors = [author.text.strip() for author in authors_elem.find_all('li')] if authors_elem else ["No authors found"]
83
+
84
+ date_elem = article.find('time')
85
+ date = date_elem['datetime'] if date_elem and 'datetime' in date_elem.attrs else "No date found"
86
+
87
+ image_elem = article.find('img')
88
+ image_url = image_elem['src'] if image_elem and 'src' in image_elem.attrs else "No image found"
89
+
90
+ articles.append({
91
+ 'title': title,
92
+ 'url': url,
93
+ 'description': description,
94
+ 'authors': authors,
95
+ 'date': date,
96
+ 'image_url': image_url
97
+ })
98
+
99
+ return articles
100
+
101
+ def execute(self, num_articles=100, max_pages=5):
102
+ """
103
+ Fetches the latest news articles from Nature's website.
104
+
105
+ Parameters:
106
+ num_articles (int): The number of articles to fetch.
107
+ max_pages (int): The maximum number of pages to fetch.
108
+
109
+ Returns:
110
+ list: A list of dictionaries containing article information.
111
+ """
112
+ all_articles = []
113
+ page_number = 1
114
+
115
+ try:
116
+ while len(all_articles) < num_articles and page_number <= max_pages:
117
+ html_content = self.fetch_page(page_number)
118
+ page_articles = self.parse_articles(html_content)
119
+
120
+ if not page_articles:
121
+ break # No more articles found
122
+
123
+ all_articles.extend(page_articles)
124
+ page_number += 1
125
+ time.sleep(1) # Be polite to the server
126
+
127
+ return all_articles[:num_articles]
128
+ except Exception as e:
129
+ return [{"error": str(e)}]
130
+
131
+ def get_metadata(self):
132
+ """
133
+ Returns the metadata for the Nature_News_Fetcher_Tool.
134
+
135
+ Returns:
136
+ dict: A dictionary containing the tool's metadata.
137
+ """
138
+ metadata = super().get_metadata()
139
+ return metadata
140
+
141
+
142
+ if __name__ == "__main__":
143
+ # Test command:
144
+ """
145
+ Run the following commands in the terminal to test the script:
146
+
147
+ cd octotools/tools/nature_news_fetcher
148
+ python tool.py
149
+ """
150
+
151
+ # Get the directory of the current script
152
+ script_dir = os.path.dirname(os.path.abspath(__file__))
153
+
154
+ # Example usage of the Nature_News_Fetcher_Tool
155
+ tool = Nature_News_Fetcher_Tool()
156
+
157
+ # Get tool metadata
158
+ metadata = tool.get_metadata()
159
+ print(metadata)
160
+
161
+ import json
162
+
163
+
164
+ # Execute the tool to fetch the latest 10 articles (for demonstration purposes)
165
+ try:
166
+ execution = tool.execute(num_articles=10, max_pages=1)
167
+ print(json.dumps(execution, indent=4))
168
+ print("\nExecution Result:")
169
+ print(f"Number of articles fetched: {len(execution)}")
170
+ print("\nSample articles:")
171
+ for i, article in enumerate(execution[:10], 1):
172
+ print(f"\n{i}. Title: {article['title']}")
173
+ print(f" URL: {article['url']}")
174
+ print(f" Description: {article['description'][:100]}...") # Show first 100 characters
175
+ print(f" Authors: {', '.join(article['authors'])}")
176
+ print(f" Date: {article['date']}")
177
+ print(f" Image URL: {article['image_url']}")
178
+ except Exception as e:
179
+ print(f"Execution failed: {e}")
180
+
181
+ print("Done!")
octotools/tools/object_detector/__init__.py ADDED
File without changes
octotools/tools/object_detector/examples/baseball.png ADDED
octotools/tools/object_detector/test.log ADDED
@@ -0,0 +1,112 @@
1
+ Could not load the custom kernel for multi-scale deformable attention: Command '['which', 'c++']' returned non-zero exit status 1.
2
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
3
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
4
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
5
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
6
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
7
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
8
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
9
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
10
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
11
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
12
+ Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
13
+ CUDA_HOME is not set
14
+ {'tool_name': 'Object_Detector_Tool', 'tool_description': 'A tool that detects objects in an image using the Grounding DINO model and saves individual object images with empty padding.', 'tool_version': '1.0.0', 'input_types': {'image': 'str - The path to the image file.', 'labels': 'list - A list of object labels to detect.', 'threshold': 'float - The confidence threshold for detection (default: 0.35).', 'model_size': "str - The size of the model to use ('tiny' or 'base', default: 'tiny').", 'padding': 'int - The number of pixels to add as empty padding around detected objects (default: 20).'}, 'output_type': 'list - A list of detected objects with their scores, bounding boxes, and saved image paths.', 'demo_commands': [{'command': 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])', 'description': 'Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths.'}, {'command': 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)', 'description': 'Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths.'}], 'require_llm_engine': False, 'user_metadata': {'limitation': 'The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification.'}}
15
+ Detected Objects:
16
+ Detected baseball with confidence 0.69
17
+ Bounding box: (558, 48, 615, 107)
18
+ Saved image (with padding): detected_objects/baseball_baseball_1.png
19
+
20
+ Detected baseball with confidence 0.69
21
+ Bounding box: (614, 137, 671, 191)
22
+ Saved image (with padding): detected_objects/baseball_baseball_2.png
23
+
24
+ Detected baseball with confidence 0.68
25
+ Bounding box: (132, 67, 189, 126)
26
+ Saved image (with padding): detected_objects/baseball_baseball_3.png
27
+
28
+ Detected baseball with confidence 0.68
29
+ Bounding box: (632, 67, 690, 126)
30
+ Saved image (with padding): detected_objects/baseball_baseball_4.png
31
+
32
+ Detected baseball with confidence 0.68
33
+ Bounding box: (57, 289, 115, 346)
34
+ Saved image (with padding): detected_objects/baseball_baseball_5.png
35
+
36
+ Detected baseball with confidence 0.68
37
+ Bounding box: (535, 111, 592, 170)
38
+ Saved image (with padding): detected_objects/baseball_baseball_6.png
39
+
40
+ Detected baseball with confidence 0.68
41
+ Bounding box: (307, 48, 365, 107)
42
+ Saved image (with padding): detected_objects/baseball_baseball_7.png
43
+
44
+ Detected baseball with confidence 0.68
45
+ Bounding box: (114, 137, 171, 191)
46
+ Saved image (with padding): detected_objects/baseball_baseball_8.png
47
+
48
+ Detected baseball with confidence 0.68
49
+ Bounding box: (35, 351, 91, 410)
50
+ Saved image (with padding): detected_objects/baseball_baseball_9.png
51
+
52
+ Detected baseball with confidence 0.68
53
+ Bounding box: (57, 48, 115, 107)
54
+ Saved image (with padding): detected_objects/baseball_baseball_10.png
55
+
56
+ Detected baseball with confidence 0.68
57
+ Bounding box: (35, 111, 91, 170)
58
+ Saved image (with padding): detected_objects/baseball_baseball_11.png
59
+
60
+ Detected baseball with confidence 0.68
61
+ Bounding box: (364, 137, 421, 191)
62
+ Saved image (with padding): detected_objects/baseball_baseball_12.png
63
+
64
+ Detected baseball with confidence 0.68
65
+ Bounding box: (114, 377, 171, 430)
66
+ Saved image (with padding): detected_objects/baseball_baseball_13.png
67
+
68
+ Detected baseball with confidence 0.67
69
+ Bounding box: (132, 307, 189, 366)
70
+ Saved image (with padding): detected_objects/baseball_baseball_14.png
71
+
72
+ Detected baseball with confidence 0.67
73
+ Bounding box: (285, 111, 342, 170)
74
+ Saved image (with padding): detected_objects/baseball_baseball_15.png
75
+
76
+ Detected baseball with confidence 0.67
77
+ Bounding box: (382, 67, 439, 126)
78
+ Saved image (with padding): detected_objects/baseball_baseball_16.png
79
+
80
+ Detected baseball with confidence 0.65
81
+ Bounding box: (587, 94, 643, 153)
82
+ Saved image (with padding): detected_objects/baseball_baseball_17.png
83
+
84
+ Detected baseball with confidence 0.65
85
+ Bounding box: (86, 94, 143, 153)
86
+ Saved image (with padding): detected_objects/baseball_baseball_18.png
87
+
88
+ Detected baseball with confidence 0.65
89
+ Bounding box: (86, 335, 143, 393)
90
+ Saved image (with padding): detected_objects/baseball_baseball_19.png
91
+
92
+ Detected baseball with confidence 0.63
93
+ Bounding box: (336, 95, 393, 153)
94
+ Saved image (with padding): detected_objects/baseball_baseball_20.png
95
+
96
+ Detected basket with confidence 0.59
97
+ Bounding box: (252, 2, 468, 215)
98
+ Saved image (with padding): detected_objects/baseball_basket_1.png
99
+
100
+ Detected basket with confidence 0.55
101
+ Bounding box: (503, 2, 717, 215)
102
+ Saved image (with padding): detected_objects/baseball_basket_2.png
103
+
104
+ Detected basket with confidence 0.54
105
+ Bounding box: (2, 2, 217, 215)
106
+ Saved image (with padding): detected_objects/baseball_basket_3.png
107
+
108
+ Detected basket with confidence 0.5
109
+ Bounding box: (2, 242, 217, 455)
110
+ Saved image (with padding): detected_objects/baseball_basket_4.png
111
+
112
+ Done!
octotools/tools/object_detector/tool.py ADDED
@@ -0,0 +1,179 @@
1
+ # Grounding DINO Object Detection Tool
2
+ # https://huggingface.co/IDEA-Research/grounding-dino
3
+
4
+ import os
5
+ import time
6
+ import torch
7
+ from transformers import pipeline
8
+
9
+ from octotools.tools.base import BaseTool
10
+ from PIL import Image, ImageOps
11
+
12
+ import os
13
+ # Print CUDA_HOME if it is set; otherwise report that it is not set
14
+ print(os.environ.get('CUDA_HOME', 'CUDA_HOME is not set'))
15
+
16
+ # Suppress stderr by redirecting it to /dev/null
17
+ import sys
18
+ sys.stderr = open(os.devnull, 'w')
19
+
20
+ import warnings
21
+ warnings.filterwarnings("ignore")
22
+
23
+
24
+ class Object_Detector_Tool(BaseTool):
25
+ def __init__(self):
26
+ super().__init__(
27
+ tool_name="Object_Detector_Tool",
28
+ tool_description="A tool that detects objects in an image using the Grounding DINO model and saves individual object images with empty padding.",
29
+ tool_version="1.0.0",
30
+ input_types={
31
+ "image": "str - The path to the image file.",
32
+ "labels": "list - A list of object labels to detect.",
33
+ "threshold": "float - The confidence threshold for detection (default: 0.35).",
34
+ "model_size": "str - The size of the model to use ('tiny' or 'base', default: 'tiny').",
35
+ "padding": "int - The number of pixels to add as empty padding around detected objects (default: 20)."
36
+ },
37
+ output_type="list - A list of detected objects with their scores, bounding boxes, and saved image paths.",
38
+ demo_commands=[
39
+ {
40
+ "command": 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])',
41
+ "description": "Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths."
42
+ },
43
+ {
44
+ "command": 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)',
45
+ "description": "Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths."
46
+ }
47
+ ],
48
+ user_metadata={
49
+ "limitation": "The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification."
50
+ }
51
+ )
52
+
53
+ def preprocess_caption(self, caption):
54
+ result = caption.lower().strip()
55
+ if result.endswith("."):
56
+ return result
57
+ return result + "."
58
+
59
+ def build_tool(self, model_size='tiny'):
60
+ model_name = f"IDEA-Research/grounding-dino-{model_size}"
61
+ device = "cuda" if torch.cuda.is_available() else "cpu"
62
+ try:
63
+ pipe = pipeline(model=model_name, task="zero-shot-object-detection", device=device)
64
+ return pipe
65
+ except Exception as e:
66
+ print(f"Error building the Object Detection tool: {e}")
67
+ return None
68
+
69
+ def save_detected_object(self, image, box, image_name, label, index, padding):
70
+ object_image = image.crop(box)
71
+ padded_image = ImageOps.expand(object_image, border=padding, fill='white')
72
+
73
+ filename = f"{image_name}_{label}_{index}.png"
74
+ os.makedirs(self.output_dir, exist_ok=True)
75
+ save_path = os.path.join(self.output_dir, filename)
76
+
77
+ padded_image.save(save_path)
78
+ return save_path
79
+
80
+ def execute(self, image, labels, threshold=0.35, model_size='tiny', padding=20, max_retries=10, retry_delay=5, clear_cuda_cache=False):
81
+ for attempt in range(max_retries):
82
+ try:
83
+ saved_files = []
84
+
85
+ pipe = self.build_tool(model_size)
86
+ if pipe is None:
87
+ raise ValueError("Failed to build the Object Detection tool.")
88
+
89
+ preprocessed_labels = [self.preprocess_caption(label) for label in labels]
90
+ results = pipe(image, candidate_labels=preprocessed_labels, threshold=threshold)
91
+
92
+ formatted_results = []
93
+ original_image = Image.open(image)
94
+ image_name = os.path.splitext(os.path.basename(image))[0]
95
+
96
+ object_counts = {}
97
+
98
+ for result in results:
99
+ box = tuple(result["box"].values())
100
+ label = result["label"]
101
+ score = round(result["score"], 2)
102
+ if label.endswith("."):
103
+ label = label[:-1]
104
+
105
+ object_counts[label] = object_counts.get(label, 0) + 1
106
+ index = object_counts[label]
107
+
108
+ save_path = self.save_detected_object(original_image, box, image_name, label, index, padding)
109
+
110
+ formatted_results.append({
111
+ "label": label,
112
+ "confidence score": score,
113
+ "box": box,
114
+ "saved_image_path": save_path
115
+ })
116
+
117
+ return formatted_results
118
+
119
+ except RuntimeError as e:
120
+ if "CUDA out of memory" in str(e):
121
+ print(f"CUDA out of memory error on attempt {attempt + 1}.")
122
+ if clear_cuda_cache:
123
+ print("Clearing CUDA cache and retrying...")
124
+ torch.cuda.empty_cache()
125
+ else:
126
+ print(f"Retrying in {retry_delay} seconds...")
127
+ time.sleep(retry_delay)
128
+ continue
129
+ else:
130
+ print(f"Runtime error: {e}")
131
+ break
132
+ except Exception as e:
133
+ print(f"Error detecting objects: {e}")
134
+ break
135
+
136
+ print(f"Failed to detect objects after {max_retries} attempts.")
137
+ return []
138
+
139
+ def get_metadata(self):
140
+ metadata = super().get_metadata()
141
+ return metadata
142
+
143
+ if __name__ == "__main__":
144
+ # Test command:
145
+ """
146
+ Run the following commands in the terminal to test the script:
147
+
148
+ cd octotools/tools/object_detector
149
+ python tool.py
150
+ """
151
+
152
+ # Get the directory of the current script
153
+ script_dir = os.path.dirname(os.path.abspath(__file__))
154
+
155
+ # Example usage of the Object_Detector_Tool
156
+ tool = Object_Detector_Tool()
157
+ tool.set_custom_output_dir("detected_objects")
158
+
159
+ # Get tool metadata
160
+ metadata = tool.get_metadata()
161
+ print(metadata)
162
+
163
+ # Construct the full path to the image using the script's directory
164
+ relative_image_path = "examples/baseball.png"
165
+ image_path = os.path.join(script_dir, relative_image_path)
166
+
167
+ # Execute the tool
168
+ try:
169
+ execution = tool.execute(image=image_path, labels=["baseball", "basket"], padding=20)
170
+ print("Detected Objects:")
171
+ for obj in execution:
172
+ print(f"Detected {obj['label']} with confidence {obj['confidence score']}")
173
+ print(f"Bounding box: {obj['box']}")
174
+ print(f"Saved image (with padding): {obj['saved_image_path']}")
175
+ print()
176
+ except ValueError as e:
177
+ print(f"Execution failed: {e}")
178
+
179
+ print("Done!")
octotools/tools/pubmed_search/__init__.py ADDED
File without changes
octotools/tools/pubmed_search/test.log ADDED
@@ -0,0 +1,3 @@
1
+ 2025-02-14 19:03:45 06a0c5b0e177 metapub.config[158535] WARNING NCBI_API_KEY was not set.
2
+ [{'title': 'Safety of health workers during the COVID-19 pandemic and beyond: piloting WHO framework in Iran.', 'abstract': "BACKGROUND: Health Workers Safety (HWS) is a global health priority and essential at all times, in stable situations, in emergencies, in disease epidemics or pandemics. This study aimed to assess HWS during the COVID-19 Pandemic.\nMETHODS: This cross-sectional study was conducted in 2022 in east Azerbaijan province, Iran. HWS was assessed based on 22 indicators suggested by WHO EMRO. We selected 15 PHC facilities and six wards from two hospitals randomly. Data collected (qualitative and quantitative) using national digital health records, staff records, and indicator-specific tools. In addition to measuring the indicator's value, the indicators' feasibility was also assessed. Descriptive and inferential statistics with SPSS-16 were used for data analysis.\nRESULTS: Totally, 325 Health Workers (HWs) (218 from PHC facilities and 107 from hospitals) participated in the study. Most of the participants in PHC facilities and hospitals were Community Health Workers (CHWs) (Moragheb Salamat) (45.4%) and nurses (37.38%), respectively. Most of HWs had completed the full vaccination schedule for Hepatitis B and COVID-19. Personal Protective Equipment (PPE) safety protocols were adhered by most of HWs within a healthcare facility. None of managers had attended nationally certified training for mental health support for health and care workers. Less than 20% of HWs participated in the work burnout prevention courses and most of HWs complained about work overload, or burnout. The job satisfaction level of hospital HWs (60.20%) was significantly higher than that of HWs from PHC facilities (57.18%) (P\u2009<\u20090.001).\nCONCLUSION: Even though the mental health of HWs was not as expected, the indicators related to physical health and occupational health were at a suitable level. Also, there is not a system in PHC to audit the application of safety measures to mitigate the risk of contracting COVID-19. We recommend creating a specific system (precise and detailed) for HWs' safety and applying safety measures in the PHC routine programs.", 'keywords': ['Assessment', 'COVID-19 pandemic', 'Health worker', 'Iran', 'Safety'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39920792'}, {'title': 'Experiences of working as a clinical nurse while pregnant during the coronavirus disease-2019 pandemic: a qualitative study.', 'abstract': 'BACKGROUND: Working as a pregnant clinical nurse might experience a range of challenges, such as significant anatomical and physiological changes as well as emotional and cognitive changes. That might be particularly obvious under the historical background of coronavirus disease-2019 (COVID-19) pandemic. However, a dearth of studies has explored the experiences of working as a pregnant nurse during this special period. This study aimed to explore the experiences of working as a clinical nurse while pregnant during the COVID-19 pandemic.\nMETHODS: A descriptive qualitative design was selected. Purposive sampling, combined with maximum variation strategy and snowball sampling, were utilized to identify and select participants from tertiary-teaching hospitals, specialized hospitals, and community hospitals in Zhejiang Province, southeastern China. 
Online semi-structured individual interviews were used to collect data, and conventional content analysis was used to analyze the data.\nRESULTS: Eleven Chinese nurses with a mean age of 31.8 years, ranging from 26 to 40 years, participated in this study. Four themes and twelve subthemes emerged: (1) still adhering to work as a clinical nurse despite being pregnant during the pandemic; (2) working during pregnancy under pandemic is still an ordinary nurse; (3) still staying in the special life phase as a pregnant mother; and (4) growth and gains as pregnant mother.\nCONCLUSION: The pregnant clinical nurses suffered from various changes and difficulties during the pandemic. Managers, occupational health and other health system leaders, and policymakers should be aware of the importance of establishing a work environment that guarantees safe continued pregnancy. Future studies should focus on the establishment of specific guidelines and manuals regarding how pregnant nurses worked, as well as the development of self-protection interventions during pregnancy. Moreover, research on moral stigma and bullying in nursing during pregnancy deserves further exploration.\nCLINICAL TRIAL NUMBER: Not applicable.', 'keywords': ['COVID-19 pandemic', 'Experiences', 'Nurse', 'Pregnant', 'Qualitative research'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39901239'}, {'title': "Development and psychometric validation of the frontline health workers' occupational risk and characteristics in emergencies index (FORCE-index) - The covid Hospital cohort study.", 'abstract': "OBJECTIVES: A lack of tools for the systematic identification of frontline health workers' changing occupational risks, characteristics, and needs, poses a major barrier to supporting vital personnel to stay in practice through health emergencies and beyond. The current study reports on the development and psychometric evaluation of the Frontline health workers' Occupational Risk and Characteristics in Emergencies index (FORCE-index).\nSTUDY DESIGN: The Covid hospital study is a large, multisite, four-wave, open cohort study of frontline health workers responding to the first four waves of the COVID-19 pandemic (2020-2022).\nMETHODS: 2496 frontline health workers responded to questionnaires assessing various aspects of their work environment. Using exploratory factor analysis, we estimated the latent structure of the FORCE-index at the first and second waves. This structure was evaluated using confirmatory factor analysis at the third and fourth waves. The internal consistency of the instrument's subscales (e.g., factors) was evaluated using omega reliability, Cronbach's alpha coefficient, and mean inter-item correlation.\nRESULTS: A nine-factor solution provided best fit to the data. These factors mapped onto the following aspects of the work environment; competency, stress management, familiarity, workload manageability, work performance, infection safety, personal protective equipment, social safety, and social support. 
Internal consistency for the full FORCE-index and the nine factors was satisfactory.\nCONCLUSIONS: The initial psychometric validation indicates that the FORCE-index is a valid measure which can be used by health authorities, services, and institutions to adequately and systematically assess central aspects of frontline health workers' work environment that are commonly challenged in health emergencies.", 'keywords': ['Covid-19', 'Frontline', 'Health Personnel', 'Health care worker', 'Health emergency', 'Health response', 'Hospital', 'Index', 'Infectious outbreak', 'Job characteristics', 'Occupational Health', 'Occupational diseases', 'Occupational exposure', 'Occupational stress', 'Preparedness', 'Preparedness planning', 'Psychometric', 'Scale', 'Stress', 'Work environment'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39896339'}, {'title': "A descriptive analysis of nurses' self-reported mental health symptoms during the COVID-19 pandemic: An international study.", 'abstract': "AIM: To describe the self-reported mental health of nurses from 35 countries who worked during the COVID-19 pandemic.\nBACKGROUND: There is little occupationally specific data about nurses' mental health worldwide. Studies have documented the impact on nurses' mental health of the COVID-19 pandemic, but few have baseline referents.\nMETHODS: A descriptive, cross-sectional design structured the study. Data reflect a convenience sample of\xa09,387 participants who completed the opt-in survey between July 31, 2022, and October 31, 2023. Descriptive statistics were run to analyze the following variables associated with mental health: Self-reports of mental health symptoms, burnout, personal losses during the pandemic, access to mental health services, and self-care practices used to cope with pandemic-related stressors. Reporting of this study was steered by the STROBE guideline for quantitative studies.\nRESULTS: Anxiety or depression occurred at rates ranging from 23%-61%, with country-specific trends in reporting observed. Approximately 18% of the sample reported experiencing some symptoms of burnout. The majority of nurses' employers did not provide mental health support in the workplace. Most reported more frequently engaging with self-care practices compared with before the pandemic. Notably, 20% of nurses suffered the loss of a family member, 35% lost a friend, and 34% a coworker due to COVID-19. 
Nearly half (48%) reported experiencing public aggression due to their identity as a nurse.\nCONCLUSIONS: The data obtained establish a basis for understanding the specific mental health needs of the nursing workforce globally, highlighting key areas for service development.\nIMPLICATIONS FOR NURSING POLICY: Healthcare organizations and governmental bodies need to develop targeted mental health support programs that are readily accessible to nurses to foster a resilient nursing workforce.", 'keywords': ['COVID‐19', 'global health', 'health workforce', 'nursing', 'nursing shortage', 'occupational health', 'occupational health nursing', 'pandemics'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39871528'}, {'title': 'Work Psychology and Occupational Health: An Editorial.', 'abstract': "Globally, the COVID-19 pandemic has severely impacted workers' health, particularly their mental well-being [...].", 'keywords': [], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39857553'}, {'title': 'Analysis of HFE impact of COVID-19 on OHS in construction enterprises.', 'abstract': "Human factors are critical to Occupational Health and Safety (OHS) in construction enterprises. However, comprehensive industry-wide recognition remains challenging, underscoring the need for Human Factors Engineering (HFE) research. This study develops an optimized HFE evaluation model based on fundamental HFE principles. Examining COVID-19's significant impact on construction enterprise OHS, this research employs an empirical investigation of 259 cases, utilizing a model that integrates NetLogo's System Dynamics (SD) and Multiple Linear Regression (MLR) to analyze the interactions between human factors and other variables. The findings reveal four key factors influencing human factors: management, material, environmental, and methodological. These factors demonstrate a quadratic parabolic relationship, with peak influence occurring at step 36 of the research period. Twelve of the 20 survey factors exhibit a linear regression relationship with human factors' four sub-factors, with pre-job training (Q<sub>9</sub>) demonstrating multiple influential interactions. The strongest correlation is between pre-job training (Q<sub>9</sub>) and living materials (Q<sub>14</sub>), with a weight coefficient of .325. Psychological counseling (Q<sub>8</sub>) and living materials (Q<sub>14</sub>) show a close relationship (weight coefficient .301). Notably, Q<sub>9</sub> and empirical prevention materials (Q<sub>11</sub>) display a negative correlation with a weight coefficient of -.156. This study's practical significance lies in enabling enterprises to identify key HFE control factors and understand critical sub-factors for mitigating COVID-19's adverse impacts.", 'keywords': ['COVID-19', 'Human factors engineering (HFE)', 'Multiple linear regression (MLR)', 'NetLogo', 'Occupational health and safety (OHS)', 'System dynamics (SD)'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39811363'}, {'title': 'COVID-19 workplace countermeasures that occupational physicians could not change in Japan: a qualitative study.', 'abstract': 'BACKGROUND: During the COVID-19 pandemic, information and circumstances changed from moment to moment, including the accumulation of scientific knowledge, the emergence of variants, social tolerance, and government policy. Therefore, it was important to adapt workplace countermeasures punctually and flexibly based on scientific evidence and according to circumstances. 
However, there has been no assessment of changes in workplace countermeasures. With a view toward preparedness for future pandemics, we surveyed COVID-19 workplace countermeasures that occupational physicians considered as needing to be changed but went unchanged.\nMETHODS: We invited 685 professional occupational physicians certified by Japan Society for Occupational Health to complete an online questionnaire by sending postcards with QR codes. The main questions concerned countermeasures that the participants wanted to change but could not. The survey period was from February 21 to April 28, 2022. The responses were analyzed using the KJ method.\nRESULTS: Of the 168 invitees (24.5%) who responded to the survey, 125 reported countermeasures that needed to be changed (total count: 254). The responses were categorized into basic systems, occupational health services, workplace countermeasures, vaccines, and incidents, with a code count of 7, 8,147, 10, and 82, respectively. The type of countermeasure was 115 for countermeasures to be strengthened (CBS), 110 for measures to be mitigated (CBM), and 29 for neither.\nCONCLUSIONS: Often-mentioned CBS were increased teleworking, strengthened ventilation, smoking cessation, and promotion of vaccines. Often-mentioned CBM were relaxation of protective equipment rules, discontinued environmental disinfection, and shorted isolation and reinstatement. In the early pandemic phases, CBSs were frequently mentioned, whereas CBMs were featured more prominently in the latter phases. The survey revealed countermeasures that occupational physicians thought needed to be changed but were not changed in practice. For future responses to emerging and reemerging infectious diseases, it will be necessary to establish rules compatible with flexible modification of workplace countermeasures in response to changing circumstances.', 'keywords': ['COVID-19', 'Japan', 'Occupational health', 'Occupational physician', 'Workers', 'Workplace', 'Workplace countermeasures'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39780108'}, {'title': 'Rapid COVID-19 Testing of Symptomatic Health Care Personnel: A Strategy for Safely Maintaining the Workforce.', 'abstract': 'Determine performance characteristics and safety outcomes of two rapid COVID-19 screening methods to inform immediate return to work (RTW) decisions while (health care personnel) HCP await results of pending confirmatory laboratory test. Retrospective, occupational health quality improvement study comparing screening with rapid SARS-CoV-2 nucleic acid amplification (NAAT) and antigen test. 531 mildly symptomatic HCP screened over 16 months. Until more accurate affordable NAAT tests become available, antigen test screening alone addresses simultaneous needs to minimize COVID-19 transmission from symptomatic HCP and maintain an adequate workforce.', 'keywords': [], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39739739'}, {'title': 'Satisfaction and Workload as Predictors of Psychological Distress in Professionals of Psychosocial Care Centers During the COVID-19 Pandemic.', 'abstract': 'BACKGROUND AND AIMS: The COVID-19 pandemic significantly impacted the mental health of healthcare professionals, especially those working in Psychosocial Care Centers (CAPS), which are crucial services in the Brazilian mental health system. 
This study aimed to investigate the association between job satisfaction, workload, and psychological distress among CAPS professionals during the pandemic.\nMETHODS: A cross-sectional study was conducted with 53 professionals from seven CAPS. The Workload Impact Scale (IMPACTO-BR) and Job Satisfaction Scale (SATIS-BR), the General Health Questionnaire (GHQ-12), and a sociodemographic questionnaire were used. Descriptive and analytical statistical analyses were performed. Multiple linear regression analysis was conducted to examine the relationship between job satisfaction, workload, and psychological distress.\nRESULTS: Professionals reported moderate satisfaction (3.67 Β± 0.45) and mild workload (1.82 Β± 0.63). One-third of the sample showed scores indicative of psychological distress. Multiple linear regression analysis revealed that workload (p = 0.0025) and low job satisfaction (p = 0.0495) were significantly associated with psychological distress.\nCONCLUSIONS: Low job satisfaction and high professional workload were predictive variables of psychological distress. These findings highlight the need for investments in promoting the quality of life at work for mental health professionals, especially during crises. The implications for human resource management and public policy development emphasize the importance of an integrated approach that considers the well-being of professionals for the effectiveness and sustainability of the psychosocial care model.', 'keywords': ['COVID-19', 'health personnel', 'job satisfaction', 'mental health', 'mental health services', 'occupational health'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39728651'}, {'title': 'Occupational-related risk of testing SARS-CoV-2 positive for publicly employed medical doctors in Sweden: A nationwide cohort study.', 'abstract': 'AIMS: Doctors have an increased risk of SARS-CoV-2 infection caused by exposure to contagious patients. We aimed to identify which clinical specialities among medical doctors had the highest occupation-related risk of testing positive for SARS-CoV-2, utilizing data for all publicly employed medical doctors in Sweden.\nMETHODS: Data regarding positive SARS-CoV-2 test results and employment for publicly employed doctors in Sweden were divided into three observation periods: 1) 1 February to 31 December 2020, 2) 1 January to 30 June 2021 and 3) 1 July 2021 to 31 March 2022. Individuals were stratified according to occupation clinic and compared with clinical occupations with little to no patient contact. The risk of testing positive for SARS-CoV-2 was estimated using Cox proportional hazards regression, with sex, age and vaccination status as covariates.\nRESULTS: The study cohort included all publicly employed doctors in Sweden: 35,028 individuals. In the first period, Infectious Disease doctors had the highest incidence of SARS-CoV-2 positive tests, with an incidence of 20.2 %, compared with 8.7 % in the reference group, and an adjusted hazard ratio of 2.5 (95% confidence interval 2.02-3.04), which decreased during period 2-3. 
Doctors in Geriatric Medicine had an elevated risk throughout the whole study period.\nCONCLUSIONS: Our study shows an association between working in a speciality that involves caring for contagious COVID-19 patients, which raises concerns about infection control measures and routines being insufficient to prevent occupational infection in future pandemics.', 'keywords': ['COVID-19', 'Occupational health', 'SARS-CoV-2', 'healthcare workers', 'medical doctors', 'risk factors'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39726065'}]
3
+ Done!
octotools/tools/pubmed_search/tool.py ADDED
@@ -0,0 +1,112 @@
1
+ import os
2
+ import json
3
+ from pymed import PubMed
4
+ from metapub import PubMedFetcher
5
+ from octotools.tools.base import BaseTool
6
+ from tenacity import (
7
+ retry,
8
+ stop_after_attempt,
9
+ wait_random_exponential,
10
+ )
11
+
12
+ # Suppress stderr by redirecting it to /dev/null
13
+ import sys
14
+ sys.stderr = open(os.devnull, 'w')
15
+
16
+ import warnings
17
+ warnings.filterwarnings("ignore")
18
+
19
+
20
+ class Pubmed_Search_Tool(BaseTool):
21
+ def __init__(self):
22
+ super().__init__(
23
+ tool_name="Pubmed_Search_Tool",
24
+ tool_description="A tool that searches PubMed Central to retrieve relevant article abstracts based on a given list of text queries. Use this ONLY if you cannot use the other more specific ontology tools.",
25
+ tool_version="1.0.0",
26
+ input_types={
27
+ "queries": "list[str] - list of queries terms for searching PubMed."
28
+ },
29
+ output_type="list - List of items matching the search query. Each item consists of the title, abstract, keywords, and URL of the article. If no results found, a string message is returned.",
30
+ demo_commands=[
31
+ {
32
+ "command": 'execution = tool.execute(queries=["scoliosis", "injury"])',
33
+ "description": "Search for PubMed articles mentioning 'scoliosis' OR 'injury'."
34
+ },
35
+ {
36
+ "command": 'execution = tool.execute(queries=["COVID", "vaccine", "occupational health"])',
37
+ "description": "Search for PubMed articles mentioning 'COVID' OR 'vaccine' OR 'occupational health'."
38
+ }
39
+ ],
40
+ user_metadata={
41
+ 'limitations': "Try to use shorter and more general search queries."
42
+ }
43
+ )
44
+ self.pubmed = PubMed(tool="MyTool", email="[email protected]")
45
+ self.fetch = PubMedFetcher()
46
+
47
+ @retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(3))
48
+ def search_query(self, query_str, max_results=10):
49
+ return self.pubmed.query(query_str, max_results=max_results)
50
+
51
+ def execute(self, queries, max_results=10):
52
+ try:
53
+ query_str = f"({'[Title/Abstract] OR '.join(queries) + '[Title/Abstract]'}) AND hasabstract[All Fields] AND fha[Filter]"
54
+ max_results = min(max_results, 50)
55
+
56
+ results = self.search_query(query_str, max_results=max_results) # API can only get most recent
57
+
58
+ items = []
59
+ for article in results:
60
+ try:
61
+ article = json.loads(article.toJSON())
62
+ pubmed_id = article['pubmed_id'] # get id using pymed then get content using metapub
63
+
64
+ article = self.fetch.article_by_pmid(pubmed_id)
65
+ items.append({
66
+ 'title': article.title,
67
+ 'abstract': article.abstract,
68
+ 'keywords': article.keywords,
69
+ 'url': article.url
70
+ })
71
+ except Exception: # skip articles that fail to fetch or parse
72
+ continue
73
+
74
+ if len(items) == 0:
75
+ return "No results found for search query. Try another query or tool."
76
+
77
+ return items
78
+
79
+ except Exception as e:
80
+ print(f"Error searching PubMed: {e}")
81
+ return []
82
+
83
+ def get_metadata(self):
84
+ metadata = super().get_metadata()
85
+ return metadata
86
+
87
+ if __name__ == "__main__":
88
+ # Test command:
89
+ """
90
+ Run the following commands in the terminal to test the script:
91
+
92
+ cd octotools/tools/pubmed_search
93
+ python tool.py
94
+ """
95
+
96
+ # Get the directory of the current script
97
+ script_dir = os.path.dirname(os.path.abspath(__file__))
98
+
99
+ # Example usage
100
+ tool = Pubmed_Search_Tool()
101
+
102
+ # Queries
103
+ queries = ["COVID occupational health"]
104
+
105
+ # Execute the tool
106
+ try:
107
+ execution = tool.execute(queries=queries)
108
+ print(execution)
109
+ except ValueError as e:
110
+ print(f"Execution failed: {e}")
111
+
112
+ print("Done!")
octotools/tools/python_code_generator/__init__.py ADDED
File without changes
octotools/tools/python_code_generator/test.log ADDED
@@ -0,0 +1,13 @@
1
+
2
+ Initializing Python_Code_Generator_Tool with model_string: gpt-4o-mini
3
+ !! Cache enabled for model: gpt-4o-mini
4
+
5
+ Initializing Python_Code_Generator_Tool with model_string: gpt-4o-mini
6
+ !! Cache enabled for model: gpt-4o-mini
7
+ {'tool_name': 'Python_Code_Generator_Tool', 'tool_description': 'A tool that generates and executes simple Python code snippets for basic arithmetical calculations and math-related problems. The generated code runs in a highly restricted environment with only basic mathematical operations available.', 'tool_version': '1.0.0', 'input_types': {'query': 'str - A clear, specific description of the arithmetic calculation or math problem to be solved, including any necessary numerical inputs.'}, 'output_type': 'dict - A dictionary containing the generated code, calculation result, and any error messages.', 'demo_commands': [{'command': 'execution = tool.execute(query="Calculate the factorial of 5")', 'description': 'Generate a Python code snippet to calculate the factorial of 5.'}, {'command': 'execution = tool.execute(query="Find the sum of prime numbers up to 50")', 'description': 'Generate a Python code snippet to find the sum of prime numbers up to 50.'}, {'command': 'query="Given the list [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], calculate the sum of squares of odd numbers"\nexecution = tool.execute(query=query)', 'description': 'Generate a Python function for a specific mathematical operation on a given list of numbers.'}], 'require_llm_engine': True, 'user_metadata': {'limitations': ['Restricted to basic Python arithmetic operations and built-in mathematical functions.', 'Cannot use any external libraries or modules, including those in the Python standard library.', 'Limited to simple mathematical calculations and problems.', 'Cannot perform any string processing, data structure manipulation, or complex algorithms.', 'No access to any system resources, file operations, or network requests.', "Cannot use 'import' statements.", 'All calculations must be self-contained within a single function or script.', 'Input must be provided directly in the query string.', 'Output is limited to numerical results or simple lists/tuples of numbers.'], 'best_practices': ['Provide clear and specific queries that describe the desired mathematical calculation.', 'Include all necessary numerical inputs directly in the query string.', 'Keep tasks focused on basic arithmetic, algebraic calculations, or simple mathematical algorithms.', 'Ensure all required numerical data is included in the query.', 'Verify that the query only involves mathematical operations and does not require any data processing or complex algorithms.', 'Review generated code to ensure it only uses basic Python arithmetic operations and built-in math functions.']}}
8
+
9
+ ###Query: Given the number list: [1, 2, 3, 4, 5], calculate the sum of all the numbers in the list.
10
+ The sum of all the numbers in the list [1, 2, 3, 4, 5] is: 15
11
+
12
+ ###Execution Result: {'printed_output': 'The sum of all the numbers in the list [1, 2, 3, 4, 5] is: 15', 'variables': {'numbers': [1, 2, 3, 4, 5], 'total_sum': 15}}
13
+ Done!
octotools/tools/python_code_generator/tool.py ADDED
@@ -0,0 +1,243 @@
1
+ # octotools/tools/python_code_generator/tool.py
2
+
3
+ import os
4
+ import re
5
+ import sys
6
+ from io import StringIO
7
+ import contextlib
8
+
9
+
10
+ from octotools.tools.base import BaseTool
11
+ from octotools.engine.openai import ChatOpenAI
12
+
13
+ import signal
14
+ from contextlib import contextmanager
15
+
16
+ # Custom exception for code execution timeout
17
+ class TimeoutException(Exception):
18
+ pass
19
+
20
+ # Custom context manager for code execution timeout
21
+ @contextmanager
22
+ def timeout(seconds):
23
+ def timeout_handler(signum, frame):
24
+ raise TimeoutException("Code execution timed out")
25
+
26
+ # Set the timeout handler
27
+ original_handler = signal.signal(signal.SIGALRM, timeout_handler)
28
+ signal.alarm(seconds)
29
+
30
+ try:
31
+ yield
32
+ finally:
33
+ # Restore the original handler and disable the alarm
34
+ signal.alarm(0)
35
+ signal.signal(signal.SIGALRM, original_handler)
36
+
37
+
38
+ class Python_Code_Generator_Tool(BaseTool):
39
+ require_llm_engine = True
40
+
41
+ def __init__(self, model_string="gpt-4o-mini"):
42
+ super().__init__(
43
+ tool_name="Python_Code_Generator_Tool",
44
+ tool_description="A tool that generates and executes simple Python code snippets for basic arithmetical calculations and math-related problems. The generated code runs in a highly restricted environment with only basic mathematical operations available.",
45
+ tool_version="1.0.0",
46
+ input_types={
47
+ "query": "str - A clear, specific description of the arithmetic calculation or math problem to be solved, including any necessary numerical inputs."},
48
+ output_type="dict - A dictionary containing the generated code, calculation result, and any error messages.",
49
+ demo_commands=[
50
+ {
51
+ "command": 'execution = tool.execute(query="Calculate the factorial of 5")',
52
+ "description": "Generate a Python code snippet to calculate the factorial of 5."
53
+ },
54
+ {
55
+ "command": 'execution = tool.execute(query="Find the sum of prime numbers up to 50")',
56
+ "description": "Generate a Python code snippet to find the sum of prime numbers up to 50."
57
+ },
58
+ {
59
+ "command": 'query="Given the list [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], calculate the sum of squares of odd numbers"\nexecution = tool.execute(query=query)',
60
+ "description": "Generate a Python function for a specific mathematical operation on a given list of numbers."
61
+ },
62
+ ],
63
+ user_metadata = {
64
+ "limitations": [
65
+ "Restricted to basic Python arithmetic operations and built-in mathematical functions.",
66
+ "Cannot use any external libraries or modules, including those in the Python standard library.",
67
+ "Limited to simple mathematical calculations and problems.",
68
+ "Cannot perform any string processing, data structure manipulation, or complex algorithms.",
69
+ "No access to any system resources, file operations, or network requests.",
70
+ "Cannot use 'import' statements.",
71
+ "All calculations must be self-contained within a single function or script.",
72
+ "Input must be provided directly in the query string.",
73
+ "Output is limited to numerical results or simple lists/tuples of numbers."
74
+ ],
75
+ "best_practices": [
76
+ "Provide clear and specific queries that describe the desired mathematical calculation.",
77
+ "Include all necessary numerical inputs directly in the query string.",
78
+ "Keep tasks focused on basic arithmetic, algebraic calculations, or simple mathematical algorithms.",
79
+ "Ensure all required numerical data is included in the query.",
80
+ "Verify that the query only involves mathematical operations and does not require any data processing or complex algorithms.",
81
+ "Review generated code to ensure it only uses basic Python arithmetic operations and built-in math functions."
82
+ ]
83
+ }
84
+ )
85
+ print(f"\nInitializing Python_Code_Generator_Tool with model_string: {model_string}")
86
+ self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=False) if model_string else None
87
+
88
+ @staticmethod
89
+ def preprocess_code(code):
90
+ """
91
+ Preprocesses the generated code snippet by extracting it from the response.
92
+
93
+ Parameters:
94
+ code (str): The response containing the code snippet.
95
+
96
+ Returns:
97
+ str: The extracted code snippet.
98
+ """
99
+ code = re.search(r"```python(.*)```", code, re.DOTALL).group(1).strip()
100
+ return code
101
+
102
+ @contextlib.contextmanager
103
+ def capture_output(self):
104
+ """
105
+ Context manager to capture the standard output.
106
+
107
+ Yields:
108
+ StringIO: The captured output.
109
+ """
110
+ new_out = StringIO()
111
+ old_out = sys.stdout
112
+ sys.stdout = new_out
113
+ try:
114
+ yield sys.stdout
115
+ finally:
116
+ sys.stdout = old_out
117
+
118
+ def execute_code_snippet(self, code):
119
+ """
120
+ Executes the given Python code snippet.
121
+
122
+ Parameters:
123
+ code (str): The Python code snippet to be executed.
124
+
125
+ Returns:
126
+ dict: A dictionary containing the printed output and local variables.
127
+ """
128
+ # Check for dangerous functions and remove them
129
+ dangerous_functions = ['exit', 'quit', 'sys.exit']
130
+ for func in dangerous_functions:
131
+ if func in code:
132
+ print(f"Warning: Removing unsafe '{func}' call from code")
133
+ # Use regex to remove function calls with any arguments
134
+ code = re.sub(rf'{func}\s*\([^)]*\)', 'break', code)
135
+
136
+ try:
137
+ execution_code = self.preprocess_code(code)
138
+
139
+ # Execute with 10-second timeout
140
+ with timeout(10):
141
+ try:
142
+ exec(execution_code)
143
+ except TimeoutException:
144
+ print("Error: Code execution exceeded 60 seconds timeout")
145
+ return {"error": "Execution timed out after 60 seconds"}
146
+ except Exception as e:
147
+ print(f"Error executing code: {e}")
148
+ return {"error": str(e)}
149
+
150
+ # Capture the output and local variables
151
+ local_vars = {}
152
+ with self.capture_output() as output:
153
+ exec(execution_code, {}, local_vars)
154
+ printed_output = output.getvalue().strip()
155
+
156
+ # Filter out built-in variables and modules
157
+ """
158
+ only the variables used in the code are returned,
159
+ excluding built-in variables (which start with '__') and imported modules.
160
+ """
161
+ used_vars = {k: v for k, v in local_vars.items()
162
+ if not k.startswith('__') and not isinstance(v, type(sys))}
163
+
164
+ return {"printed_output": printed_output, "variables": used_vars}
165
+
166
+ except Exception as e:
167
+ print(f"Error executing code: {e}")
168
+ return {"error": str(e)}
169
+
170
+ def execute(self, query):
171
+ """
172
+ Generates and executes Python code based on the provided query.
173
+
174
+ Parameters:
175
+ query (str): A query describing the desired operation.
176
+
177
+ Returns:
178
+ dict: A dictionary containing the executed output, local variables, or any error message.
179
+ """
180
+
181
+ if not self.llm_engine:
182
+ raise ValueError("LLM engine not initialized. Please provide a valid model_string when initializing the tool.")
183
+
184
+ task_description = """
185
+ Given a query, generate a Python code snippet that performs the specified operation on the provided data. Please think step by step. Ensure to break down the process into clear, logical steps. Make sure to print the final result in the generated code snippet with a descriptive message explaining what the output represents. The final output should be presented in the following format:
186
+
187
+ ```python
188
+ <code snippet>
189
+ ```
190
+ """
191
+ task_description = task_description.strip()
192
+ full_prompt = f"Task:\n{task_description}\n\nQuery:\n{query}"
193
+
194
+ response = self.llm_engine(full_prompt)
195
+ result_or_error = self.execute_code_snippet(response)
196
+ return result_or_error
197
+
198
+ def get_metadata(self):
199
+ """
200
+ Returns the metadata for the Python_Code_Generator_Tool.
201
+
202
+ Returns:
203
+ dict: A dictionary containing the tool's metadata.
204
+ """
205
+ metadata = super().get_metadata()
206
+ metadata["require_llm_engine"] = self.require_llm_engine # NOTE: can be removed if not needed
207
+ return metadata
208
+
209
+
210
+ if __name__ == "__main__":
211
+ # Test command:
212
+ """
213
+ Run the following commands in the terminal to test the script:
214
+
215
+ cd octotools/tools/python_code_generator
216
+ python tool.py
217
+ """
218
+
219
+ # Get the directory of the current script
220
+ script_dir = os.path.dirname(os.path.abspath(__file__))
221
+
222
+ # Example usage of the Python_Code_Generator_Tool
223
+ tool = Python_Code_Generator_Tool()
224
+ tool = Python_Code_Generator_Tool(model_string="gpt-4o-mini")
225
+
226
+ # Get tool metadata
227
+ metadata = tool.get_metadata()
228
+ print(metadata)
229
+
230
+ # Sample query for generating and executing Python code
231
+ queries = [
232
+ "Given the number list: [1, 2, 3, 4, 5], calculate the sum of all the numbers in the list.",
233
+ ]
234
+ for query in queries:
235
+ print(f"\n###Query: {query}")
236
+ # Execute the tool with the sample query
237
+ try:
238
+ execution = tool.execute(query=query)
239
+ print("\n###Execution Result:", execution)
240
+ except ValueError as e:
241
+ print(f"Execution failed: {e}")
242
+
243
+ print("Done!")
octotools/tools/relevant_patch_zoomer/__init__.py ADDED
File without changes
octotools/tools/relevant_patch_zoomer/examples/car.png ADDED
octotools/tools/relevant_patch_zoomer/test.log ADDED
@@ -0,0 +1,10 @@
1
+
2
+ Initializing Patch Zoomer Tool with model: gpt-4o
3
+ !! Cache enabled for model: gpt-4o
4
+ {'tool_name': 'Relevant_Patch_Zoomer_Tool', 'tool_description': 'A tool that analyzes an image, divides it into 5 regions (4 quarters + center), and identifies the most relevant patches based on a question. The returned patches are zoomed in by a factor of 2.', 'tool_version': '1.0.0', 'input_types': {'image': 'str - The path to the image file.', 'question': 'str - The question about the image content.'}, 'output_type': 'dict - Contains analysis text and list of saved zoomed patch paths.', 'demo_commands': [{'command': 'execution = tool.execute(image="path/to/image.jpg", question="What is the color of the car?")', 'description': "Analyze image and return relevant zoomed patches that show the car's color."}], 'require_llm_engine': True, 'user_metadata': {'best_practices': ['It might be helpful to zoom in on the image first to get a better look at the object(s).', 'It might be helpful if the question requires a close-up view of the object(s), symbols, texts, etc.', 'The tool should be used to provide a high-level analysis first, and then use other tools for fine-grained analysis. For example, you can use Relevant_Patch_Zoomer_Tool first to get a zoomed patch of specific objects, and then use Image_Captioner_Tool to describe the objects in detail.']}}
5
+
6
+ Detected Patches:
7
+ Path: /root/Projects/octotools/octotools/tools/relevant_patch_zoomer/zoomed_patches/car_bottom-right_zoomed_2x.png
8
+ Description: The bottom-right region of the image: /root/Projects/octotools/octotools/tools/relevant_patch_zoomer/examples/car.png.
9
+
10
+ Done!
octotools/tools/relevant_patch_zoomer/tool.py ADDED
@@ -0,0 +1,188 @@
1
+ import os
2
+ import cv2
3
+ from pydantic import BaseModel
4
+ from octotools.tools.base import BaseTool
5
+ from octotools.engine.openai import ChatOpenAI
6
+
7
+ class PatchZoomerResponse(BaseModel):
8
+ analysis: str
9
+ patch: list[str]
10
+
11
+ class Relevant_Patch_Zoomer_Tool(BaseTool):
12
+ require_llm_engine = True
13
+
14
+ def __init__(self, model_string="gpt-4o"):
15
+ super().__init__(
16
+ tool_name="Relevant_Patch_Zoomer_Tool",
17
+ tool_description="A tool that analyzes an image, divides it into 5 regions (4 quarters + center), and identifies the most relevant patches based on a question. The returned patches are zoomed in by a factor of 2.",
18
+ tool_version="1.0.0",
19
+ input_types={
20
+ "image": "str - The path to the image file.",
21
+ "question": "str - The question about the image content.",
22
+ },
23
+ output_type="dict - Contains analysis text and list of saved zoomed patch paths.",
24
+ demo_commands=[
25
+ {
26
+ "command": 'execution = tool.execute(image="path/to/image.jpg", question="What is the color of the car?")',
27
+ "description": "Analyze image and return relevant zoomed patches that show the car's color."
28
+ }
29
+ ],
30
+ user_metadata = {
31
+ "best_practices": [
32
+ "It might be helpful to zoom in on the image first to get a better look at the object(s).",
33
+ "It might be helpful if the question requires a close-up view of the object(s), symbols, texts, etc.",
34
+ "The tool should be used to provide a high-level analysis first, and then use other tools for fine-grained analysis. For example, you can use Relevant_Patch_Zoomer_Tool first to get a zoomed patch of specific objects, and then use Image_Captioner_Tool to describe the objects in detail."
35
+ ]
36
+ }
37
+ )
38
+ self.matching_dict = {
39
+ "A": "top-left",
40
+ "B": "top-right",
41
+ "C": "bottom-left",
42
+ "D": "bottom-right",
43
+ "E": "center"
44
+ }
45
+
46
+ print(f"\nInitializing Patch Zoomer Tool with model: {model_string}")
47
+ self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=True) if model_string else None
48
+
49
+ def _save_patch(self, image_path, patch, save_path, zoom_factor=2):
50
+ """Extract and save a specific patch from the image with 10% margins."""
51
+ img = cv2.imread(image_path)
52
+ height, width = img.shape[:2]
53
+
54
+ quarter_h = height // 2
55
+ quarter_w = width // 2
56
+
57
+ margin_h = int(quarter_h * 0.1)
58
+ margin_w = int(quarter_w * 0.1)
59
+
60
+ patch_coords = {
61
+ 'A': ((max(0, 0 - margin_w), max(0, 0 - margin_h)),
62
+ (min(width, quarter_w + margin_w), min(height, quarter_h + margin_h))),
63
+ 'B': ((max(0, quarter_w - margin_w), max(0, 0 - margin_h)),
64
+ (min(width, width + margin_w), min(height, quarter_h + margin_h))),
65
+ 'C': ((max(0, 0 - margin_w), max(0, quarter_h - margin_h)),
66
+ (min(width, quarter_w + margin_w), min(height, height + margin_h))),
67
+ 'D': ((max(0, quarter_w - margin_w), max(0, quarter_h - margin_h)),
68
+ (min(width, width + margin_w), min(height, height + margin_h))),
69
+ 'E': ((max(0, quarter_w//2 - margin_w), max(0, quarter_h//2 - margin_h)),
70
+ (min(width, quarter_w//2 + quarter_w + margin_w),
71
+ min(height, quarter_h//2 + quarter_h + margin_h)))
72
+ }
73
+
74
+ (x1, y1), (x2, y2) = patch_coords[patch]
75
+ patch_img = img[y1:y2, x1:x2]
76
+
77
+ zoomed_patch = cv2.resize(patch_img,
78
+ (patch_img.shape[1] * zoom_factor,
79
+ patch_img.shape[0] * zoom_factor),
80
+ interpolation=cv2.INTER_LINEAR)
81
+
82
+ os.makedirs(os.path.dirname(save_path), exist_ok=True)
83
+ cv2.imwrite(save_path, zoomed_patch)
84
+ return save_path
85
+
86
+ def execute(self, image, question, zoom_factor=2):
87
+ try:
88
+ if not self.llm_engine:
89
+ return "Error: LLM engine not initialized. Please provide a valid model_string."
90
+
91
+ # Prepare the prompt
92
+ prompt = f"""
93
+ Analyze this image to identify the most relevant region(s) for answering the question:
94
+
95
+ Question: {question}
96
+
97
+ The image is divided into 5 regions:
98
+ - (A) Top-left quarter
99
+ - (B) Top-right quarter
100
+ - (C) Bottom-left quarter
101
+ - (D) Bottom-right quarter
102
+ - (E) Center region (1/4 size, overlapping middle section)
103
+
104
+ Instructions:
105
+ 1. First describe what you see in each of the five regions.
106
+ 2. Then select the most relevant region(s) to answer the question.
107
+ 3. Choose only the minimum necessary regions - avoid selecting redundant areas that show the same content. For example, if one patch contains the entire object(s), do not select another patch that only shows a part of the same object(s).
108
+
109
+
110
+ Response format:
111
+ <analysis>: Describe the image and five patches first. Then analyze the question and select the most relevant patch or list of patches.
112
+ <patch>: List of letters (A-E)
113
+ """
114
+ # Read image and create input data
115
+ with open(image, 'rb') as file:
116
+ image_bytes = file.read()
117
+ input_data = [prompt, image_bytes]
118
+
119
+ # Get response from LLM
120
+ response = self.llm_engine(input_data, response_format=PatchZoomerResponse)
121
+
122
+ # Save patches
123
+ image_dir = os.path.dirname(image)
124
+ image_name = os.path.splitext(os.path.basename(image))[0]
125
+
126
+ # Update the return structure
127
+ patch_info = []
128
+ for patch in response.patch:
129
+ patch_name = self.matching_dict[patch]
130
+ save_path = os.path.join(self.output_dir,
131
+ f"{image_name}_{patch_name}_zoomed_{zoom_factor}x.png")
132
+ saved_path = self._save_patch(image, patch, save_path, zoom_factor)
133
+ save_path = os.path.abspath(saved_path)
134
+ patch_info.append({
135
+ "path": save_path,
136
+ "description": f"The {self.matching_dict[patch]} region of the image: {image}."
137
+ })
138
+
139
+ return {
140
+ "analysis": response.analysis,
141
+ "patches": patch_info
142
+ }
143
+
144
+ except Exception as e:
145
+ print(f"Error in patch zooming: {e}")
146
+ return None
147
+
148
+ def get_metadata(self):
149
+ return super().get_metadata()
150
+
151
+ if __name__ == "__main__":
152
+ # Test command:
153
+ """
154
+ Run the following commands in the terminal to test the script:
155
+
156
+ cd octotools/tools/relevant_patch_zoomer
157
+ python tool.py
158
+ """
159
+
160
+ # Get the directory of the current script
161
+ script_dir = os.path.dirname(os.path.abspath(__file__))
162
+
163
+ # Example usage of the Relevant_Patch_Zoomer_Tool
164
+ tool = Relevant_Patch_Zoomer_Tool()
165
+ tool.set_custom_output_dir(f"{script_dir}/zoomed_patches")
166
+
167
+ # Get tool metadata
168
+ metadata = tool.get_metadata()
169
+ print(metadata)
170
+
171
+ # Construct the full path to the image using the script's directory
172
+ relative_image_path = "examples/car.png"
173
+ image_path = os.path.join(script_dir, relative_image_path)
174
+ question = "What is the color of the car?"
175
+
176
+ # Execute the tool
177
+ try:
178
+ result = tool.execute(image=image_path, question=question)
179
+ if result:
180
+ print("\nDetected Patches:")
181
+ for patch in result['patches']:
182
+ print(f"Path: {patch['path']}")
183
+ print(f"Description: {patch['description']}")
184
+ print()
185
+ except Exception as e:
186
+ print(f"Execution failed: {e}")
187
+
188
+ print("Done!")