Commit: opentools --> octotools; added remaining tools; polished the UI
Note: this view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- .gitignore +177 -0
- README.md +7 -0
- app.py +41 -23
- app_bak_0215.py +307 -0
- {opentools → octotools}/__init__.py +0 -0
- {opentools → octotools}/engine/__init__.py +0 -0
- {opentools → octotools}/engine/base.py +0 -0
- {opentools → octotools}/engine/openai.py +2 -2
- {opentools → octotools}/models/__init__.py +0 -0
- {opentools → octotools}/models/executor.py +2 -2
- {opentools → octotools}/models/formatters.py +0 -0
- {opentools → octotools}/models/initializer.py +10 -10
- {opentools → octotools}/models/memory.py +0 -0
- {opentools → octotools}/models/planner.py +3 -3
- {opentools → octotools}/models/utils.py +0 -0
- {opentools → octotools}/tools/README.md +1 -1
- {opentools → octotools}/tools/__init__.py +0 -0
- octotools/tools/advanced_object_detector/__init__.py +0 -0
- octotools/tools/advanced_object_detector/examples/baseball.png +0 -0
- octotools/tools/advanced_object_detector/test.log +366 -0
- octotools/tools/advanced_object_detector/tool.py +236 -0
- octotools/tools/arxiv_paper_searcher/__init__.py +0 -0
- octotools/tools/arxiv_paper_searcher/test.log +120 -0
- octotools/tools/arxiv_paper_searcher/tool.py +165 -0
- {opentools → octotools}/tools/base.py +2 -2
- {opentools → octotools}/tools/generalist_solution_generator/tool.py +3 -3
- octotools/tools/google_search/__init__.py +0 -0
- octotools/tools/google_search/test.log +29 -0
- octotools/tools/google_search/tool.py +136 -0
- octotools/tools/image_captioner/__init__.py +0 -0
- octotools/tools/image_captioner/examples/baseball.png +0 -0
- octotools/tools/image_captioner/test.log +7 -0
- octotools/tools/image_captioner/tool.py +96 -0
- octotools/tools/nature_news_fetcher/__init__.py +0 -0
- octotools/tools/nature_news_fetcher/test.log +180 -0
- octotools/tools/nature_news_fetcher/tool.py +181 -0
- octotools/tools/object_detector/__init__.py +0 -0
- octotools/tools/object_detector/examples/baseball.png +0 -0
- octotools/tools/object_detector/test.log +112 -0
- octotools/tools/object_detector/tool.py +179 -0
- octotools/tools/pubmed_search/__init__.py +0 -0
- octotools/tools/pubmed_search/test.log +3 -0
- octotools/tools/pubmed_search/tool.py +112 -0
- octotools/tools/python_code_generator/__init__.py +0 -0
- octotools/tools/python_code_generator/test.log +13 -0
- octotools/tools/python_code_generator/tool.py +243 -0
- octotools/tools/relevant_patch_zoomer/__init__.py +0 -0
- octotools/tools/relevant_patch_zoomer/examples/car.png +0 -0
- octotools/tools/relevant_patch_zoomer/test.log +10 -0
- octotools/tools/relevant_patch_zoomer/tool.py +188 -0
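Since the commit is essentially a top-level package rename from `opentools` to `octotools` (plus new tools), downstream code only needs to change the package prefix of its imports. A minimal sketch of the before/after, using module paths taken from the diffs below:

```python
# Old (pre-commit) package name:
# from opentools.models.initializer import Initializer
# from opentools.engine.openai import ChatOpenAI

# New package name introduced by this commit; class names are unchanged.
from octotools.models.initializer import Initializer
from octotools.models.planner import Planner
from octotools.models.memory import Memory
from octotools.models.executor import Executor
from octotools.engine.openai import ChatOpenAI
```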
.gitignore
ADDED
@@ -0,0 +1,177 @@
```
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
# *.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# [Octotools]
octotools.egg-info/
locals/
results/
logs/
*.zip
*.pt
cache/
tool_cache/
detected_objects/

# [Gradio]
demo_solver_cache/
```
README.md
CHANGED
@@ -15,3 +15,10 @@ pinned: false
````diff
 - https://www.gradio.app/guides/agents-and-tool-usage
 
 - Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+
+### Example
+
+```
+How many baseballs are there?
+```
````
app.py
CHANGED
Summary of the changes:

The `opentools` imports were replaced by their `octotools` counterparts, and a leftover commented-out `ChatMessage` stub was removed:

```python
from octotools.models.initializer import Initializer
from octotools.models.planner import Planner
from octotools.models.memory import Memory
from octotools.models.executor import Executor
from octotools.models.utils import make_json_serializable
```

Other changes:

- The commented-out `self.benchmark_data` line was dropped from `Solver.__init__`.
- `parse_arguments()` now describes the program as "Run the OctoTools demo with specified parameters."
- The missing-key error in `solve_problem_gradio()` now reads "⚠️ Error: OpenAI API Key is required."
- In `main()`, the Gradio block is bracketed by `#################### Gradio Interface ####################` comments, the page title was changed to "The OctoTools Agentic Solver", and the sliders gained labels: `max_steps` is labeled "Max Steps", and `max_time` now defaults to 150 seconds with `minimum=60, maximum=300, step=30` and the label "Max Time (seconds)".
- The `__main__` block now enables the full toolbox before launching the demo:

```python
if __name__ == "__main__":
    args = parse_arguments()

    # Manually set enabled tools
    # args.enabled_tools = "Generalist_Solution_Generator_Tool"

    # All tools
    all_tools = [
        "Generalist_Solution_Generator_Tool",

        "Image_Captioner_Tool",
        "Object_Detector_Tool",
        "Text_Detector_Tool",
        "Relevant_Patch_Zoomer_Tool",

        "Python_Code_Generator_Tool",

        "ArXiv_Paper_Searcher_Tool",
        "Google_Search_Tool",
        "Nature_News_Fetcher_Tool",
        "Pubmed_Search_Tool",
        "URL_Text_Extractor_Tool",
        "Wikipedia_Knowledge_Searcher_Tool"
    ]
    args.enabled_tools = ",".join(all_tools)

    main(args)
```
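The UI wiring above follows Gradio's streaming-generator pattern: the click handler is a generator that yields the growing message list, and the Chatbot re-renders on every yield. A minimal, self-contained sketch of that pattern, independent of the OctoTools classes (the `fake_solver` below is purely illustrative, not the project's solver):

```python
import time
import gradio as gr

def fake_solver(query):
    # Illustrative stand-in for Solver.stream_solve_user_problem: yield the
    # conversation so far after each step so the Chatbot updates live.
    history = [{"role": "assistant", "content": f"Received query: {query}"}]
    yield history
    for step in range(1, 4):
        time.sleep(0.5)  # pretend to do some work
        history.append({"role": "assistant", "content": f"Step {step} complete."})
        yield history

with gr.Blocks() as demo:
    query = gr.Textbox(label="Enter your query")
    chat = gr.Chatbot(type="messages", label="Problem-Solving Output")
    gr.Button("Run").click(fn=fake_solver, inputs=[query], outputs=chat)

if __name__ == "__main__":
    demo.launch()
```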
app_bak_0215.py
ADDED
@@ -0,0 +1,307 @@
```python
import os
import sys
import json
import argparse
import time
import io
import uuid
from PIL import Image
from typing import List, Dict, Any, Iterator
import gradio as gr

# Add the project root to the Python path
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
sys.path.insert(0, project_root)

from opentools.models.initializer import Initializer
from opentools.models.planner import Planner
from opentools.models.memory import Memory
from opentools.models.executor import Executor
from opentools.models.utlis import make_json_serializable

solver = None

class ChatMessage:
    def __init__(self, role: str, content: str, metadata: dict = None):
        self.role = role
        self.content = content
        self.metadata = metadata or {}

class Solver:
    def __init__(
        self,
        planner,
        memory,
        executor,
        task: str,
        task_description: str,
        output_types: str = "base,final,direct",
        index: int = 0,
        verbose: bool = True,
        max_steps: int = 10,
        max_time: int = 60,
        output_json_dir: str = "results",
        root_cache_dir: str = "cache"
    ):
        self.planner = planner
        self.memory = memory
        self.executor = executor
        self.task = task
        self.task_description = task_description
        self.index = index
        self.verbose = verbose
        self.max_steps = max_steps
        self.max_time = max_time
        self.output_json_dir = output_json_dir
        self.root_cache_dir = root_cache_dir

        self.output_types = output_types.lower().split(',')
        assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."

        # self.benchmark_data = self.load_benchmark_data()


    def stream_solve_user_problem(self, user_query: str, user_image: Image.Image, messages: List[ChatMessage]) -> Iterator[List[ChatMessage]]:
        """
        Streams intermediate thoughts and final responses for the problem-solving process based on user input.

        Args:
            user_query (str): The text query input from the user.
            user_image (Image.Image): The uploaded image from the user (PIL Image object).
            messages (list): A list of ChatMessage objects to store the streamed responses.
        """

        if user_image:
            # # Convert PIL Image to bytes (for processing)
            # img_bytes_io = io.BytesIO()
            # user_image.save(img_bytes_io, format="PNG")  # Convert image to PNG bytes
            # img_bytes = img_bytes_io.getvalue()  # Get bytes

            # Use image paths instead of bytes,
            os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
            img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
            user_image.save(img_path)
        else:
            img_path = None

        # Set query cache
        _cache_dir = os.path.join(self.root_cache_dir)
        self.executor.set_query_cache_dir(_cache_dir)

        # Step 1: Display the received inputs
        if user_image:
            messages.append(ChatMessage(role="assistant", content=f"Received Query: {user_query}\nImage Uploaded"))
        else:
            messages.append(ChatMessage(role="assistant", content=f"Received Query: {user_query}"))
        yield messages

        # Step 2: Add "thinking" status while processing
        messages.append(ChatMessage(
            role="assistant",
            content="",
            metadata={"title": "⏳ Thinking: Processing input..."}
        ))

        # Step 3: Initialize problem-solving state
        start_time = time.time()
        step_count = 0
        json_data = {"query": user_query, "image": "Image received as bytes"}

        # Step 4: Query Analysis
        query_analysis = self.planner.analyze_query(user_query, img_path)
        json_data["query_analysis"] = query_analysis
        messages.append(ChatMessage(role="assistant", content=f"Query Analysis:\n{query_analysis}"))
        yield messages

        # Step 5: Execution loop (similar to your step-by-step solver)
        while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
            step_count += 1
            messages.append(ChatMessage(role="assistant", content=f"Step {step_count}: Generating next step..."))
            yield messages

            # Generate the next step
            next_step = self.planner.generate_next_step(
                user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
            )
            context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)

            # Display the step information
            messages.append(ChatMessage(
                role="assistant",
                content=f"Step {step_count} Details:\n- Context: {context}\n- Sub-goal: {sub_goal}\n- Tool: {tool_name}"
            ))
            yield messages

            # Handle tool execution or errors
            if tool_name not in self.planner.available_tools:
                messages.append(ChatMessage(role="assistant", content=f"⚠️ Error: Tool '{tool_name}' is not available."))
                yield messages
                continue

            # Execute the tool command
            tool_command = self.executor.generate_tool_command(
                user_query, img_path, context, sub_goal, tool_name, self.planner.toolbox_metadata[tool_name]
            )
            explanation, command = self.executor.extract_explanation_and_command(tool_command)
            result = self.executor.execute_tool_command(tool_name, command)
            result = make_json_serializable(result)

            messages.append(ChatMessage(role="assistant", content=f"✅ Step {step_count} Result:\n{json.dumps(result, indent=4)}"))
            yield messages

            # Step 6: Memory update and stopping condition
            self.memory.add_action(step_count, tool_name, sub_goal, tool_command, result)
            stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
            conclusion = self.planner.extract_conclusion(stop_verification)

            messages.append(ChatMessage(role="assistant", content=f"Step {step_count} Conclusion: {conclusion}"))
            yield messages

            if conclusion == 'STOP':
                break

        # Step 7: Generate Final Output (if needed)
        if 'final' in self.output_types:
            final_output = self.planner.generate_final_output(user_query, img_path, self.memory)
            messages.append(ChatMessage(role="assistant", content=f"Final Output:\n{final_output}"))
            yield messages

        if 'direct' in self.output_types:
            direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
            messages.append(ChatMessage(role="assistant", content=f"Direct Output:\n{direct_output}"))
            yield messages

        # Step 8: Completion Message
        messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process complete."))
        yield messages

def parse_arguments():
    parser = argparse.ArgumentParser(description="Run the OpenTools demo with specified parameters.")
    parser.add_argument("--llm_engine_name", default="gpt-4o", help="LLM engine name.")
    parser.add_argument("--max_tokens", type=int, default=2000, help="Maximum tokens for LLM generation.")
    parser.add_argument("--run_baseline_only", type=bool, default=False, help="Run only the baseline (no toolbox).")
    parser.add_argument("--task", default="minitoolbench", help="Task to run.")
    parser.add_argument("--task_description", default="", help="Task description.")
    parser.add_argument(
        "--output_types",
        default="base,final,direct",
        help="Comma-separated list of required outputs (base,final,direct)"
    )
    parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
    parser.add_argument("--root_cache_dir", default="demo_solver_cache", help="Path to solver cache directory.")
    parser.add_argument("--output_json_dir", default="demo_results", help="Path to output JSON directory.")
    parser.add_argument("--max_steps", type=int, default=10, help="Maximum number of steps to execute.")
    parser.add_argument("--max_time", type=int, default=60, help="Maximum time allowed in seconds.")
    parser.add_argument("--verbose", type=bool, default=True, help="Enable verbose output.")
    return parser.parse_args()


def solve_problem_gradio(user_query, user_image):
    """
    Wrapper function to connect the solver to Gradio.
    Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
    """
    global solver  # Ensure we're using the globally defined solver

    if solver is None:
        return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]

    messages = []  # Initialize message list
    for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
        yield [[msg.role, msg.content] for msg in message_batch]  # Ensure correct format for Gradio Chatbot


def main(args):
    global solver
    # Initialize Tools
    enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []

    # Instantiate Initializer
    initializer = Initializer(
        enabled_tools=enabled_tools,
        model_string=args.llm_engine_name
    )

    # Instantiate Planner
    planner = Planner(
        llm_engine_name=args.llm_engine_name,
        toolbox_metadata=initializer.toolbox_metadata,
        available_tools=initializer.available_tools
    )

    # Instantiate Memory
    memory = Memory()

    # Instantiate Executor
    executor = Executor(
        llm_engine_name=args.llm_engine_name,
        root_cache_dir=args.root_cache_dir,
        enable_signal=False
    )

    # Instantiate Solver
    solver = Solver(
        planner=planner,
        memory=memory,
        executor=executor,
        task=args.task,
        task_description=args.task_description,
        output_types=args.output_types,  # Add new parameter
        verbose=args.verbose,
        max_steps=args.max_steps,
        max_time=args.max_time,
        output_json_dir=args.output_json_dir,
        root_cache_dir=args.root_cache_dir
    )

    # Test Inputs
    # user_query = "How many balls are there in the image?"
    # user_image_path = "/home/sheng/toolbox-agent/mathvista_113.png"  # Replace with your actual image path

    # # Load the image as a PIL object
    # user_image = Image.open(user_image_path).convert("RGB")  # Ensure it's in RGB mode

    # print("\n=== Starting Problem Solving ===\n")
    # messages = []
    # for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
    #     for message in message_batch:
    #         print(f"{message.role}: {message.content}")

    # messages = []
    # solver.stream_solve_user_problem(user_query, user_image, messages)


    # def solve_problem_stream(user_query, user_image):
    #     messages = []  # Ensure it's a list of [role, content] pairs

    #     for message_batch in solver.stream_solve_user_problem(user_query, user_image, messages):
    #         yield message_batch  # Stream messages correctly in tuple format

    # solve_problem_stream(user_query, user_image)

    # ========== Gradio Interface ==========
    with gr.Blocks() as demo:
        gr.Markdown("# OctoTools AI Solver")  # Title

        with gr.Row():
            user_query = gr.Textbox(label="Enter your query", placeholder="Type your question here...")
            user_image = gr.Image(type="pil", label="Upload an image")  # Accepts multiple formats

        run_button = gr.Button("Run")  # Run button
        chatbot_output = gr.Chatbot(label="Problem-Solving Output")

        # Link button click to function
        run_button.click(fn=solve_problem_gradio, inputs=[user_query, user_image], outputs=chatbot_output)

    # Launch the Gradio app
    demo.launch()


if __name__ == "__main__":
    args = parse_arguments()
    main(args)
```
{opentools → octotools}/__init__.py
RENAMED
File without changes

{opentools → octotools}/engine/__init__.py
RENAMED
File without changes

{opentools → octotools}/engine/base.py
RENAMED
File without changes
{opentools → octotools}/engine/openai.py
RENAMED
```diff
@@ -51,9 +51,9 @@ class ChatOpenAI(EngineLM, CachedEngine):
         :param is_multimodal:
         """
         if enable_cache:
-            root = platformdirs.user_cache_dir("opentools")
+            root = platformdirs.user_cache_dir("octotools")
             cache_path = os.path.join(root, f"cache_openai_{model_string}.db")
-            # For example, cache_path = /root/.cache/opentools/cache_openai_gpt-4o-mini.db
+            # For example, cache_path = /root/.cache/octotools/cache_openai_gpt-4o-mini.db
             # print(f"Cache path: {cache_path}")
 
             self.image_cache_dir = os.path.join(root, "image_cache")
```
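As a quick sketch of what this rename means at run time (assuming only the `platformdirs` package that the engine already uses; the exact model string is taken from the comment above):

```python
import os
import platformdirs

# After this commit the OpenAI response cache lives under the "octotools"
# application directory, e.g. ~/.cache/octotools on Linux.
root = platformdirs.user_cache_dir("octotools")
cache_path = os.path.join(root, "cache_openai_gpt-4o-mini.db")
print(cache_path)
```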
{opentools → octotools}/models/__init__.py
RENAMED
File without changes
{opentools → octotools}/models/executor.py
RENAMED
```diff
@@ -5,8 +5,8 @@ import re
 from typing import Dict, Any, List
 from datetime import datetime
 
-from opentools.engine.openai import ChatOpenAI
-from opentools.models.formatters import ToolCommand
+from octotools.engine.openai import ChatOpenAI
+from octotools.models.formatters import ToolCommand
 
 import signal
 from typing import Dict, Any, List, Optional
```
{opentools → octotools}/models/formatters.py
RENAMED
File without changes
{opentools → octotools}/models/initializer.py
RENAMED
```diff
@@ -14,7 +14,7 @@ class Initializer:
         self.model_string = model_string  # llm model string
         self.api_key = api_key
 
-        print("\nInitializing OpenTools...")
+        print("\nInitializing OctoTools...")
         print(f"Enabled tools: {self.enabled_tools}")
         print(f"LLM model string: {self.model_string}")
         self._set_up_tools()
@@ -22,8 +22,8 @@ class Initializer:
     def get_project_root(self):
         current_dir = os.path.dirname(os.path.abspath(__file__))
         while current_dir != '/':
-            if os.path.exists(os.path.join(current_dir, 'opentools')):
-                return os.path.join(current_dir, 'opentools')
+            if os.path.exists(os.path.join(current_dir, 'octotools')):
+                return os.path.join(current_dir, 'octotools')
             current_dir = os.path.dirname(current_dir)
         raise Exception("Could not find project root")
 
@@ -31,15 +31,15 @@ class Initializer:
         # Implementation of load_tools_and_get_metadata function
         print("Loading tools and getting metadata...")
         self.toolbox_metadata = {}
-        opentools_dir = self.get_project_root()
-        tools_dir = os.path.join(opentools_dir, 'tools')
+        octotools_dir = self.get_project_root()
+        tools_dir = os.path.join(octotools_dir, 'tools')
 
-        print(f"OpenTools directory: {opentools_dir}")
+        print(f"OctoTools directory: {octotools_dir}")
         print(f"Tools directory: {tools_dir}")
 
-        # Add the OpenTools directory and its parent to the Python path
-        sys.path.insert(0, opentools_dir)
-        sys.path.insert(0, os.path.dirname(opentools_dir))
+        # Add the OctoTools directory and its parent to the Python path
+        sys.path.insert(0, octotools_dir)
+        sys.path.insert(0, os.path.dirname(octotools_dir))
         print(f"Updated Python path: {sys.path}")
 
         if not os.path.exists(tools_dir):
@@ -52,7 +52,7 @@ class Initializer:
                 file = 'tool.py'
                 module_path = os.path.join(root, file)
                 module_name = os.path.splitext(file)[0]
-                relative_path = os.path.relpath(module_path, opentools_dir)
+                relative_path = os.path.relpath(module_path, octotools_dir)
                 import_path = '.'.join(os.path.split(relative_path)).replace(os.sep, '.')[:-3]
 
                 print(f"\nAttempting to import: {import_path}")
```
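The Initializer's job, as the hunks above suggest, is to locate the renamed `octotools` package root and dynamically import every `tools/*/tool.py` module it finds. A simplified, illustrative sketch of that discovery pattern (names and return value here are assumptions, not the exact implementation):

```python
import importlib
import os

def discover_tools(octotools_dir: str) -> dict:
    """Walk octotools/tools/, import each tool.py, and collect the modules."""
    tools_dir = os.path.join(octotools_dir, "tools")
    modules = {}
    for root, _, files in os.walk(tools_dir):
        if "tool.py" not in files:
            continue
        # e.g. "tools/object_detector/tool.py" -> "octotools.tools.object_detector.tool"
        relative = os.path.relpath(os.path.join(root, "tool.py"), octotools_dir)
        import_path = "octotools." + relative[:-3].replace(os.sep, ".")
        modules[import_path] = importlib.import_module(import_path)
    return modules
```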
{opentools → octotools}/models/memory.py
RENAMED
File without changes
{opentools → octotools}/models/planner.py
RENAMED
```diff
@@ -4,9 +4,9 @@ from PIL import Image
 from io import BytesIO
 from typing import Dict, Any, List, Tuple
 
-from opentools.engine.openai import ChatOpenAI
-from opentools.models.memory import Memory
-from opentools.models.formatters import QueryAnalysis, NextStep, MemoryVerification
+from octotools.engine.openai import ChatOpenAI
+from octotools.models.memory import Memory
+from octotools.models.formatters import QueryAnalysis, NextStep, MemoryVerification
 
 class Planner:
     def __init__(self, llm_engine_name: str, toolbox_metadata: dict = None, available_tools: List = None, api_key: str = None):
```
{opentools → octotools}/models/utils.py
RENAMED
File without changes
{opentools → octotools}/tools/README.md
RENAMED
````diff
@@ -8,7 +8,7 @@ To test the text detection tool, follow these steps:
 Change your current directory to where the tools are located. Replace `your_path` with the actual path to your project directory.
 
 ```sh
-cd your_path/toolbox-agent/opentools
+cd your_path/toolbox-agent/octotools
 ```
 
 2. **Run the Text Detection Tool:**
````
{opentools → octotools}/tools/__init__.py
RENAMED
File without changes

octotools/tools/advanced_object_detector/__init__.py
ADDED
File without changes

octotools/tools/advanced_object_detector/examples/baseball.png
ADDED
(binary image file)
octotools/tools/advanced_object_detector/test.log
ADDED
@@ -0,0 +1,366 @@
(Excerpt; the full 366-line log records 24 detections on the baseball example: 20 "baseball" boxes with confidence scores between 0.66 and 0.73 and 4 "basket" boxes between 0.38 and 0.41, each listed once in the JSON array and once in the plain-text summary, with its bounding box and the padded crop saved under detected_objects/.)

```
{'code': 0, 'msg': 'ok', 'data': {'task_uuid': '2d4337f5-403d-419b-9832-64b0f888f146'}}
task_uuid:2d4337f5-403d-419b-9832-64b0f888f146
[
    {
        "label": "baseball",
        "confidence score": 0.73,
        "box": [614, 137, 671, 191],
        "saved_image_path": "detected_objects/baseball_baseball_1.png"
    },
    ...
    {
        "label": "basket",
        "confidence score": 0.38,
        "box": [251, 2, 468, 216],
        "saved_image_path": "detected_objects/baseball_basket_4.png"
    }
]
Detected Objects:
Detected baseball with confidence 0.73
Bounding box: [614, 137, 671, 191]
Saved image (with padding): detected_objects/baseball_baseball_1.png
...
Detected basket with confidence 0.38
Bounding box: [251, 2, 468, 216]
Saved image (with padding): detected_objects/baseball_basket_4.png

Done!
```
octotools/tools/advanced_object_detector/tool.py
ADDED
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Grounding DINO Object Detection Tool
|
2 |
+
# https://huggingface.co/IDEA-Research/grounding-dino
|
3 |
+
|
4 |
+
import os
|
5 |
+
import time
|
6 |
+
|
7 |
+
from octotools.tools.base import BaseTool
|
8 |
+
from PIL import Image, ImageOps
|
9 |
+
|
10 |
+
import os
|
11 |
+
# Suppress stderr by redirecting it to /dev/null
|
12 |
+
import sys
|
13 |
+
import re
|
14 |
+
import base64
|
15 |
+
import requests
|
16 |
+
sys.stderr = open(os.devnull, 'w')
|
17 |
+
|
18 |
+
|
19 |
+
class Advanced_Object_Detector_Tool(BaseTool):
|
20 |
+
def __init__(self):
|
21 |
+
super().__init__(
|
22 |
+
tool_name="Advanced_Object_Detector_Tool",
|
23 |
+
tool_description="A tool that detects objects in an image using the Grounding DINO-X model and saves individual object images with empty padding.",
|
24 |
+
tool_version="1.0.0",
|
25 |
+
input_types={
|
26 |
+
"image": "str - The path to the image file.",
|
27 |
+
"labels": "list - A list of object labels to detect.",
|
28 |
+
"threshold": "float - The confidence threshold for detection (default: 0.35).",
|
29 |
+
"padding": "int - The number of pixels to add as empty padding around detected objects (default: 20)."
|
30 |
+
},
|
31 |
+
output_type="list - A list of detected objects with their scores, bounding boxes, and saved image paths.",
|
32 |
+
demo_commands=[
|
33 |
+
{
|
34 |
+
"command": 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])',
|
35 |
+
"description": "Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths."
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"command": 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)',
|
39 |
+
"description": "Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths."
|
40 |
+
}
|
41 |
+
],
|
42 |
+
user_metadata={
|
43 |
+
"limitation": "The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification."
|
44 |
+
}
|
45 |
+
)
|
46 |
+
self.DINO_KEY = os.environ.get("DINO_KEY")
|
47 |
+
|
48 |
+
def preprocess_caption(self, caption):
|
49 |
+
result = caption.lower().strip()
|
50 |
+
if result.endswith("."):
|
51 |
+
return result
|
52 |
+
return result + "."
|
53 |
+
|
54 |
+
def build_tool(self, threshold=0.35):
|
55 |
+
|
56 |
+
params_dict = {
|
57 |
+
'headers': {
|
58 |
+
"Content-Type": "application/json",
|
59 |
+
"Token" : self.DINO_KEY
|
60 |
+
},
|
61 |
+
'body':{
|
62 |
+
"image" : None,
|
63 |
+
"prompts": [
|
64 |
+
{"type": "text", "text": None},
|
65 |
+
],
|
66 |
+
"bbox_threshold": threshold
|
67 |
+
}
|
68 |
+
|
69 |
+
}
|
70 |
+
return params_dict
|
71 |
+
|
72 |
+
|
73 |
+
def save_detected_object(self, image, box, image_name, label, index, padding):
|
74 |
+
object_image = image.crop(box)
|
75 |
+
padded_image = ImageOps.expand(object_image, border=padding, fill='white')
|
76 |
+
|
77 |
+
filename = f"{image_name}_{label}_{index}.png"
|
78 |
+
os.makedirs(self.output_dir, exist_ok=True)
|
79 |
+
save_path = os.path.join(self.output_dir, filename)
|
80 |
+
|
81 |
+
padded_image.save(save_path)
|
82 |
+
return save_path
|
83 |
+
|
84 |
+
def execute(self, image, labels, threshold=0.35, padding=20, max_retries=10, retry_delay=5):
|
85 |
+
retry_count = 0
|
86 |
+
params = self.build_tool(threshold)
|
87 |
+
|
88 |
+
def process_image(input_str):
|
89 |
+
|
90 |
+
def image_to_base64(image_path):
|
91 |
+
with open(image_path, "rb") as image_file:
|
92 |
+
return base64.b64encode(image_file.read()).decode('utf-8')
|
93 |
+
# Define common image file extensions
|
94 |
+
image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.tiff', '.webp'}
|
95 |
+
|
96 |
+
# Check if it is a URL
|
97 |
+
url_pattern = re.compile(r'^(http|https|ftp)://')
|
98 |
+
if url_pattern.match(input_str):
|
99 |
+
if input_str.lower().endswith(tuple(image_extensions)):
|
100 |
+
return input_str
|
101 |
+
return input_str
|
102 |
+
|
103 |
+
# Check if it is a file path
|
104 |
+
_, ext = os.path.splitext(input_str)
|
105 |
+
if ext.lower() in image_extensions:
|
106 |
+
image_base64 = image_to_base64(input_str)
|
107 |
+
return f'data:image/png;base64,{image_base64}'
|
108 |
+
return None
|
109 |
+
|
110 |
+
if len(labels) < 1:
|
111 |
+
preprocessed_prompt = '<prompt_free>'
|
112 |
+
else:
|
113 |
+
preprocessed_prompt = ''
|
114 |
+
for label in labels:
|
115 |
+
preprocessed_prompt += self.preprocess_caption(label)
|
116 |
+
|
117 |
+
|
118 |
+
body = params['body']
|
119 |
+
body['image'] = process_image(image)
|
120 |
+
body['prompts'] = [{"type": "text", "text": preprocessed_prompt}]
|
121 |
+
|
122 |
+
# send request
|
123 |
+
resp = requests.post(
|
124 |
+
'https://api.deepdataspace.com/tasks/dinox',
|
125 |
+
json=body,
|
126 |
+
headers=params['headers']
|
127 |
+
)
|
128 |
+
|
129 |
+
if resp.status_code == 200:
|
130 |
+
json_resp = resp.json()
|
131 |
+
print(json_resp)
|
132 |
+
|
133 |
+
# get task_uuid
|
134 |
+
task_uuid = json_resp["data"]["task_uuid"]
|
135 |
+
print(f'task_uuid:{task_uuid}')
|
136 |
+
|
137 |
+
# poll get task result
|
138 |
+
while retry_count < max_retries:
|
139 |
+
resp = requests.get(f'https://api.deepdataspace.com/task_statuses/{task_uuid}', headers=params['headers'])
|
140 |
+
|
141 |
+
|
142 |
+
if resp.status_code != 200:
|
143 |
+
break
|
144 |
+
json_resp = resp.json()
|
145 |
+
|
146 |
+
if json_resp["data"]["status"] not in ["waiting", "running"]:
|
147 |
+
break
|
148 |
+
time.sleep(1)#retry_delay)
|
149 |
+
retry_count += 1
|
150 |
+
|
151 |
+
if json_resp["data"]["status"] == "failed":
|
152 |
+
print(f'failed resp: {json_resp}')
|
153 |
+
elif json_resp["data"]["status"] == "success":
|
154 |
+
# print(f'success resp: {json_resp}')
|
155 |
+
formatted_results = []
|
156 |
+
original_image = Image.open(image)
|
157 |
+
image_name = os.path.splitext(os.path.basename(image))[0]
|
158 |
+
|
159 |
+
object_counts = {}
|
160 |
+
|
161 |
+
for result in json_resp['data']['result']['objects']:
|
162 |
+
box = tuple(result["bbox"])
|
163 |
+
try:
|
164 |
+
box = [int(x) for x in box]
|
165 |
+
except:
|
166 |
+
continue
|
167 |
+
label = result["category"]
|
168 |
+
score = round(result["score"], 2)
|
169 |
+
if label.endswith("."):
|
170 |
+
label = label[:-1]
|
171 |
+
|
172 |
+
object_counts[label] = object_counts.get(label, 0) + 1
|
173 |
+
index = object_counts[label]
|
174 |
+
|
175 |
+
save_path = self.save_detected_object(original_image, box, image_name, label, index, padding)
|
176 |
+
|
177 |
+
formatted_results.append({
|
178 |
+
"label": label,
|
179 |
+
"confidence score": score,
|
180 |
+
"box": box,
|
181 |
+
"saved_image_path": save_path
|
182 |
+
})
|
183 |
+
|
184 |
+
return formatted_results
|
185 |
+
else:
|
186 |
+
print(f'get task resp: {resp.status_code} - {resp.text}')
|
187 |
+
else:
|
188 |
+
print(f'Error: {resp.status_code} - {resp.text}')
|
189 |
+
|
190 |
+
print(f"Failed to detect objects after {max_retries} attempts.")
|
191 |
+
return []
|
192 |
+
|
193 |
+
def get_metadata(self):
|
194 |
+
metadata = super().get_metadata()
|
195 |
+
return metadata
|
196 |
+
|
197 |
+
if __name__ == "__main__":
|
198 |
+
# Test command:
|
199 |
+
"""
|
200 |
+
Run the following commands in the terminal to test the script:
|
201 |
+
|
202 |
+
cd octotools/tools/advanced_object_detector
|
203 |
+
python tool.py
|
204 |
+
"""
|
205 |
+
|
206 |
+
# Get the directory of the current script
|
207 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
208 |
+
|
209 |
+
# Example usage of the Object_Detector_Tool
|
210 |
+
tool = Advanced_Object_Detector_Tool()
|
211 |
+
tool.set_custom_output_dir("detected_objects")
|
212 |
+
|
213 |
+
# Get tool metadata
|
214 |
+
metadata = tool.get_metadata()
|
215 |
+
# print(metadata)
|
216 |
+
|
217 |
+
# Construct the full path to the image using the script's directory
|
218 |
+
relative_image_path = "examples/baseball.png"
|
219 |
+
image_path = os.path.join(script_dir, relative_image_path)
|
220 |
+
|
221 |
+
import json
|
222 |
+
|
223 |
+
# Execute the tool
|
224 |
+
try:
|
225 |
+
execution = tool.execute(image=image_path, labels=["baseball", "basket"], padding=20)
|
226 |
+
print(json.dumps(execution, indent=4))
|
227 |
+
print("Detected Objects:")
|
228 |
+
for obj in execution:
|
229 |
+
print(f"Detected {obj['label']} with confidence {obj['confidence score']}")
|
230 |
+
print(f"Bounding box: {obj['box']}")
|
231 |
+
print(f"Saved image (with padding): {obj['saved_image_path']}")
|
232 |
+
print()
|
233 |
+
except ValueError as e:
|
234 |
+
print(f"Execution failed: {e}")
|
235 |
+
|
236 |
+
print("Done!")
|
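For quick reference, a minimal sketch of driving this tool from another script, assuming the DINO_KEY environment variable is set, the octotools package is importable, and an example image exists at the illustrative path below (the file's own __main__ block above does essentially the same thing):

import json
import os

from octotools.tools.advanced_object_detector.tool import Advanced_Object_Detector_Tool

# Assumes DINO_KEY is exported; the constructor reads it via os.environ.get("DINO_KEY").
assert os.environ.get("DINO_KEY"), "Set DINO_KEY before running this sketch."

tool = Advanced_Object_Detector_Tool()
tool.set_custom_output_dir("detected_objects")  # cropped detections are saved under this directory

# The image path and label list are illustrative values; an empty label list
# falls back to the '<prompt_free>' prompt in execute().
detections = tool.execute(image="examples/baseball.png", labels=["baseball", "basket"], padding=20)
print(json.dumps(detections, indent=4))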
octotools/tools/arxiv_paper_searcher/__init__.py
ADDED
File without changes
|
octotools/tools/arxiv_paper_searcher/test.log
ADDED
@@ -0,0 +1,120 @@
1 |
+
ArXiv Search Tool Test
|
2 |
+
Tool Metadata:
|
3 |
+
{'tool_name': 'ArXiv_Paper_Searcher_Tool', 'tool_description': 'A tool that searches arXiv for papers based on a given query.', 'tool_version': '1.0.0', 'input_types': {'query': 'str - The search query for arXiv papers.', 'size': 'int - The number of results per page (25, 50, 100, or 200). If None, use 25.', 'max_results': 'int - The maximum number of papers to return (default: 25). Should be less than or equal to 100.'}, 'output_type': 'list - A list of dictionaries containing paper information.', 'demo_commands': [{'command': 'execution = tool.execute(query="tool agents with large language models")', 'description': 'Search for papers about tool agents with large language models.'}, {'command': 'execution = tool.execute(query="quantum computing", size=100, max_results=50)', 'description': 'Search for quantum computing papers, with 100 results per page, returning a maximum of 50 papers.'}, {'command': 'execution = tool.execute(query="machine learning", max_results=75)', 'description': 'Search for machine learning papers, returning a maximum of 75 papers.'}], 'require_llm_engine': False, 'user_metadata': {'valid_sizes': [25, 50, 100, 200], 'base_url': 'https://arxiv.org/search/'}}
|
4 |
+
|
5 |
+
==>> Execution:
|
6 |
+
[
|
7 |
+
{
|
8 |
+
"title": "Position: Multimodal Large Language Models Can Significantly Advance Scientific Reasoning",
|
9 |
+
"authors": "Yibo Yan, Shen Wang, Jiahao Huo, Jingheng Ye, Zhendong Chu, Xuming Hu, Philip S. Yu, Carla Gomes, Bart Selman, Qingsong Wen",
|
10 |
+
"abstract": "Scientific reasoning, the process through which humans apply logic, evidence, and critical thinking to explore and interpret scientific phenomena, is essential in advancing knowledge reasoning across diverse fields. However, despite significant progress, current scientific reasoning models still struggle with generalization across domains and often fall short of multimodal perception. Multimodal Large Language Models (MLLMs), which integrate text, images, and other modalities, present an exciting opportunity to overcome these limitations and enhance scientific reasoning. Therefore, this position paper argues that MLLMs can significantly advance scientific reasoning across disciplines such as mathematics, physics, chemistry, and biology. First, we propose a four-stage research roadmap of scientific reasoning capabilities, and highlight the current state of MLLM applications in scientific reasoning, noting their ability to integrate and reason over diverse data types. Second, we summarize the key challenges that remain obstacles to achieving MLLM's full potential. To address these challenges, we propose actionable insights and suggestions for the future. Overall, our work offers a novel perspective on MLLM integration with scientific reasoning, providing the LLM community with a valuable vision for achieving Artificial General Intelligence (AGI).",
|
11 |
+
"link": "https://arxiv.org/abs/2502.02871"
|
12 |
+
},
|
13 |
+
{
|
14 |
+
"title": "Adaptive Graph of Thoughts: Test-Time Adaptive Reasoning Unifying Chain, Tree, and Graph Structures",
|
15 |
+
"authors": "Tushar Pandey, Ara Ghukasyan, Oktay Goktas, Santosh Kumar Radha",
|
16 |
+
"abstract": "Large Language Models (LLMs) have demonstrated impressive reasoning capabilities, yet their performance is highly dependent on the prompting strategy and model scale. While reinforcement learning and fine-tuning have been deployed to boost reasoning, these approaches incur substantial computational and data overhead. In this work, we introduce Adaptive Graph of Thoughts (AGoT), a dynamic, graph-based inference framework that enhances LLM reasoning solely at test time. Rather than relying on fixed-step methods like Chain of Thought (CoT) or Tree of Thoughts (ToT), AGoT recursively decomposes complex queries into structured subproblems, forming an dynamic directed acyclic graph (DAG) of interdependent reasoning steps. By selectively expanding only those subproblems that require further analysis, AGoT unifies the strengths of chain, tree, and graph paradigms into a cohesive framework that allocates computation where it is most needed. We validate our approach on diverse benchmarks spanning multi-hop retrieval, scientific reasoning, and mathematical problem-solving, achieving up to 46.2% improvement on scientific reasoning tasks (GPQA) - comparable to gains achieved through computationally intensive reinforcement learning approaches and outperforming state-of-the-art iterative approaches. These results suggest that dynamic decomposition and structured recursion offer a scalable, cost-effective alternative to post-training modifications, paving the way for more robust, general-purpose reasoning in LLMs.",
|
17 |
+
"link": "https://arxiv.org/abs/2502.05078"
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"title": "VersaPRM: Multi-Domain Process Reward Model via Synthetic Reasoning Data",
|
21 |
+
"authors": "Thomas Zeng, Shuibai Zhang, Shutong Wu, Christian Classen, Daewon Chae, Ethan Ewer, Minjae Lee, Heeju Kim, Wonjun Kang, Jackson Kunde, Ying Fan, Jungtaek Kim, Hyung Il Koo, Kannan Ramchandran, Dimitris Papailiopoulos, Kangwook Lee",
|
22 |
+
"abstract": "Process Reward Models (PRMs) have proven effective at enhancing mathematical reasoning for Large Language Models (LLMs) by leveraging increased inference-time computation. However, they are predominantly trained on mathematical data and their generalizability to non-mathematical domains has not been rigorously studied. In response, this work first shows that current PRMs have poor performance in other domains. To address this limitation, we introduce VersaPRM, a multi-domain PRM trained on synthetic reasoning data generated using our novel data generation and annotation method. VersaPRM achieves consistent performance gains across diverse domains. For instance, in the MMLU-Pro category of Law, VersaPRM via weighted majority voting, achieves a 7.9% performance gain over the majority voting baseline -- surpassing Qwen2.5-Math-PRM's gain of 1.3%. We further contribute to the community by open-sourcing all data, code and models for VersaPRM.",
|
23 |
+
"link": "https://arxiv.org/abs/2502.06737"
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"title": "Large Language Models for Multi-Robot Systems: A Survey",
|
27 |
+
"authors": "Peihan Li, Zijian An, Shams Abrar, Lifeng Zhou",
|
28 |
+
"abstract": "The rapid advancement of Large Language Models (LLMs) has opened new possibilities in Multi-Robot Systems (MRS), enabling enhanced communication, task planning, and human-robot interaction. Unlike traditional single-robot and multi-agent systems, MRS poses unique challenges, including coordination, scalability, and real-world adaptability. This survey provides the first comprehensive exploration of LLM integration into MRS. It systematically categorizes their applications across high-level task allocation, mid-level motion planning, low-level action generation, and human intervention. We highlight key applications in diverse domains, such as household robotics, construction, formation control, target tracking, and robot games, showcasing the versatility and transformative potential of LLMs in MRS. Furthermore, we examine the challenges that limit adapting LLMs in MRS, including mathematical reasoning limitations, hallucination, latency issues, and the need for robust benchmarking systems. Finally, we outline opportunities for future research, emphasizing advancements in fine-tuning, reasoning techniques, and task-specific models. This survey aims to guide researchers in the intelligence and real-world deployment of MRS powered by LLMs. Based on the fast-evolving nature of research in the field, we keep updating the papers in the open-source Github repository.",
|
29 |
+
"link": "https://arxiv.org/abs/2502.03814"
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"title": "MergeME: Model Merging Techniques for Homogeneous and Heterogeneous MoEs",
|
33 |
+
"authors": "Yuhang Zhou, Giannis Karamanolakis, Victor Soto, Anna Rumshisky, Mayank Kulkarni, Furong Huang, Wei Ai, Jianhua Lu",
|
34 |
+
"abstract": "The recent success of specialized Large Language Models (LLMs) in domains such as mathematical reasoning and coding has led to growing interest in methods for merging these expert LLMs into a unified Mixture-of-Experts (MoE) model, with the goal of enhancing performance in each domain while retaining effectiveness on general tasks. However, the effective merging of expert models remains an open challenge, especially for models with highly divergent weight parameters or different architectures. State-of-the-art MoE merging methods only work with homogeneous model architectures and rely on simple unweighted averaging to merge expert layers, which does not address parameter interference and requires extensive fine-tuning of the merged MoE to restore performance. To address these limitations, this paper introduces new MoE merging techniques, including strategies to mitigate parameter interference, routing heuristics to reduce the need for MoE fine-tuning, and a novel method for merging experts with different architectures. Extensive experiments across multiple domains demonstrate the effectiveness of our proposed methods, reducing fine-tuning costs, improving performance over state-of-the-art methods, and expanding the applicability of MoE merging.",
|
35 |
+
"link": "https://arxiv.org/abs/2502.00997"
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"title": "Satori: Reinforcement Learning with Chain-of-Action-Thought Enhances LLM Reasoning via Autoregressive Search",
|
39 |
+
"authors": "Maohao Shen, Guangtao Zeng, Zhenting Qi, Zhang-Wei Hong, Zhenfang Chen, Wei Lu, Gregory Wornell, Subhro Das, David Cox, Chuang Gan",
|
40 |
+
"abstract": "Large language models (LLMs) have demonstrated remarkable reasoning capabilities across diverse domains. Recent studies have shown that increasing test-time computation enhances LLMs' reasoning capabilities. This typically involves extensive sampling at inference time guided by an external LLM verifier, resulting in a two-player system. Despite external guidance, the effectiveness of this system demonstrates the potential of a single LLM to tackle complex tasks. Thus, we pose a new research problem: Can we internalize the searching capabilities to fundamentally enhance the reasoning abilities of a single LLM? This work explores an orthogonal direction focusing on post-training LLMs for autoregressive searching (i.e., an extended reasoning process with self-reflection and self-exploration of new strategies). To achieve this, we propose the Chain-of-Action-Thought (COAT) reasoning and a two-stage training paradigm: 1) a small-scale format tuning stage to internalize the COAT reasoning format and 2) a large-scale self-improvement stage leveraging reinforcement learning. Our approach results in Satori, a 7B LLM trained on open-source models and data. Extensive empirical evaluations demonstrate that Satori achieves state-of-the-art performance on mathematical reasoning benchmarks while exhibits strong generalization to out-of-domain tasks. Code, data, and models will be fully open-sourced.",
|
41 |
+
"link": "https://arxiv.org/abs/2502.02508"
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"title": "Reasoning-as-Logic-Units: Scaling Test-Time Reasoning in Large Language Models Through Logic Unit Alignment",
|
45 |
+
"authors": "Cheryl Li, Tianyuan Xu, Yiwen Guo",
|
46 |
+
"abstract": "Chain-of-Thought (CoT) prompting has shown promise in enhancing the reasoning capabilities of large language models (LLMs) by generating natural language (NL) rationales that lead to the final answer. However, it struggles with numerical computation, which has somehow led to the development of program-aided techniques. Despite their potential, a persistent challenge remains: inconsistencies between LLM-reported reasoning steps and the logic in generated programs, which we term ``reasoning hallucinations.\" This stems from the inherent ambiguities of NL and the statistical nature of LLMs, which often lack rigorous logical coherence. To address this challenge, we propose a novel test-time scaling framework, Reasoning-as-Logic-Units (RaLU), which constructs a more reliable reasoning path by aligning logical units between the generated program and their corresponding NL descriptions. By decomposing the initially generated program into discrete units using static analysis, RaLU engages in an iterative dialogue with the LLM to judge, refine, and explain each unit. A rewind-and-correct mechanism ensures alignment between code statements and task requirements in each unit, ultimately forming a cohesive reasoning path under the program's logic, from which the model reaches a final solution. Our experiments demonstrate that RaLU significantly outperforms existing baselines in mathematical reasoning (GSM8K, MATH) and algorithmic reasoning (HumanEval+, MBPP+), underscoring its potential to advance LLM reasoning and programming by offering enhanced accuracy and interpretability.",
|
47 |
+
"link": "https://arxiv.org/abs/2502.07803"
|
48 |
+
},
|
49 |
+
{
|
50 |
+
"title": "Premise-Augmented Reasoning Chains Improve Error Identification in Math reasoning with LLMs",
|
51 |
+
"authors": "Sagnik Mukherjee, Abhinav Chinta, Takyoung Kim, Tarun Anoop Sharma, Dilek Hakkani-T\u00fcr",
|
52 |
+
"abstract": "Chain-of-Thought (CoT) prompting enhances mathematical reasoning in large language models (LLMs) by enabling detailed step-by-step solutions. However, due to the verbosity of LLMs, the resulting reasoning chains can be long, making it harder to verify the reasoning steps and trace issues resulting from dependencies between the steps that may be farther away in the sequence of steps. Importantly, mathematical reasoning allows each step to be derived from a small set of premises, which are a subset of the preceding steps in the reasoning chain. In this paper, we present a framework that identifies the premises for each step, to improve the evaluation of reasoning. We restructure conventional linear reasoning chains into Premise Augmented Reasoning Chains (PARC) by introducing premise links, resulting in a directed acyclic graph where the nodes are the steps and the edges are the premise links. Through experiments with a PARC-based dataset that we built, namely PERL (Premises and ERrors identification in LLMs), we demonstrate that LLMs can reliably identify premises within complex reasoning chains. In particular, even open-source LLMs achieve 90% recall in premise identification. We also show that PARC helps to identify errors in reasoning chains more reliably. The accuracy of error identification improves by 6% to 16% absolute when step-by-step verification is carried out in PARC under the premises. Our findings highlight the utility of premise-centric representations in addressing complex problem-solving tasks and open new avenues for improving the reliability of LLM-based reasoning evaluations.",
|
53 |
+
"link": "https://arxiv.org/abs/2502.02362"
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"title": "Advanced Weakly-Supervised Formula Exploration for Neuro-Symbolic Mathematical Reasoning",
|
57 |
+
"authors": "Yuxuan Wu, Hideki Nakayama",
|
58 |
+
"abstract": "In recent years, neuro-symbolic methods have become a popular and powerful approach that augments artificial intelligence systems with the capability to perform abstract, logical, and quantitative deductions with enhanced precision and controllability. Recent studies successfully performed symbolic reasoning by leveraging various machine learning models to explicitly or implicitly predict intermediate labels that provide symbolic instructions. However, these intermediate labels are not always prepared for every task as a part of training data, and pre-trained models, represented by Large Language Models (LLMs), also do not consistently generate valid symbolic instructions with their intrinsic knowledge. On the other hand, existing work developed alternative learning techniques that allow the learning system to autonomously uncover optimal symbolic instructions. Nevertheless, their performance also exhibits limitations when faced with relatively huge search spaces or more challenging reasoning problems. In view of this, in this work, we put forward an advanced practice for neuro-symbolic reasoning systems to explore the intermediate labels with weak supervision from problem inputs and final outputs. Our experiments on the Mathematics dataset illustrated the effectiveness of our proposals from multiple aspects.",
|
59 |
+
"link": "https://arxiv.org/abs/2502.00629"
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"title": "ARIES: Stimulating Self-Refinement of Large Language Models by Iterative Preference Optimization",
|
63 |
+
"authors": "Yongcheng Zeng, Xinyu Cui, Xuanfa Jin, Guoqing Liu, Zexu Sun, Quan He, Dong Li, Ning Yang, Jianye Hao, Haifeng Zhang, Jun Wang",
|
64 |
+
"abstract": "A truly intelligent Large Language Model (LLM) should be capable of correcting errors in its responses through external interactions. However, even the most advanced models often face challenges in improving their outputs. In this paper, we explore how to cultivate LLMs with the self-refinement capability through iterative preference training, and how this ability can be leveraged to improve model performance during inference. To this end, we introduce a novel post-training and inference framework, called ARIES: Adaptive Refinement and Iterative Enhancement Structure. This method iteratively performs preference training and self-refinement-based data collection. During training, ARIES strengthen the model's direct question-answering capability while simultaneously unlocking its self-refinement potential. During inference, ARIES harnesses this self-refinement capability to generate a series of progressively refined responses, which are then filtered using either the Reward Model Scoring or a simple yet effective Rule-Based Selection mechanism, specifically tailored to our approach, to construct a dataset for the next round of preference training. Experimental results demonstrate the remarkable performance of ARIES. When applied to the Llama-3.1-8B model and under the self-refinement setting, ARIES surpasses powerful models such as GPT-4o, achieving 62.3% length-controlled (LC) and a 63.3% raw win rates on AlpacaEval 2, outperforming Iterative DPO by 27.8% and 35.5% respectively, as well as a 50.3% win rate on Arena-Hard, surpassing Iterative DPO by 26.6%. Furthermore, ARIES consistently enhances performance on mathematical reasoning tasks like GSM8K and MATH.",
|
65 |
+
"link": "https://arxiv.org/abs/2502.05605"
|
66 |
+
}
|
67 |
+
]
|
68 |
+
|
69 |
+
==>> Search Results:
|
70 |
+
1. Position: Multimodal Large Language Models Can Significantly Advance Scientific Reasoning
|
71 |
+
Authors: Yibo Yan, Shen Wang, Jiahao Huo, Jingheng Ye, Zhendong Chu, Xuming Hu, Philip S. Yu, Carla Gomes, Bart Selman, Qingsong Wen
|
72 |
+
Abstract: Scientific reasoning, the process through which humans apply logic, evidence, and critical thinking to explore and interpret scientific phenomena, is essential in advancing knowledge reasoning across diverse fields. However, despite significant progress, current scientific reasoning models still struggle with generalization across domains and often fall short of multimodal perception. Multimodal Large Language Models (MLLMs), which integrate text, images, and other modalities, present an exciting opportunity to overcome these limitations and enhance scientific reasoning. Therefore, this position paper argues that MLLMs can significantly advance scientific reasoning across disciplines such as mathematics, physics, chemistry, and biology. First, we propose a four-stage research roadmap of scientific reasoning capabilities, and highlight the current state of MLLM applications in scientific reasoning, noting their ability to integrate and reason over diverse data types. Second, we summarize the key challenges that remain obstacles to achieving MLLM's full potential. To address these challenges, we propose actionable insights and suggestions for the future. Overall, our work offers a novel perspective on MLLM integration with scientific reasoning, providing the LLM community with a valuable vision for achieving Artificial General Intelligence (AGI).
|
73 |
+
Link: https://arxiv.org/abs/2502.02871
|
74 |
+
|
75 |
+
2. Adaptive Graph of Thoughts: Test-Time Adaptive Reasoning Unifying Chain, Tree, and Graph Structures
|
76 |
+
Authors: Tushar Pandey, Ara Ghukasyan, Oktay Goktas, Santosh Kumar Radha
|
77 |
+
Abstract: Large Language Models (LLMs) have demonstrated impressive reasoning capabilities, yet their performance is highly dependent on the prompting strategy and model scale. While reinforcement learning and fine-tuning have been deployed to boost reasoning, these approaches incur substantial computational and data overhead. In this work, we introduce Adaptive Graph of Thoughts (AGoT), a dynamic, graph-based inference framework that enhances LLM reasoning solely at test time. Rather than relying on fixed-step methods like Chain of Thought (CoT) or Tree of Thoughts (ToT), AGoT recursively decomposes complex queries into structured subproblems, forming an dynamic directed acyclic graph (DAG) of interdependent reasoning steps. By selectively expanding only those subproblems that require further analysis, AGoT unifies the strengths of chain, tree, and graph paradigms into a cohesive framework that allocates computation where it is most needed. We validate our approach on diverse benchmarks spanning multi-hop retrieval, scientific reasoning, and mathematical problem-solving, achieving up to 46.2% improvement on scientific reasoning tasks (GPQA) - comparable to gains achieved through computationally intensive reinforcement learning approaches and outperforming state-of-the-art iterative approaches. These results suggest that dynamic decomposition and structured recursion offer a scalable, cost-effective alternative to post-training modifications, paving the way for more robust, general-purpose reasoning in LLMs.
|
78 |
+
Link: https://arxiv.org/abs/2502.05078
|
79 |
+
|
80 |
+
3. VersaPRM: Multi-Domain Process Reward Model via Synthetic Reasoning Data
|
81 |
+
Authors: Thomas Zeng, Shuibai Zhang, Shutong Wu, Christian Classen, Daewon Chae, Ethan Ewer, Minjae Lee, Heeju Kim, Wonjun Kang, Jackson Kunde, Ying Fan, Jungtaek Kim, Hyung Il Koo, Kannan Ramchandran, Dimitris Papailiopoulos, Kangwook Lee
|
82 |
+
Abstract: Process Reward Models (PRMs) have proven effective at enhancing mathematical reasoning for Large Language Models (LLMs) by leveraging increased inference-time computation. However, they are predominantly trained on mathematical data and their generalizability to non-mathematical domains has not been rigorously studied. In response, this work first shows that current PRMs have poor performance in other domains. To address this limitation, we introduce VersaPRM, a multi-domain PRM trained on synthetic reasoning data generated using our novel data generation and annotation method. VersaPRM achieves consistent performance gains across diverse domains. For instance, in the MMLU-Pro category of Law, VersaPRM via weighted majority voting, achieves a 7.9% performance gain over the majority voting baseline -- surpassing Qwen2.5-Math-PRM's gain of 1.3%. We further contribute to the community by open-sourcing all data, code and models for VersaPRM.
|
83 |
+
Link: https://arxiv.org/abs/2502.06737
|
84 |
+
|
85 |
+
4. Large Language Models for Multi-Robot Systems: A Survey
|
86 |
+
Authors: Peihan Li, Zijian An, Shams Abrar, Lifeng Zhou
|
87 |
+
Abstract: The rapid advancement of Large Language Models (LLMs) has opened new possibilities in Multi-Robot Systems (MRS), enabling enhanced communication, task planning, and human-robot interaction. Unlike traditional single-robot and multi-agent systems, MRS poses unique challenges, including coordination, scalability, and real-world adaptability. This survey provides the first comprehensive exploration of LLM integration into MRS. It systematically categorizes their applications across high-level task allocation, mid-level motion planning, low-level action generation, and human intervention. We highlight key applications in diverse domains, such as household robotics, construction, formation control, target tracking, and robot games, showcasing the versatility and transformative potential of LLMs in MRS. Furthermore, we examine the challenges that limit adapting LLMs in MRS, including mathematical reasoning limitations, hallucination, latency issues, and the need for robust benchmarking systems. Finally, we outline opportunities for future research, emphasizing advancements in fine-tuning, reasoning techniques, and task-specific models. This survey aims to guide researchers in the intelligence and real-world deployment of MRS powered by LLMs. Based on the fast-evolving nature of research in the field, we keep updating the papers in the open-source Github repository.
|
88 |
+
Link: https://arxiv.org/abs/2502.03814
|
89 |
+
|
90 |
+
5. MergeME: Model Merging Techniques for Homogeneous and Heterogeneous MoEs
|
91 |
+
Authors: Yuhang Zhou, Giannis Karamanolakis, Victor Soto, Anna Rumshisky, Mayank Kulkarni, Furong Huang, Wei Ai, Jianhua Lu
|
92 |
+
Abstract: The recent success of specialized Large Language Models (LLMs) in domains such as mathematical reasoning and coding has led to growing interest in methods for merging these expert LLMs into a unified Mixture-of-Experts (MoE) model, with the goal of enhancing performance in each domain while retaining effectiveness on general tasks. However, the effective merging of expert models remains an open challenge, especially for models with highly divergent weight parameters or different architectures. State-of-the-art MoE merging methods only work with homogeneous model architectures and rely on simple unweighted averaging to merge expert layers, which does not address parameter interference and requires extensive fine-tuning of the merged MoE to restore performance. To address these limitations, this paper introduces new MoE merging techniques, including strategies to mitigate parameter interference, routing heuristics to reduce the need for MoE fine-tuning, and a novel method for merging experts with different architectures. Extensive experiments across multiple domains demonstrate the effectiveness of our proposed methods, reducing fine-tuning costs, improving performance over state-of-the-art methods, and expanding the applicability of MoE merging.
|
93 |
+
Link: https://arxiv.org/abs/2502.00997
|
94 |
+
|
95 |
+
6. Satori: Reinforcement Learning with Chain-of-Action-Thought Enhances LLM Reasoning via Autoregressive Search
|
96 |
+
Authors: Maohao Shen, Guangtao Zeng, Zhenting Qi, Zhang-Wei Hong, Zhenfang Chen, Wei Lu, Gregory Wornell, Subhro Das, David Cox, Chuang Gan
|
97 |
+
Abstract: Large language models (LLMs) have demonstrated remarkable reasoning capabilities across diverse domains. Recent studies have shown that increasing test-time computation enhances LLMs' reasoning capabilities. This typically involves extensive sampling at inference time guided by an external LLM verifier, resulting in a two-player system. Despite external guidance, the effectiveness of this system demonstrates the potential of a single LLM to tackle complex tasks. Thus, we pose a new research problem: Can we internalize the searching capabilities to fundamentally enhance the reasoning abilities of a single LLM? This work explores an orthogonal direction focusing on post-training LLMs for autoregressive searching (i.e., an extended reasoning process with self-reflection and self-exploration of new strategies). To achieve this, we propose the Chain-of-Action-Thought (COAT) reasoning and a two-stage training paradigm: 1) a small-scale format tuning stage to internalize the COAT reasoning format and 2) a large-scale self-improvement stage leveraging reinforcement learning. Our approach results in Satori, a 7B LLM trained on open-source models and data. Extensive empirical evaluations demonstrate that Satori achieves state-of-the-art performance on mathematical reasoning benchmarks while exhibits strong generalization to out-of-domain tasks. Code, data, and models will be fully open-sourced.
|
98 |
+
Link: https://arxiv.org/abs/2502.02508
|
99 |
+
|
100 |
+
7. Reasoning-as-Logic-Units: Scaling Test-Time Reasoning in Large Language Models Through Logic Unit Alignment
|
101 |
+
Authors: Cheryl Li, Tianyuan Xu, Yiwen Guo
|
102 |
+
Abstract: Chain-of-Thought (CoT) prompting has shown promise in enhancing the reasoning capabilities of large language models (LLMs) by generating natural language (NL) rationales that lead to the final answer. However, it struggles with numerical computation, which has somehow led to the development of program-aided techniques. Despite their potential, a persistent challenge remains: inconsistencies between LLM-reported reasoning steps and the logic in generated programs, which we term ``reasoning hallucinations." This stems from the inherent ambiguities of NL and the statistical nature of LLMs, which often lack rigorous logical coherence. To address this challenge, we propose a novel test-time scaling framework, Reasoning-as-Logic-Units (RaLU), which constructs a more reliable reasoning path by aligning logical units between the generated program and their corresponding NL descriptions. By decomposing the initially generated program into discrete units using static analysis, RaLU engages in an iterative dialogue with the LLM to judge, refine, and explain each unit. A rewind-and-correct mechanism ensures alignment between code statements and task requirements in each unit, ultimately forming a cohesive reasoning path under the program's logic, from which the model reaches a final solution. Our experiments demonstrate that RaLU significantly outperforms existing baselines in mathematical reasoning (GSM8K, MATH) and algorithmic reasoning (HumanEval+, MBPP+), underscoring its potential to advance LLM reasoning and programming by offering enhanced accuracy and interpretability.
|
103 |
+
Link: https://arxiv.org/abs/2502.07803
|
104 |
+
|
105 |
+
8. Premise-Augmented Reasoning Chains Improve Error Identification in Math reasoning with LLMs
|
106 |
+
Authors: Sagnik Mukherjee, Abhinav Chinta, Takyoung Kim, Tarun Anoop Sharma, Dilek Hakkani-Tür
|
107 |
+
Abstract: Chain-of-Thought (CoT) prompting enhances mathematical reasoning in large language models (LLMs) by enabling detailed step-by-step solutions. However, due to the verbosity of LLMs, the resulting reasoning chains can be long, making it harder to verify the reasoning steps and trace issues resulting from dependencies between the steps that may be farther away in the sequence of steps. Importantly, mathematical reasoning allows each step to be derived from a small set of premises, which are a subset of the preceding steps in the reasoning chain. In this paper, we present a framework that identifies the premises for each step, to improve the evaluation of reasoning. We restructure conventional linear reasoning chains into Premise Augmented Reasoning Chains (PARC) by introducing premise links, resulting in a directed acyclic graph where the nodes are the steps and the edges are the premise links. Through experiments with a PARC-based dataset that we built, namely PERL (Premises and ERrors identification in LLMs), we demonstrate that LLMs can reliably identify premises within complex reasoning chains. In particular, even open-source LLMs achieve 90% recall in premise identification. We also show that PARC helps to identify errors in reasoning chains more reliably. The accuracy of error identification improves by 6% to 16% absolute when step-by-step verification is carried out in PARC under the premises. Our findings highlight the utility of premise-centric representations in addressing complex problem-solving tasks and open new avenues for improving the reliability of LLM-based reasoning evaluations.
|
108 |
+
Link: https://arxiv.org/abs/2502.02362
|
109 |
+
|
110 |
+
9. Advanced Weakly-Supervised Formula Exploration for Neuro-Symbolic Mathematical Reasoning
|
111 |
+
Authors: Yuxuan Wu, Hideki Nakayama
|
112 |
+
Abstract: In recent years, neuro-symbolic methods have become a popular and powerful approach that augments artificial intelligence systems with the capability to perform abstract, logical, and quantitative deductions with enhanced precision and controllability. Recent studies successfully performed symbolic reasoning by leveraging various machine learning models to explicitly or implicitly predict intermediate labels that provide symbolic instructions. However, these intermediate labels are not always prepared for every task as a part of training data, and pre-trained models, represented by Large Language Models (LLMs), also do not consistently generate valid symbolic instructions with their intrinsic knowledge. On the other hand, existing work developed alternative learning techniques that allow the learning system to autonomously uncover optimal symbolic instructions. Nevertheless, their performance also exhibits limitations when faced with relatively huge search spaces or more challenging reasoning problems. In view of this, in this work, we put forward an advanced practice for neuro-symbolic reasoning systems to explore the intermediate labels with weak supervision from problem inputs and final outputs. Our experiments on the Mathematics dataset illustrated the effectiveness of our proposals from multiple aspects.
|
113 |
+
Link: https://arxiv.org/abs/2502.00629
|
114 |
+
|
115 |
+
10. ARIES: Stimulating Self-Refinement of Large Language Models by Iterative Preference Optimization
|
116 |
+
Authors: Yongcheng Zeng, Xinyu Cui, Xuanfa Jin, Guoqing Liu, Zexu Sun, Quan He, Dong Li, Ning Yang, Jianye Hao, Haifeng Zhang, Jun Wang
|
117 |
+
Abstract: A truly intelligent Large Language Model (LLM) should be capable of correcting errors in its responses through external interactions. However, even the most advanced models often face challenges in improving their outputs. In this paper, we explore how to cultivate LLMs with the self-refinement capability through iterative preference training, and how this ability can be leveraged to improve model performance during inference. To this end, we introduce a novel post-training and inference framework, called ARIES: Adaptive Refinement and Iterative Enhancement Structure. This method iteratively performs preference training and self-refinement-based data collection. During training, ARIES strengthen the model's direct question-answering capability while simultaneously unlocking its self-refinement potential. During inference, ARIES harnesses this self-refinement capability to generate a series of progressively refined responses, which are then filtered using either the Reward Model Scoring or a simple yet effective Rule-Based Selection mechanism, specifically tailored to our approach, to construct a dataset for the next round of preference training. Experimental results demonstrate the remarkable performance of ARIES. When applied to the Llama-3.1-8B model and under the self-refinement setting, ARIES surpasses powerful models such as GPT-4o, achieving 62.3% length-controlled (LC) and a 63.3% raw win rates on AlpacaEval 2, outperforming Iterative DPO by 27.8% and 35.5% respectively, as well as a 50.3% win rate on Arena-Hard, surpassing Iterative DPO by 26.6%. Furthermore, ARIES consistently enhances performance on mathematical reasoning tasks like GSM8K and MATH.
|
118 |
+
Link: https://arxiv.org/abs/2502.05605
|
119 |
+
|
120 |
+
Done!
|
octotools/tools/arxiv_paper_searcher/tool.py
ADDED
@@ -0,0 +1,165 @@
import re
import requests
from bs4 import BeautifulSoup

from octotools.tools.base import BaseTool

class ArXiv_Paper_Searcher_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="ArXiv_Paper_Searcher_Tool",
            tool_description="A tool that searches arXiv for papers based on a given query.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query for arXiv papers.",
                "size": "int - The number of results per page (25, 50, 100, or 200). If None, use 25.",
                "max_results": "int - The maximum number of papers to return (default: 25). Should be less than or equal to 100."
            },
            output_type="list - A list of dictionaries containing paper information.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="tool agents with large language models")',
                    "description": "Search for papers about tool agents with large language models."
                },
                {
                    "command": 'execution = tool.execute(query="quantum computing", size=100, max_results=50)',
                    "description": "Search for quantum computing papers, with 100 results per page, returning a maximum of 50 papers."
                },
                {
                    "command": 'execution = tool.execute(query="machine learning", max_results=75)',
                    "description": "Search for machine learning papers, returning a maximum of 75 papers."
                },
            ],
            user_metadata={
                "valid_sizes": [25, 50, 100, 200],
                "base_url": "https://arxiv.org/search/"
            }
        )

    def build_tool(self):
        """
        No specific build required for this tool.
        """
        pass

    def execute(self, query, size=None, max_results=25):
        """
        Executes the arXiv search tool to find papers based on the given query.

        Parameters:
            query (str): The search query for arXiv papers.
            size (int): The number of results per page.
            max_results (int): The maximum number of papers to return.

        Returns:
            list: A list of dictionaries containing paper information.
        """
        valid_sizes = self.user_metadata["valid_sizes"]
        base_url = self.user_metadata["base_url"]

        if size is None:
            size = 25
        elif size not in valid_sizes:
            size = min(valid_sizes, key=lambda x: abs(x - size))

        results = []
        start = 0

        max_results = min(max_results, 100)  # NOTE: For traffic reasons, limit to 100 results

        while len(results) < max_results:
            params = {
                "searchtype": "all",
                "query": query,
                "abstracts": "show",
                "order": "",
                "size": str(size),
                "start": str(start)
            }

            try:
                response = requests.get(base_url, params=params)
                soup = BeautifulSoup(response.content, 'html.parser')

                papers = soup.find_all("li", class_="arxiv-result")
                if not papers:
                    break

                for paper in papers:
                    if len(results) >= max_results:
                        break

                    title = paper.find("p", class_="title").text.strip()
                    authors = paper.find("p", class_="authors").text.strip()
                    authors = re.sub(r'^Authors:\s*', '', authors)
                    authors = re.sub(r'\s+', ' ', authors).strip()

                    abstract = paper.find("span", class_="abstract-full").text.strip()
                    abstract = abstract.replace("△ Less", "").strip()

                    link = paper.find("p", class_="list-title").find("a")["href"]

                    results.append({
                        "title": title,
                        "authors": authors,
                        "abstract": abstract,
                        "link": f"{link}"
                    })

                start += size

            except Exception as e:
                print(f"Error searching arXiv: {e}")
                break

        return results[:max_results]

    def get_metadata(self):
        """
        Returns the metadata for the ArXiv_Paper_Searcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/arxiv_paper_searcher
    python tool.py
    """

    import json

    print("ArXiv Search Tool Test")

    # Example usage of the ArXiv_Paper_Searcher_Tool
    tool = ArXiv_Paper_Searcher_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print("Tool Metadata:")
    print(metadata)

    # Sample query for searching arXiv
    query = "enhance mathematical reasoning with large language models"
    # Execute the tool
    try:
        execution = tool.execute(query=query, size=50, max_results=10)
        print("\n==>> Execution:")
        print(json.dumps(execution, indent=4))  # Pretty print JSON
        print("\n==>> Search Results:")
        for i, paper in enumerate(execution, 1):
            print(f"{i}. {paper['title']}")
            print(f"   Authors: {paper['authors']}")
            print(f"   Abstract: {paper['abstract'][:2000]}")
            print(f"   Link: {paper['link']}")
            print()
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
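For context on what execute() actually fetches: the requests.get(base_url, params=params) call above hits the public arXiv search page and the results are then scraped with BeautifulSoup. A small sketch of the URL that call produces for the first demo command (the query value is taken from the demo; everything else mirrors the code):

from urllib.parse import urlencode

# Mirrors the params dict built inside ArXiv_Paper_Searcher_Tool.execute().
params = {
    "searchtype": "all",
    "query": "tool agents with large language models",
    "abstracts": "show",
    "order": "",
    "size": "25",
    "start": "0",
}
# -> https://arxiv.org/search/?searchtype=all&query=tool+agents+with+large+language+models&abstracts=show&order=&size=25&start=0
print("https://arxiv.org/search/?" + urlencode(params))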
{opentools β octotools}/tools/base.py
RENAMED
@@ -1,6 +1,6 @@
-# opentools/tools/base.py
+# octotools/tools/base.py
 
-from opentools.engine.openai import ChatOpenAI
+from octotools.engine.openai import ChatOpenAI
 
 class BaseTool:
     """
{opentools β octotools}/tools/generalist_solution_generator/tool.py
RENAMED
@@ -1,6 +1,6 @@
 import os
-from opentools.tools.base import BaseTool
-from opentools.engine.openai import ChatOpenAI
+from octotools.tools.base import BaseTool
+from octotools.engine.openai import ChatOpenAI
 
 class Generalist_Solution_Generator_Tool(BaseTool):
     require_llm_engine = True
@@ -109,7 +109,7 @@ if __name__ == "__main__":
     """
     Run the following commands in the terminal to test the script:
 
-    cd opentools
+    cd octotools
     python tools/default/tool.py
     """
 
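Both renames above are the same mechanical substitution, opentools to octotools, applied to module paths and imports. A hedged sketch of how the whole tree could be migrated in one pass (an illustration run from the repository root, not necessarily the script actually used for this commit):

from pathlib import Path

# Illustrative one-shot migration: rewrite every occurrence of the old package
# name in Python sources. Assumes it is run from the repository root.
for path in Path(".").rglob("*.py"):
    text = path.read_text(encoding="utf-8")
    if "opentools" in text:
        path.write_text(text.replace("opentools", "octotools"), encoding="utf-8")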
octotools/tools/google_search/__init__.py
ADDED
File without changes
|
octotools/tools/google_search/test.log
ADDED
@@ -0,0 +1,29 @@
{'tool_name': 'Google_Search_Tool', 'tool_description': 'A tool that performs Google searches based on a given text query.', 'tool_version': '1.0.0', 'input_types': {'query': 'str - The search query to be used for the Google search.', 'num_results': 'int - The number of search results to return (default: 10).'}, 'output_type': 'list - A list of dictionaries containing search result information.', 'demo_commands': [{'command': 'execution = tool.execute(query="Python programming")', 'description': "Perform a Google search for 'Python programming' and return the default number of results."}, {'command': 'execution = tool.execute(query="Machine learning tutorials", num_results=5)', 'description': "Perform a Google search for 'Machine learning tutorials' and return 5 results."}], 'require_llm_engine': False}
{'kind': 'customsearch#search', 'url': {'type': 'application/json', 'template': 'https://www.googleapis.com/customsearch/v1?q={searchTerms}&num={count?}&start={startIndex?}&lr={language?}&safe={safe?}&cx={cx?}&sort={sort?}&filter={filter?}&gl={gl?}&cr={cr?}&googlehost={googleHost?}&c2coff={disableCnTwTranslation?}&hq={hq?}&hl={hl?}&siteSearch={siteSearch?}&siteSearchFilter={siteSearchFilter?}&exactTerms={exactTerms?}&excludeTerms={excludeTerms?}&linkSite={linkSite?}&orTerms={orTerms?}&dateRestrict={dateRestrict?}&lowRange={lowRange?}&highRange={highRange?}&searchType={searchType}&fileType={fileType?}&rights={rights?}&imgSize={imgSize?}&imgType={imgType?}&imgColorType={imgColorType?}&imgDominantColor={imgDominantColor?}&alt=json'}, 'queries': {'request': [{'title': 'Google Custom Search - nobel prize winners in chemistry 2024', 'totalResults': '1020000', 'searchTerms': 'nobel prize winners in chemistry 2024', 'count': 5, 'startIndex': 1, 'inputEncoding': 'utf8', 'outputEncoding': 'utf8', 'safe': 'off', 'cx': 'd5bb3fdd4b7fd4cd9'}], 'nextPage': [{'title': 'Google Custom Search - nobel prize winners in chemistry 2024', 'totalResults': '1020000', 'searchTerms': 'nobel prize winners in chemistry 2024', 'count': 5, 'startIndex': 6, 'inputEncoding': 'utf8', 'outputEncoding': 'utf8', 'safe': 'off', 'cx': 'd5bb3fdd4b7fd4cd9'}]}, 'context': {'title': 'toolbox-dev-pan'}, 'searchInformation': {'searchTime': 0.285868, 'formattedSearchTime': '0.29', 'totalResults': '1020000', 'formattedTotalResults': '1,020,000'}, 'items': [{'kind': 'customsearch#result', 'title': 'The Nobel Prize in Chemistry 2024', 'htmlTitle': 'The <b>Nobel Prize</b> in <b>Chemistry 2024</b>', 'link': 'https://www.nobelprize.org/prizes/chemistry/', 'displayLink': 'www.nobelprize.org', 'snippet': "The Nobel Prize in Chemistry 2024 is about proteins, life's ingenious chemical tools. David Baker has succeeded with the almost impossible feat of building\xa0...", 'htmlSnippet': 'The <b>Nobel Prize</b> in <b>Chemistry 2024</b> is about proteins, life's ingenious <b>chemical</b> tools. 
David Baker has succeeded with the almost impossible feat of building ...', 'formattedUrl': 'https://www.nobelprize.org/prizes/chemistry/', 'htmlFormattedUrl': 'https://www.<b>nobelprize</b>.org/<b>prize</b>s/<b>chemistry</b>/', 'pagemap': {'hcard': [{'fn': 'Ernest Rutherford', 'url': 'https://www.nobelprize.org/prizes/chemistry/1908/rutherford/'}, {'fn': 'Marie Curie, nΓ©e SkΕodowska', 'url': 'https://www.nobelprize.org/prizes/physics/1903/marie-curie/'}, {'fn': 'Jacques Dubochet', 'url': 'https://www.nobelprize.org/prizes/chemistry/2017/dubochet/'}, {'fn': 'Dorothy Crowfoot Hodgkin', 'url': 'https://www.nobelprize.org/prizes/chemistry/1964/hodgkin/'}, {'fn': 'Linus Carl Pauling', 'url': 'https://www.nobelprize.org/prizes/chemistry/1954/pauling/'}, {'fn': 'Jean-Pierre Sauvage', 'url': 'https://www.nobelprize.org/prizes/chemistry/2016/sauvage/'}], 'cse_thumbnail': [{'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSOMs3quxSE4q1eWHocdDOr31q-aad13MSHgdUGUqpOG71sdtm6WGQ5BWAX&s', 'width': '276', 'height': '182'}], 'person': [{'name': 'Ernest Rutherford', 'url': 'Ernest Rutherford'}, {'name': 'Marie Curie, nΓ©e SkΕodowska', 'url': 'Marie Curie, nΓ©e SkΕodowska'}, {'name': 'Jacques Dubochet', 'url': 'Jacques Dubochet'}, {'name': 'Dorothy Crowfoot Hodgkin', 'url': 'Dorothy Crowfoot Hodgkin'}, {'name': 'Linus Carl Pauling', 'url': 'Linus Carl Pauling'}, {'name': 'Jean-Pierre Sauvage', 'url': 'Jean-Pierre Sauvage'}], 'organization': [{'url': 'https://www.nobelprize.org/'}, {'logo': 'Nobel Prize', 'url': 'Nobel Prize'}], 'metatags': [{'og:image': 'https://www.nobelprize.org/uploads/2018/09/New-will-fullsize-496x328.jpg', 'og:type': 'website', 'twitter:card': 'summary_large_image', 'twitter:title': 'Chemistry Prize', 'og:site_name': 'NobelPrize.org', 'og:title': 'Chemistry Prize', 'msapplication-tileimage': 'https://www.nobelprize.org/uploads/2018/08/Nobel-favicon.png', 'og:description': 'Chemistry Prize', 'twitter:image': 'https://www.nobelprize.org/uploads/2018/09/New-will-fullsize-496x328.jpg', 'viewport': 'width=device-width, initial-scale=1', 'twitter:description': 'Chemistry Prize', 'og:locale': 'en_US', 'og:url': 'https://www.nobelprize.org/chemistry-prize-2/'}], 'cse_image': [{'src': 'https://www.nobelprize.org/uploads/2018/09/New-will-fullsize-496x328.jpg'}], 'blogposting': [{'headline': 'They cracked the code for proteinsβ amazing structures'}, {'headline': 'David Baker'}, {'headline': 'Demis Hassabis'}, {'headline': 'John Jumper'}, {'headline': 'The life of a chemist'}, {'headline': 'How many chemistry laureates can you match?'}, {'headline': 'What are the Nobel Prize categories?'}, {'headline': 'What did they discover?'}, {'headline': 'Who first predicted global warming?'}, {'headline': 'The worldβs smallest machines'}, {'headline': 'Interview with a double awardee'}]}}, {'kind': 'customsearch#result', 'title': 'NSF congratulates laureates of the 2024 Nobel Prize in chemistry ...', 'htmlTitle': 'NSF congratulates <b>laureates</b> of the <b>2024 Nobel Prize</b> in <b>chemistry</b> ...', 'link': 'https://www.nsf.gov/news/nsf-congratulates-laureates-2024-nobel-prize-chemistry', 'displayLink': 'www.nsf.gov', 'snippet': 'Oct 9, 2024 ... The U.S. National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the 2024 Nobel Prize in\xa0...', 'htmlSnippet': 'Oct 9, 2024 <b>...</b> The U.S. 
National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the <b>2024 Nobel Prize</b> in ...', 'formattedUrl': 'https://www.nsf.gov/.../nsf-congratulates-laureates-2024-nobel-prize-chemi...', 'htmlFormattedUrl': 'https://www.nsf.gov/.../nsf-congratulates-laureates-<b>2024</b>-<b>nobel</b>-<b>prize</b>-<b>chemi</b>...', 'pagemap': {'cse_thumbnail': [{'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRH1B2HTt4h0BsKt4cqcthiUWk5GDU2hX6k7mP1EQh0glR9n13NnR2pLX4&s', 'width': '360', 'height': '140'}], 'metatags': [{'og:image': 'https://nsf-gov-resources.nsf.gov/styles/_inline_image_full_width/s3/media_hub/nobel24-chemistry-news-hero-David-Baker-Demis-Hassabis-John-Jumper.jpg?VersionId=8Wa1DsrhgMw.SQEBdxmUOgtdCntLkWDb&itok=NIPtuWh-', 'og:image:width': '800', 'og:image:alt': 'sketch portraits of three men', 'twitter:card': 'summary_large_image', 'twitter:title': 'NSF congratulates laureates of the 2024 Nobel Prize in chemistry', 'og:site_name': 'NSF - National Science Foundation', 'handheldfriendly': 'true', 'og:title': 'NSF congratulates laureates of the 2024 Nobel Prize in chemistry', 'og:image:height': '312', 'og:description': 'The U.S. National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the 2024 Nobel Prize in chemistry. Baker and hisβ¦', 'twitter:image': 'https://nsf-gov-resources.nsf.gov/styles/_inline_image_full_width/s3/media_hub/nobel24-chemistry-news-hero-David-Baker-Demis-Hassabis-John-Jumper.jpg?VersionId=8Wa1DsrhgMw.SQEBdxmUOgtdCntLkWDb&itok=NIPtuWh-', 'twitter:image:alt': 'sketch portraits of three men', 'twitter:site': '@NSF', 'viewport': 'width=device-width, initial-scale=1.0', 'twitter:description': 'The U.S. National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the 2024 Nobel Prize in chemistry. Baker and hisβ¦', 'mobileoptimized': 'width', 'og:url': 'https://www.nsf.gov/news/nsf-congratulates-laureates-2024-nobel-prize-chemistry'}], 'cse_image': [{'src': 'https://nsf-gov-resources.nsf.gov/styles/_inline_image_full_width/s3/media_hub/nobel24-chemistry-news-hero-David-Baker-Demis-Hassabis-John-Jumper.jpg?VersionId=8Wa1DsrhgMw.SQEBdxmUOgtdCntLkWDb&itok=NIPtuWh-'}]}}, {'kind': 'customsearch#result', 'title': 'Press release: The Nobel Prize in Chemistry 2024 - NobelPrize.org', 'htmlTitle': 'Press release: The <b>Nobel Prize</b> in <b>Chemistry 2024</b> - <b>NobelPrize</b>.org', 'link': 'https://www.nobelprize.org/prizes/chemistry/2024/press-release/', 'displayLink': 'www.nobelprize.org', 'snippet': 'Oct 9, 2024 ... David Baker has succeeded with the almost impossible feat of building entirely new kinds of proteins. Demis Hassabis and John Jumper have\xa0...', 'htmlSnippet': 'Oct 9, 2024 <b>...</b> David Baker has succeeded with the almost impossible feat of building entirely new kinds of proteins. 
Demis Hassabis and John Jumper have ...', 'formattedUrl': 'https://www.nobelprize.org/prizes/chemistry/2024/press-release/', 'htmlFormattedUrl': 'https://www.<b>nobelprize</b>.org/<b>prize</b>s/<b>chemistry</b>/<b>2024</b>/press-release/', 'pagemap': {'cse_thumbnail': [{'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQON_Hf1_dyrEACSiPsjRoTG2R4pjcDpbg7BAzGC9LovMbRHVei6GShrCM&s', 'width': '302', 'height': '167'}], 'organization': [{'url': 'https://www.nobelprize.org/'}, {'logo': 'Nobel Prize', 'url': 'Nobel Prize'}], 'metatags': [{'og:image': 'https://www.nobelprize.org/uploads/2024/10/popular-chemistryprize2024-carrier-1024x567.jpg', 'og:type': 'website', 'twitter:card': 'summary_large_image', 'twitter:title': 'Nobel Prize in Chemistry 2024', 'og:site_name': 'NobelPrize.org', 'og:title': 'Nobel Prize in Chemistry 2024', 'msapplication-tileimage': 'https://www.nobelprize.org/uploads/2018/08/Nobel-favicon.png', 'og:description': 'The Nobel Prize in Chemistry 2024 was divided, one half awarded to David Baker "for computational protein design", the other half jointly to Demis Hassabis and John Jumper "for protein structure prediction"', 'twitter:image': 'https://www.nobelprize.org/uploads/2024/10/popular-chemistryprize2024-carrier-1024x567.jpg', 'viewport': 'width=device-width, initial-scale=1', 'twitter:description': 'The Nobel Prize in Chemistry 2024 was divided, one half awarded to David Baker "for computational protein design", the other half jointly to Demis Hassabis and John Jumper "for protein structure prediction"', 'og:locale': 'en_US', 'og:url': 'https://www.nobelprize.org/prizes/chemistry/2024/press-release/'}], 'cse_image': [{'src': 'https://www.nobelprize.org/uploads/2024/10/popular-chemistryprize2024-carrier-1024x567.jpg'}]}}, {'kind': 'customsearch#result', 'title': 'AIP Congratulates 2024 Nobel Prize Winners in Chemistry - AIP.ORG', 'htmlTitle': 'AIP Congratulates <b>2024 Nobel Prize Winners in Chemistry</b> - AIP.ORG', 'link': 'https://ww2.aip.org/aip/2024-nobel-prize-in-chemistry', 'displayLink': 'ww2.aip.org', 'snippet': 'Oct 9, 2024 ... 
The 2024 Nobel Prize in chemistry was awarded with one half to David Baker βfor computational protein designβ and the other half jointly to Demis Hassabis and\xa0...', 'htmlSnippet': 'Oct 9, 2024 <b>...</b> The <b>2024 Nobel Prize</b> in <b>chemistry</b> was awarded with one half to David Baker βfor computational protein designβ and the other half jointly to Demis Hassabis and ...', 'formattedUrl': 'https://ww2.aip.org/aip/2024-nobel-prize-in-chemistry', 'htmlFormattedUrl': 'https://ww2.aip.org/aip/<b>2024</b>-<b>nobel</b>-<b>prize</b>-in-<b>chemistry</b>', 'pagemap': {'cse_thumbnail': [{'src': 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTyVGKJaR9X7VE3Y97Z_j8IXIconF0D0zUe88ATVyBuFSXya-CrM1qsveY&s', 'width': '300', 'height': '168'}], 'metatags': [{'og:image': 'https://aip.brightspotcdn.com/dims4/default/079290d/2147483647/strip/true/crop/1200x675+0+0/resize/1440x810!/quality/90/?url=https%3A%2F%2Fk1-prod-aip.s3.us-east-2.amazonaws.com%2Fbrightspot%2F07%2F96%2Fbc7d9a594949bc49f983177d6781%2Fnobel-2024-chem-i01.jpg', 'og:image:width': '1440', 'og:image:alt': 'Nobel-2024-Chem-i01.jpg', 'og:type': 'article', 'article:published_time': '2024-10-09T12:28:31.16', 'article:section': 'AIP', 'twitter:card': 'summary_large_image', 'og:site_name': 'AIP', 'og:image:url': 'https://aip.brightspotcdn.com/dims4/default/079290d/2147483647/strip/true/crop/1200x675+0+0/resize/1440x810!/quality/90/?url=https%3A%2F%2Fk1-prod-aip.s3.us-east-2.amazonaws.com%2Fbrightspot%2F07%2F96%2Fbc7d9a594949bc49f983177d6781%2Fnobel-2024-chem-i01.jpg', 'og:image:height': '810', 'og:image:type': 'image/jpeg', 'twitter:image': 'https://aip.brightspotcdn.com/dims4/default/079290d/2147483647/strip/true/crop/1200x675+0+0/resize/1440x810!/quality/90/?url=https%3A%2F%2Fk1-prod-aip.s3.us-east-2.amazonaws.com%2Fbrightspot%2F07%2F96%2Fbc7d9a594949bc49f983177d6781%2Fnobel-2024-chem-i01.jpg', 'twitter:image:alt': 'Nobel-2024-Chem-i01.jpg', 'fb:app_id': '643005150655973', 'article:modified_time': '2024-10-09T18:23:12.852', 'viewport': 'width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=5', 'brightspot.contentid': '00000192-70e9-da7e-a1fe-fcff80d40000', 'og:url': 'https://ww2.aip.org/aip/2024-nobel-prize-in-chemistry'}], 'cse_image': [{'src': 'https://aip.brightspotcdn.com/dims4/default/079290d/2147483647/strip/true/crop/1200x675+0+0/resize/1440x810!/quality/90/?url=https%3A%2F%2Fk1-prod-aip.s3.us-east-2.amazonaws.com%2Fbrightspot%2F07%2F96%2Fbc7d9a594949bc49f983177d6781%2Fnobel-2024-chem-i01.jpg'}]}}, {'kind': 'customsearch#result', 'title': 'The Nobel Prize in Chemistry 2024 - NobelPrize.org', 'htmlTitle': 'The <b>Nobel Prize</b> in <b>Chemistry 2024</b> - <b>NobelPrize</b>.org', 'link': 'https://www.nobelprize.org/prizes/chemistry/2024/summary/', 'displayLink': 'www.nobelprize.org', 'snippet': 'David Baker Β· Demis Hassabis Β· John Jumper Β· Nobel Prizes and laureates\xa0...', 'htmlSnippet': 'David Baker · Demis Hassabis · John Jumper · <b>Nobel Prizes</b> and <b>laureates</b> ...', 'formattedUrl': 'https://www.nobelprize.org/prizes/chemistry/2024/summary/', 'htmlFormattedUrl': 'https://www.<b>nobelprize</b>.org/<b>prize</b>s/<b>chemistry</b>/<b>2024</b>/summary/', 'pagemap': {'hcard': [{'fn': 'David Baker', 'url': 'https://www.nobelprize.org/prizes/chemistry/2024/baker/facts/'}, {'fn': 'Demis Hassabis', 'url': 'https://www.nobelprize.org/prizes/chemistry/2024/hassabis/facts/'}, {'fn': 'John Jumper', 'url': 'https://www.nobelprize.org/prizes/chemistry/2024/jumper/facts/'}], 'cse_thumbnail': [{'src': 
'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT4UpOxZHEJhIvhwQsypTDggUJ3pJf_wG3U3jZBDyK8l-Qyqx99fVknWr8Y&s', 'width': '259', 'height': '194'}], 'person': [{'name': 'David Baker', 'description': 'Prize share: 1/2', 'url': 'David Baker'}, {'name': 'Demis Hassabis', 'description': 'Prize share: 1/4', 'url': 'Demis Hassabis'}, {'name': 'John Jumper', 'description': 'Prize share: 1/4', 'url': 'John Jumper'}], 'organization': [{'url': 'https://www.nobelprize.org/'}, {'logo': 'Nobel Prize', 'url': 'Nobel Prize'}], 'metatags': [{'og:image': 'https://www.nobelprize.org/uploads/2024/10/fig_ke_24_4x3-1024x768.jpg', 'og:type': 'website', 'twitter:card': 'summary_large_image', 'twitter:title': 'Nobel Prize in Chemistry 2024', 'og:site_name': 'NobelPrize.org', 'og:title': 'Nobel Prize in Chemistry 2024', 'msapplication-tileimage': 'https://www.nobelprize.org/uploads/2018/08/Nobel-favicon.png', 'og:description': 'The Nobel Prize in Chemistry 2024 was divided, one half awarded to David Baker "for computational protein design", the other half jointly to Demis Hassabis and John Jumper "for protein structure prediction"', 'twitter:image': 'https://www.nobelprize.org/uploads/2024/10/fig_ke_24_4x3-1024x768.jpg', 'viewport': 'width=device-width, initial-scale=1', 'twitter:description': 'The Nobel Prize in Chemistry 2024 was divided, one half awarded to David Baker "for computational protein design", the other half jointly to Demis Hassabis and John Jumper "for protein structure prediction"', 'og:locale': 'en_US', 'og:url': 'https://www.nobelprize.org/prizes/chemistry/2024/summary/'}], 'cse_image': [{'src': 'https://www.nobelprize.org/uploads/2024/10/fig_ke_24_4x3-1024x768.jpg'}]}}]}

Execution Result:
Search query: nobel prize winners in chemistry 2024
Number of results: 5

Search Results:

1. Title: The Nobel Prize in Chemistry 2024
   URL: https://www.nobelprize.org/prizes/chemistry/
   Snippet: The Nobel Prize in Chemistry 2024 is about proteins, life's ingenious chemical tools. David Baker has succeeded with the almost impossible feat of building ...

2. Title: NSF congratulates laureates of the 2024 Nobel Prize in chemistry ...
   URL: https://www.nsf.gov/news/nsf-congratulates-laureates-2024-nobel-prize-chemistry
   Snippet: Oct 9, 2024 ... The U.S. National Science Foundation congratulates David Baker, Demis Hassabis and John Jumper on being awarded the 2024 Nobel Prize in ...

3. Title: Press release: The Nobel Prize in Chemistry 2024 - NobelPrize.org
   URL: https://www.nobelprize.org/prizes/chemistry/2024/press-release/
   Snippet: Oct 9, 2024 ... David Baker has succeeded with the almost impossible feat of building entirely new kinds of proteins. Demis Hassabis and John Jumper have ...

4. Title: AIP Congratulates 2024 Nobel Prize Winners in Chemistry - AIP.ORG
   URL: https://ww2.aip.org/aip/2024-nobel-prize-in-chemistry
   Snippet: Oct 9, 2024 ... The 2024 Nobel Prize in chemistry was awarded with one half to David Baker “for computational protein design” and the other half jointly to Demis Hassabis and ...

5. Title: The Nobel Prize in Chemistry 2024 - NobelPrize.org
   URL: https://www.nobelprize.org/prizes/chemistry/2024/summary/
   Snippet: David Baker · Demis Hassabis · John Jumper · Nobel Prizes and laureates ...

Done!
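Editorial note (not part of the commit): the raw Custom Search payload above is what the tool's debug print emits, and only three fields per item survive into the formatted output below. A minimal sketch of that mapping, with the sample dict abbreviated to just the fields actually used:

# Minimal sketch: reduce a raw Custom Search response to the fields the tool reports.
raw_response = {
    "items": [
        {
            "title": "The Nobel Prize in Chemistry 2024 - NobelPrize.org",
            "link": "https://www.nobelprize.org/prizes/chemistry/2024/summary/",
            "snippet": "David Baker · Demis Hassabis · John Jumper · Nobel Prizes and laureates ...",
        },
    ]
}

results = [
    {"title": it["title"], "link": it["link"], "snippet": it["snippet"]}
    for it in raw_response.get("items", [])
]

for i, item in enumerate(results, 1):
    print(f"{i}. Title: {item['title']}")
    print(f"   URL: {item['link']}")
    print(f"   Snippet: {item['snippet']}")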
octotools/tools/google_search/tool.py
ADDED
@@ -0,0 +1,136 @@
import os
import requests
from typing import List, Dict, Any

from octotools.tools.base import BaseTool

from dotenv import load_dotenv
load_dotenv()

class Google_Search_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="Google_Search_Tool",
            tool_description="A tool that performs Google searches based on a given text query.",
            tool_version="1.0.0",
            input_types={
                "query": "str - The search query to be used for the Google search.",
                "num_results": "int - The number of search results to return (default: 10).",
            },
            output_type="list - A list of dictionaries containing search result information.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="Python programming")',
                    "description": "Perform a Google search for 'Python programming' and return the default number of results."
                },
                {
                    "command": 'execution = tool.execute(query="Machine learning tutorials", num_results=5)',
                    "description": "Perform a Google search for 'Machine learning tutorials' and return 5 results."
                },
            ],
        )
        # self.api_key = os.getenv("GOOGLE_API_KEY")
        self.api_key = os.getenv("GOOGLE_API_KEY")  # NOTE: Replace with your own API key (Ref: https://developers.google.com/custom-search/v1/introduction)
        self.cx = os.getenv("GOOGLE_CX")  # NOTE: Replace with your own custom search (Ref: https://programmablesearchengine.google.com/controlpanel/all)
        self.base_url = "https://www.googleapis.com/customsearch/v1"

    def google_search(self, query: str, num_results: int = 10) -> Dict[str, Any]:
        """
        Performs a Google search using the provided query.

        Parameters:
            query (str): The search query.
            num_results (int): The number of search results to return.

        Returns:
            Dict[str, Any]: The raw search results from the Google API.
        """
        params = {
            'q': query,
            'key': self.api_key,
            'cx': self.cx,
            'num': num_results
        }

        response = requests.get(self.base_url, params=params)
        return response.json()

    def execute(self, query: str, num_results: int = 10) -> List[Dict[str, Any]]:
        """
        Executes a Google search based on the provided query.

        Parameters:
            query (str): The search query.
            num_results (int): The number of search results to return (default: 10).

        Returns:
            List[Dict[str, Any]]: A list of dictionaries containing search result information.
        """
        if not self.api_key:
            return [{"error": "Google API key is not set. Please set the GOOGLE_API_KEY environment variable."}]

        try:
            results = self.google_search(query, num_results)
            print(results)

            if 'items' in results:
                return [
                    {
                        "title": item['title'],
                        "link": item['link'],
                        "snippet": item['snippet']
                    }
                    for item in results['items']
                ]
            else:
                return [{"error": "No results found."}]
        except Exception as e:
            return [{"error": f"An error occurred: {str(e)}"}]

    def get_metadata(self):
        """
        Returns the metadata for the Google_Search_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata


if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    export GOOGLE_API_KEY=your_api_key_here
    cd octotools/tools/google_search
    python tool.py
    """

    # Example usage of the Google_Search_Tool
    tool = Google_Search_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Execute the tool to perform a Google search
    query = "nobel prize winners in chemistry 2024"
    try:
        execution = tool.execute(query=query, num_results=5)
        print("\nExecution Result:")
        print(f"Search query: {query}")
        print(f"Number of results: {len(execution)}")
        print("\nSearch Results:")
        if "error" in execution[0]:
            print(f"Error: {execution[0]['error']}")
        else:
            for i, item in enumerate(execution, 1):
                print(f"\n{i}. Title: {item['title']}")
                print(f"   URL: {item['link']}")
                print(f"   Snippet: {item['snippet']}")
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
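Editorial note (not part of the commit): the Custom Search JSON API is generally documented as returning at most 10 results per request, so asking for more would require paging with a `start` offset. A hedged sketch of such a wrapper follows; the 10-per-page cap and the `start` parameter are assumptions about the API, not something this diff asserts.

import requests

def paged_google_search(api_key, cx, query, total=20):
    """Hedged sketch: fetch up to `total` results in pages of at most 10 via `start`."""
    base_url = "https://www.googleapis.com/customsearch/v1"
    items = []
    start = 1
    while len(items) < total:
        params = {"q": query, "key": api_key, "cx": cx,
                  "num": min(10, total - len(items)), "start": start}
        data = requests.get(base_url, params=params).json()
        page = data.get("items", [])
        if not page:
            break  # no further results available
        items.extend(page)
        start += len(page)
    return items[:total]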
octotools/tools/image_captioner/__init__.py
ADDED
File without changes
octotools/tools/image_captioner/examples/baseball.png
ADDED
octotools/tools/image_captioner/test.log
ADDED
@@ -0,0 +1,7 @@

Initializing Image Captioner Tool with model: gpt-4o
!! Cache enabled for model: gpt-4o
{'tool_name': 'Image_Captioner_Tool', 'tool_description': "A tool that generates captions for images using OpenAI's multimodal model.", 'tool_version': '1.0.0', 'input_types': {'image': 'str - The path to the image file.', 'prompt': "str - The prompt to guide the image captioning (default: 'Describe this image in detail.')."}, 'output_type': 'str - The generated caption for the image.', 'demo_commands': [{'command': 'execution = tool.execute(image="path/to/image.png")', 'description': 'Generate a caption for an image using the default prompt and model.'}, {'command': 'execution = tool.execute(image="path/to/image.png", prompt="Explain the mood of this scene.")', 'description': 'Generate a caption focusing on the mood using a specific prompt and model.'}], 'require_llm_engine': True, 'user_metadata': {'limitation': 'The Image_Captioner_Tool provides general image descriptions but has limitations: 1) May make mistakes in complex scenes, counting, attribute detection, and understanding object relationships. 2) Might not generate comprehensive captions, especially for images with multiple objects or abstract concepts. 3) Performance varies with image complexity. 4) Struggles with culturally specific or domain-specific content. 5) May overlook details or misinterpret object relationships. For precise descriptions, consider: using it with other tools for context/verification, as an initial step before refinement, or in multi-step processes for ambiguity resolution. Verify critical information with specialized tools or human expertise when necessary.'}}
Generated Caption:
"The image shows four blue buckets, each containing five baseballs. The buckets are arranged in a grid pattern with three on the top row and one on the bottom left. Each bucket has a handle on the side, and the baseballs inside are white with red stitching, typical of standard baseballs. The background is plain white, emphasizing the buckets and their contents."
Done!
octotools/tools/image_captioner/tool.py
ADDED
@@ -0,0 +1,96 @@
import os
from octotools.tools.base import BaseTool
from octotools.engine.openai import ChatOpenAI

class Image_Captioner_Tool(BaseTool):
    require_llm_engine = True

    def __init__(self, model_string="gpt-4o-mini"):
        super().__init__(
            tool_name="Image_Captioner_Tool",
            tool_description="A tool that generates captions for images using OpenAI's multimodal model.",
            tool_version="1.0.0",
            input_types={
                "image": "str - The path to the image file.",
                "prompt": "str - The prompt to guide the image captioning (default: 'Describe this image in detail.').",
            },
            output_type="str - The generated caption for the image.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.png")',
                    "description": "Generate a caption for an image using the default prompt and model."
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", prompt="Explain the mood of this scene.")',
                    "description": "Generate a caption focusing on the mood using a specific prompt and model."
                }
            ],
            user_metadata={
                "limitation": "The Image_Captioner_Tool provides general image descriptions but has limitations: 1) May make mistakes in complex scenes, counting, attribute detection, and understanding object relationships. 2) Might not generate comprehensive captions, especially for images with multiple objects or abstract concepts. 3) Performance varies with image complexity. 4) Struggles with culturally specific or domain-specific content. 5) May overlook details or misinterpret object relationships. For precise descriptions, consider: using it with other tools for context/verification, as an initial step before refinement, or in multi-step processes for ambiguity resolution. Verify critical information with specialized tools or human expertise when necessary."
            },
        )
        print(f"\nInitializing Image Captioner Tool with model: {model_string}")
        self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=True) if model_string else None

    def execute(self, image, prompt="Describe this image in detail."):
        try:
            if not self.llm_engine:
                return "Error: LLM engine not initialized. Please provide a valid model_string."

            input_data = [prompt]

            if image and os.path.isfile(image):
                try:
                    with open(image, 'rb') as file:
                        image_bytes = file.read()
                    input_data.append(image_bytes)
                except Exception as e:
                    return f"Error reading image file: {str(e)}"
            else:
                return "Error: Invalid image file path."

            caption = self.llm_engine(input_data)
            return caption
        except Exception as e:
            return f"Error generating caption: {str(e)}"

    def get_metadata(self):
        metadata = super().get_metadata()
        metadata['require_llm_engine'] = self.require_llm_engine  # NOTE: can be removed if not needed
        return metadata

if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/image_captioner
    python tool.py
    """

    import json

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Image_Captioner_Tool
    # tool = Image_Captioner_Tool()
    tool = Image_Captioner_Tool(model_string="gpt-4o")

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    # Construct the full path to the image using the script's directory
    relative_image_path = "examples/baseball.png"
    image_path = os.path.join(script_dir, relative_image_path)

    # Execute the tool with default prompt
    try:
        execution = tool.execute(image=image_path)
        print("Generated Caption:")
        print(json.dumps(execution, indent=4))
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
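Editorial note (not part of the commit): a short sketch of driving the captioner over a folder of images with one tool instance. The directory name is illustrative, and an OpenAI credential is assumed to be configured for the underlying engine.

import os
from octotools.tools.image_captioner.tool import Image_Captioner_Tool

tool = Image_Captioner_Tool(model_string="gpt-4o-mini")

image_dir = "examples"  # hypothetical folder of images to caption
for name in sorted(os.listdir(image_dir)):
    if name.lower().endswith((".png", ".jpg", ".jpeg")):
        caption = tool.execute(image=os.path.join(image_dir, name),
                               prompt="Describe this image in one sentence.")
        print(f"{name}: {caption}")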
octotools/tools/nature_news_fetcher/__init__.py
ADDED
File without changes
octotools/tools/nature_news_fetcher/test.log
ADDED
@@ -0,0 +1,180 @@
{'tool_name': 'Nature_News_Fetcher_Tool', 'tool_description': 'A tool that fetches the latest news articles from Nature.', 'tool_version': '1.0.0', 'input_types': {'num_articles': 'int - The number of articles to fetch (default: 100).', 'max_pages': 'int - The maximum number of pages to fetch (default: 5).'}, 'output_type': 'list - A list of dictionaries containing information about the latest Nature news articles.', 'demo_commands': [{'command': 'execution = tool.execute()', 'description': 'Fetch the latest 100 news articles from Nature.'}, {'command': 'execution = tool.execute(num_articles=50, max_pages=3)', 'description': 'Fetch the latest 50 news articles from Nature, searching up to 3 pages.'}], 'require_llm_engine': False}
[
    {
        "title": "NASA embraced diversity. Trump\u2019s DEI purge is hitting space scientists hard",
        "url": "https://www.nature.com/articles/d41586-025-00480-x",
        "description": "Some researchers at NASA and outside it feel betrayed by the changes at the agency, which was known for promoting inclusion in science.",
        "authors": [
            "Alexandra Witze"
        ],
        "date": "2025-02-14",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00480-x/d41586-025-00480-x_50636314.jpg"
    },
    {
        "title": "Are the Trump team\u2019s actions affecting your research? How to contact Nature",
        "url": "https://www.nature.com/articles/d41586-025-00479-4",
        "description": "Use this form to share information with Nature\u2019s news team, or to make suggestions for future coverage.",
        "authors": [
            "No authors found"
        ],
        "date": "2025-02-13",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00479-4/d41586-025-00479-4_50626000.jpg"
    },
    {
        "title": "Scientists use AI to design life-like enzymes from scratch",
        "url": "https://www.nature.com/articles/d41586-025-00488-3",
        "description": "Combined approach takes AI-engineered enzymes one step closer to practical applications.",
        "authors": [
            "Miryam Naddaf"
        ],
        "date": "2025-02-13",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00488-3/d41586-025-00488-3_50636094.jpg"
    },
    {
        "title": "Royal Society will meet amid campaign to revoke Elon Musk\u2019s fellowship",
        "url": "https://www.nature.com/articles/d41586-025-00486-5",
        "description": "More than 1,300 scientists have signed a letter calling on the world\u2019s oldest science society to reassess the billionaire\u2019s membership following cuts to US science.",
        "authors": [
            "Holly Else"
        ],
        "date": "2025-02-13",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00486-5/d41586-025-00486-5_50635956.jpg"
    },
    {
        "title": "Vaccine sceptic RFK Jr is now a powerful force in US science: what will he do?",
        "url": "https://www.nature.com/articles/d41586-025-00439-y",
        "description": "Kennedy has expressed support for some fields, but has also declared he\u2019d like a \u2018break\u2019 in infectious-disease research.",
        "authors": [
            "Amanda Heidt",
            "Heidi Ledford"
        ],
        "date": "2025-02-13",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00439-y/d41586-025-00439-y_50621992.jpg"
    },
    {
        "title": "Are PhDs losing their lustre? Why fewer students are enrolling in doctoral degrees",
        "url": "https://www.nature.com/articles/d41586-025-00425-4",
        "description": "High living costs paired with stagnant stipends are being blamed for a drop in PhD enrolments in several countries.",
        "authors": [
            "Diana Kwon"
        ],
        "date": "2025-02-13",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00425-4/d41586-025-00425-4_50621644.jpg"
    },
    {
        "title": "Quantum-computing technology that makes qubits from atoms wins mega investment",
        "url": "https://www.nature.com/articles/d41586-025-00451-2",
        "description": "Firms using \u2018neutral atoms\u2019 to create qubits are reentering the race to build useful quantum machines.",
        "authors": [
            "Elizabeth Gibney"
        ],
        "date": "2025-02-13",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00451-2/d41586-025-00451-2_50636260.jpg"
    },
    {
        "title": "Cheap blood test detects pancreatic cancer before it spreads",
        "url": "https://www.nature.com/articles/d41586-025-00438-z",
        "description": "The deadly cancer is often not found until it has spread to other parts of the body.",
        "authors": [
            "Smriti Mallapaty"
        ],
        "date": "2025-02-12",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00438-z/d41586-025-00438-z_50621748.jpg"
    },
    {
        "title": "How centuries of isolation shaped Greenlanders\u2019 unique genetics",
        "url": "https://www.nature.com/articles/d41586-025-00443-2",
        "description": "Centuries of isolation have given Greenlanders a genetic profile that includes Arctic-specific variants.",
        "authors": [
            "Freda Kreier"
        ],
        "date": "2025-02-12",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00443-2/d41586-025-00443-2_50625480.jpg"
    },
    {
        "title": "Record-breaking neutrino is most energetic ever detected",
        "url": "https://www.nature.com/articles/d41586-025-00444-1",
        "description": "Although still under construction, the sea-floor KM3NeT detector spotted a neutrino 20 times more powerful than any previously detected.",
        "authors": [
            "Davide Castelvecchi"
        ],
        "date": "2025-02-12",
        "image_url": "https://media.springernature.com/w290h158/magazine-assets/d41586-025-00444-1/d41586-025-00444-1_50625568.jpg"
    }
]

Execution Result:
Number of articles fetched: 10

Sample articles:

1. Title: NASA embraced diversity. Trump’s DEI purge is hitting space scientists hard
   URL: https://www.nature.com/articles/d41586-025-00480-x
   Description: Some researchers at NASA and outside it feel betrayed by the changes at the agency, which was known ...
   Authors: Alexandra Witze
   Date: 2025-02-14
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00480-x/d41586-025-00480-x_50636314.jpg

2. Title: Are the Trump team’s actions affecting your research? How to contact Nature
   URL: https://www.nature.com/articles/d41586-025-00479-4
   Description: Use this form to share information with Nature’s news team, or to make suggestions for future covera...
   Authors: No authors found
   Date: 2025-02-13
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00479-4/d41586-025-00479-4_50626000.jpg

3. Title: Scientists use AI to design life-like enzymes from scratch
   URL: https://www.nature.com/articles/d41586-025-00488-3
   Description: Combined approach takes AI-engineered enzymes one step closer to practical applications....
   Authors: Miryam Naddaf
   Date: 2025-02-13
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00488-3/d41586-025-00488-3_50636094.jpg

4. Title: Royal Society will meet amid campaign to revoke Elon Musk’s fellowship
   URL: https://www.nature.com/articles/d41586-025-00486-5
   Description: More than 1,300 scientists have signed a letter calling on the world’s oldest science society to rea...
   Authors: Holly Else
   Date: 2025-02-13
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00486-5/d41586-025-00486-5_50635956.jpg

5. Title: Vaccine sceptic RFK Jr is now a powerful force in US science: what will he do?
   URL: https://www.nature.com/articles/d41586-025-00439-y
   Description: Kennedy has expressed support for some fields, but has also declared he’d like a ‘break’ in infectio...
   Authors: Amanda Heidt, Heidi Ledford
   Date: 2025-02-13
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00439-y/d41586-025-00439-y_50621992.jpg

6. Title: Are PhDs losing their lustre? Why fewer students are enrolling in doctoral degrees
   URL: https://www.nature.com/articles/d41586-025-00425-4
   Description: High living costs paired with stagnant stipends are being blamed for a drop in PhD enrolments in sev...
   Authors: Diana Kwon
   Date: 2025-02-13
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00425-4/d41586-025-00425-4_50621644.jpg

7. Title: Quantum-computing technology that makes qubits from atoms wins mega investment
   URL: https://www.nature.com/articles/d41586-025-00451-2
   Description: Firms using ‘neutral atoms’ to create qubits are reentering the race to build useful quantum machine...
   Authors: Elizabeth Gibney
   Date: 2025-02-13
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00451-2/d41586-025-00451-2_50636260.jpg

8. Title: Cheap blood test detects pancreatic cancer before it spreads
   URL: https://www.nature.com/articles/d41586-025-00438-z
   Description: The deadly cancer is often not found until it has spread to other parts of the body....
   Authors: Smriti Mallapaty
   Date: 2025-02-12
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00438-z/d41586-025-00438-z_50621748.jpg

9. Title: How centuries of isolation shaped Greenlanders’ unique genetics
   URL: https://www.nature.com/articles/d41586-025-00443-2
   Description: Centuries of isolation have given Greenlanders a genetic profile that includes Arctic-specific varia...
   Authors: Freda Kreier
   Date: 2025-02-12
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00443-2/d41586-025-00443-2_50625480.jpg

10. Title: Record-breaking neutrino is most energetic ever detected
   URL: https://www.nature.com/articles/d41586-025-00444-1
   Description: Although still under construction, the sea-floor KM3NeT detector spotted a neutrino 20 times more po...
   Authors: Davide Castelvecchi
   Date: 2025-02-12
   Image URL: https://media.springernature.com/w290h158/magazine-assets/d41586-025-00444-1/d41586-025-00444-1_50625568.jpg
Done!
octotools/tools/nature_news_fetcher/tool.py
ADDED
@@ -0,0 +1,181 @@
import os
import requests
from bs4 import BeautifulSoup
import time

from octotools.tools.base import BaseTool

class Nature_News_Fetcher_Tool(BaseTool):
    def __init__(self):
        super().__init__(
            tool_name="Nature_News_Fetcher_Tool",
            tool_description="A tool that fetches the latest news articles from Nature.",
            tool_version="1.0.0",
            input_types={
                "num_articles": "int - The number of articles to fetch (default: 100).",
                "max_pages": "int - The maximum number of pages to fetch (default: 5).",
            },
            output_type="list - A list of dictionaries containing information about the latest Nature news articles.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute()',
                    "description": "Fetch the latest 100 news articles from Nature."
                },
                {
                    "command": 'execution = tool.execute(num_articles=50, max_pages=3)',
                    "description": "Fetch the latest 50 news articles from Nature, searching up to 3 pages."
                },
            ],
        )
        self.base_url = "https://www.nature.com/nature/articles"

    def fetch_page(self, page_number):
        """
        Fetches a single page of news articles from Nature's website.

        Parameters:
            page_number (int): The page number to fetch.

        Returns:
            str: The HTML content of the page.
        """
        params = {
            "searchType": "journalSearch",
            "sort": "PubDate",
            "type": "news",
            "page": str(page_number)
        }
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(self.base_url, params=params, headers=headers)
        response.raise_for_status()
        return response.text

    def parse_articles(self, html_content):
        """
        Parses the HTML content and extracts article information.

        Parameters:
            html_content (str): The HTML content of the page.

        Returns:
            list: A list of dictionaries containing article information.
        """
        soup = BeautifulSoup(html_content, 'html.parser')
        articles_section = soup.find('section', id='new-article-list')
        if not articles_section:
            return []

        articles = []
        for article in articles_section.find_all('article', class_='c-card'):
            title_elem = article.find('h3', class_='c-card__title')
            title = title_elem.text.strip() if title_elem else "No title found"

            url_elem = title_elem.find('a') if title_elem else None
            url = "https://www.nature.com" + url_elem['href'] if url_elem and 'href' in url_elem.attrs else "No URL found"

            description_elem = article.find('div', {'data-test': 'article-description'})
            description = description_elem.text.strip() if description_elem else "No description available"

            authors_elem = article.find('ul', {'data-test': 'author-list'})
            authors = [author.text.strip() for author in authors_elem.find_all('li')] if authors_elem else ["No authors found"]

            date_elem = article.find('time')
            date = date_elem['datetime'] if date_elem and 'datetime' in date_elem.attrs else "No date found"

            image_elem = article.find('img')
            image_url = image_elem['src'] if image_elem and 'src' in image_elem.attrs else "No image found"

            articles.append({
                'title': title,
                'url': url,
                'description': description,
                'authors': authors,
                'date': date,
                'image_url': image_url
            })

        return articles

    def execute(self, num_articles=100, max_pages=5):
        """
        Fetches the latest news articles from Nature's website.

        Parameters:
            num_articles (int): The number of articles to fetch.
            max_pages (int): The maximum number of pages to fetch.

        Returns:
            list: A list of dictionaries containing article information.
        """
        all_articles = []
        page_number = 1

        try:
            while len(all_articles) < num_articles and page_number <= max_pages:
                html_content = self.fetch_page(page_number)
                page_articles = self.parse_articles(html_content)

                if not page_articles:
                    break  # No more articles found

                all_articles.extend(page_articles)
                page_number += 1
                time.sleep(1)  # Be polite to the server

            return all_articles[:num_articles]
        except Exception as e:
            return [{"error": str(e)}]

    def get_metadata(self):
        """
        Returns the metadata for the Nature_News_Fetcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata


if __name__ == "__main__":
    # Test command:
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/nature_news_fetcher
    python tool.py
    """

    # Get the directory of the current script
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Example usage of the Nature_News_Fetcher_Tool
    tool = Nature_News_Fetcher_Tool()

    # Get tool metadata
    metadata = tool.get_metadata()
    print(metadata)

    import json

    # Execute the tool to fetch the latest 10 articles (for demonstration purposes)
    try:
        execution = tool.execute(num_articles=10, max_pages=1)
        print(json.dumps(execution, indent=4))
        print("\nExecution Result:")
        print(f"Number of articles fetched: {len(execution)}")
        print("\nSample articles:")
        for i, article in enumerate(execution[:10], 1):
            print(f"\n{i}. Title: {article['title']}")
            print(f"   URL: {article['url']}")
            print(f"   Description: {article['description'][:100]}...")  # Show first 100 characters
            print(f"   Authors: {', '.join(article['authors'])}")
            print(f"   Date: {article['date']}")
            print(f"   Image URL: {article['image_url']}")
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
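Editorial note (not part of the commit): since `execute` returns plain dictionaries (title, url, description, authors, date, image_url), downstream filtering is straightforward. A short sketch; the keyword and counts are illustrative.

from octotools.tools.nature_news_fetcher.tool import Nature_News_Fetcher_Tool

tool = Nature_News_Fetcher_Tool()
articles = tool.execute(num_articles=30, max_pages=2)

# Keep only articles whose title or description mentions a topic of interest.
topic = "quantum"  # illustrative keyword
matches = [a for a in articles
           if "error" not in a
           and (topic in a["title"].lower() or topic in a["description"].lower())]

for a in matches:
    print(f"{a['date']}  {a['title']}\n  {a['url']}")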
octotools/tools/object_detector/__init__.py
ADDED
File without changes
octotools/tools/object_detector/examples/baseball.png
ADDED
octotools/tools/object_detector/test.log
ADDED
@@ -0,0 +1,112 @@
Could not load the custom kernel for multi-scale deformable attention: Command '['which', 'c++']' returned non-zero exit status 1.
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
Could not load the custom kernel for multi-scale deformable attention: /root/.cache/torch_extensions/py310_cu121/MultiScaleDeformableAttention/MultiScaleDeformableAttention.so: cannot open shared object file: No such file or directory
CUDA_HOME is not set
{'tool_name': 'Object_Detector_Tool', 'tool_description': 'A tool that detects objects in an image using the Grounding DINO model and saves individual object images with empty padding.', 'tool_version': '1.0.0', 'input_types': {'image': 'str - The path to the image file.', 'labels': 'list - A list of object labels to detect.', 'threshold': 'float - The confidence threshold for detection (default: 0.35).', 'model_size': "str - The size of the model to use ('tiny' or 'base', default: 'tiny').", 'padding': 'int - The number of pixels to add as empty padding around detected objects (default: 20).'}, 'output_type': 'list - A list of detected objects with their scores, bounding boxes, and saved image paths.', 'demo_commands': [{'command': 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])', 'description': 'Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths.'}, {'command': 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)', 'description': 'Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths.'}], 'require_llm_engine': False, 'user_metadata': {'limitation': 'The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification.'}}
Detected Objects:
Detected baseball with confidence 0.69
Bounding box: (558, 48, 615, 107)
Saved image (with padding): detected_objects/baseball_baseball_1.png

Detected baseball with confidence 0.69
Bounding box: (614, 137, 671, 191)
Saved image (with padding): detected_objects/baseball_baseball_2.png

Detected baseball with confidence 0.68
Bounding box: (132, 67, 189, 126)
Saved image (with padding): detected_objects/baseball_baseball_3.png

Detected baseball with confidence 0.68
Bounding box: (632, 67, 690, 126)
Saved image (with padding): detected_objects/baseball_baseball_4.png

Detected baseball with confidence 0.68
Bounding box: (57, 289, 115, 346)
Saved image (with padding): detected_objects/baseball_baseball_5.png

Detected baseball with confidence 0.68
Bounding box: (535, 111, 592, 170)
Saved image (with padding): detected_objects/baseball_baseball_6.png

Detected baseball with confidence 0.68
Bounding box: (307, 48, 365, 107)
Saved image (with padding): detected_objects/baseball_baseball_7.png

Detected baseball with confidence 0.68
Bounding box: (114, 137, 171, 191)
Saved image (with padding): detected_objects/baseball_baseball_8.png

Detected baseball with confidence 0.68
Bounding box: (35, 351, 91, 410)
Saved image (with padding): detected_objects/baseball_baseball_9.png

Detected baseball with confidence 0.68
Bounding box: (57, 48, 115, 107)
Saved image (with padding): detected_objects/baseball_baseball_10.png

Detected baseball with confidence 0.68
Bounding box: (35, 111, 91, 170)
Saved image (with padding): detected_objects/baseball_baseball_11.png

Detected baseball with confidence 0.68
Bounding box: (364, 137, 421, 191)
Saved image (with padding): detected_objects/baseball_baseball_12.png

Detected baseball with confidence 0.68
Bounding box: (114, 377, 171, 430)
Saved image (with padding): detected_objects/baseball_baseball_13.png

Detected baseball with confidence 0.67
Bounding box: (132, 307, 189, 366)
Saved image (with padding): detected_objects/baseball_baseball_14.png

Detected baseball with confidence 0.67
Bounding box: (285, 111, 342, 170)
Saved image (with padding): detected_objects/baseball_baseball_15.png

Detected baseball with confidence 0.67
Bounding box: (382, 67, 439, 126)
Saved image (with padding): detected_objects/baseball_baseball_16.png

Detected baseball with confidence 0.65
Bounding box: (587, 94, 643, 153)
Saved image (with padding): detected_objects/baseball_baseball_17.png

Detected baseball with confidence 0.65
Bounding box: (86, 94, 143, 153)
Saved image (with padding): detected_objects/baseball_baseball_18.png

Detected baseball with confidence 0.65
Bounding box: (86, 335, 143, 393)
Saved image (with padding): detected_objects/baseball_baseball_19.png

Detected baseball with confidence 0.63
Bounding box: (336, 95, 393, 153)
Saved image (with padding): detected_objects/baseball_baseball_20.png

Detected basket with confidence 0.59
Bounding box: (252, 2, 468, 215)
Saved image (with padding): detected_objects/baseball_basket_1.png

Detected basket with confidence 0.55
Bounding box: (503, 2, 717, 215)
Saved image (with padding): detected_objects/baseball_basket_2.png

Detected basket with confidence 0.54
Bounding box: (2, 2, 217, 215)
Saved image (with padding): detected_objects/baseball_basket_3.png

Detected basket with confidence 0.5
Bounding box: (2, 242, 217, 455)
Saved image (with padding): detected_objects/baseball_basket_4.png

Done!
octotools/tools/object_detector/tool.py
ADDED
@@ -0,0 +1,179 @@
1 |
+
# Grounding DINO Object Detection Tool
|
2 |
+
# https://huggingface.co/IDEA-Research/grounding-dino
|
3 |
+
|
4 |
+
import os
|
5 |
+
import time
|
6 |
+
import torch
|
7 |
+
from transformers import pipeline
|
8 |
+
|
9 |
+
from octotools.tools.base import BaseTool
|
10 |
+
from PIL import Image, ImageOps
|
11 |
+
|
12 |
+
import os
|
13 |
+
# If CUDA_HOME is set, print the value
|
14 |
+
print(os.environ.get('CUDA_HOME', 'CUDA_HOME is not set'))
|
15 |
+
|
16 |
+
# Suppress stderr by redirecting it to /dev/null
|
17 |
+
import sys
|
18 |
+
sys.stderr = open(os.devnull, 'w')
|
19 |
+
|
20 |
+
import warnings
|
21 |
+
warnings.filterwarnings("ignore")
|
22 |
+
|
23 |
+
|
24 |
+
class Object_Detector_Tool(BaseTool):
|
25 |
+
def __init__(self):
|
26 |
+
super().__init__(
|
27 |
+
tool_name="Object_Detector_Tool",
|
28 |
+
tool_description="A tool that detects objects in an image using the Grounding DINO model and saves individual object images with empty padding.",
|
29 |
+
tool_version="1.0.0",
|
30 |
+
input_types={
|
31 |
+
"image": "str - The path to the image file.",
|
32 |
+
"labels": "list - A list of object labels to detect.",
|
33 |
+
"threshold": "float - The confidence threshold for detection (default: 0.35).",
|
34 |
+
"model_size": "str - The size of the model to use ('tiny' or 'base', default: 'tiny').",
|
35 |
+
"padding": "int - The number of pixels to add as empty padding around detected objects (default: 20)."
|
36 |
+
},
|
37 |
+
output_type="list - A list of detected objects with their scores, bounding boxes, and saved image paths.",
|
38 |
+
demo_commands=[
|
39 |
+
{
|
40 |
+
"command": 'execution = tool.execute(image="path/to/image.png", labels=["baseball", "basket"])',
|
41 |
+
"description": "Detect baseball and basket in an image, save the detected objects with default empty padding, and return their paths."
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"command": 'execution = tool.execute(image="path/to/image.png", labels=["car", "person"], threshold=0.5, model_size="base", padding=15)',
|
45 |
+
"description": "Detect car and person in an image using the base model, save the detected objects with 15 pixels of empty padding, and return their paths."
|
46 |
+
}
|
47 |
+
],
|
48 |
+
user_metadata={
|
49 |
+
"limitation": "The model may not always detect objects accurately, and its performance can vary depending on the input image and the associated labels. It typically struggles with detecting small objects, objects that are uncommon, or objects with limited or specific attributes. For improved accuracy or better detection in certain situations, consider using supplementary tools or image processing techniques to provide additional information for verification."
|
50 |
+
}
|
51 |
+
)
|
52 |
+
|
53 |
+
def preprocess_caption(self, caption):
|
54 |
+
result = caption.lower().strip()
|
55 |
+
if result.endswith("."):
|
56 |
+
return result
|
57 |
+
return result + "."
|
58 |
+
|
59 |
+
def build_tool(self, model_size='tiny'):
|
60 |
+
model_name = f"IDEA-Research/grounding-dino-{model_size}"
|
61 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
62 |
+
try:
|
63 |
+
pipe = pipeline(model=model_name, task="zero-shot-object-detection", device=device)
|
64 |
+
return pipe
|
65 |
+
except Exception as e:
|
66 |
+
print(f"Error building the Object Detection tool: {e}")
|
67 |
+
return None
|
68 |
+
|
69 |
+
def save_detected_object(self, image, box, image_name, label, index, padding):
|
70 |
+
object_image = image.crop(box)
|
71 |
+
padded_image = ImageOps.expand(object_image, border=padding, fill='white')
|
72 |
+
|
73 |
+
filename = f"{image_name}_{label}_{index}.png"
|
74 |
+
os.makedirs(self.output_dir, exist_ok=True)
|
75 |
+
save_path = os.path.join(self.output_dir, filename)
|
76 |
+
|
77 |
+
padded_image.save(save_path)
|
78 |
+
return save_path
|
79 |
+
|
80 |
+
def execute(self, image, labels, threshold=0.35, model_size='tiny', padding=20, max_retries=10, retry_delay=5, clear_cuda_cache=False):
|
81 |
+
for attempt in range(max_retries):
|
82 |
+
try:
|
83 |
+
saved_files = []
|
84 |
+
|
85 |
+
pipe = self.build_tool(model_size)
|
86 |
+
if pipe is None:
|
87 |
+
raise ValueError("Failed to build the Object Detection tool.")
|
88 |
+
|
89 |
+
preprocessed_labels = [self.preprocess_caption(label) for label in labels]
|
90 |
+
results = pipe(image, candidate_labels=preprocessed_labels, threshold=threshold)
|
91 |
+
|
92 |
+
formatted_results = []
|
93 |
+
original_image = Image.open(image)
|
94 |
+
image_name = os.path.splitext(os.path.basename(image))[0]
|
95 |
+
|
96 |
+
object_counts = {}
|
97 |
+
|
98 |
+
for result in results:
|
99 |
+
box = tuple(result["box"].values())
|
100 |
+
label = result["label"]
|
101 |
+
score = round(result["score"], 2)
|
102 |
+
if label.endswith("."):
|
103 |
+
label = label[:-1]
|
104 |
+
|
105 |
+
object_counts[label] = object_counts.get(label, 0) + 1
|
106 |
+
index = object_counts[label]
|
107 |
+
|
108 |
+
save_path = self.save_detected_object(original_image, box, image_name, label, index, padding)
|
109 |
+
|
110 |
+
formatted_results.append({
|
111 |
+
"label": label,
|
112 |
+
"confidence score": score,
|
113 |
+
"box": box,
|
114 |
+
"saved_image_path": save_path
|
115 |
+
})
|
116 |
+
|
117 |
+
return formatted_results
|
118 |
+
|
119 |
+
except RuntimeError as e:
|
120 |
+
if "CUDA out of memory" in str(e):
|
121 |
+
print(f"CUDA out of memory error on attempt {attempt + 1}.")
|
122 |
+
if clear_cuda_cache:
|
123 |
+
print("Clearing CUDA cache and retrying...")
|
124 |
+
torch.cuda.empty_cache()
|
125 |
+
else:
|
126 |
+
print(f"Retrying in {retry_delay} seconds...")
|
127 |
+
time.sleep(retry_delay)
|
128 |
+
continue
|
129 |
+
else:
|
130 |
+
print(f"Runtime error: {e}")
|
131 |
+
break
|
132 |
+
except Exception as e:
|
133 |
+
print(f"Error detecting objects: {e}")
|
134 |
+
break
|
135 |
+
|
136 |
+
print(f"Failed to detect objects after {max_retries} attempts.")
|
137 |
+
return []
|
138 |
+
|
139 |
+
def get_metadata(self):
|
140 |
+
metadata = super().get_metadata()
|
141 |
+
return metadata
|
142 |
+
|
143 |
+
if __name__ == "__main__":
|
144 |
+
# Test command:
|
145 |
+
"""
|
146 |
+
Run the following commands in the terminal to test the script:
|
147 |
+
|
148 |
+
cd octotools/tools/object_detector
|
149 |
+
python tool.py
|
150 |
+
"""
|
151 |
+
|
152 |
+
# Get the directory of the current script
|
153 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
154 |
+
|
155 |
+
# Example usage of the Object_Detector_Tool
|
156 |
+
tool = Object_Detector_Tool()
|
157 |
+
tool.set_custom_output_dir("detected_objects")
|
158 |
+
|
159 |
+
# Get tool metadata
|
160 |
+
metadata = tool.get_metadata()
|
161 |
+
print(metadata)
|
162 |
+
|
163 |
+
# Construct the full path to the image using the script's directory
|
164 |
+
relative_image_path = "examples/baseball.png"
|
165 |
+
image_path = os.path.join(script_dir, relative_image_path)
|
166 |
+
|
167 |
+
# Execute the tool
|
168 |
+
try:
|
169 |
+
execution = tool.execute(image=image_path, labels=["baseball", "basket"], padding=20)
|
170 |
+
print("Detected Objects:")
|
171 |
+
for obj in execution:
|
172 |
+
print(f"Detected {obj['label']} with confidence {obj['confidence score']}")
|
173 |
+
print(f"Bounding box: {obj['box']}")
|
174 |
+
print(f"Saved image (with padding): {obj['saved_image_path']}")
|
175 |
+
print()
|
176 |
+
except ValueError as e:
|
177 |
+
print(f"Execution failed: {e}")
|
178 |
+
|
179 |
+
print("Done!")
|
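A minimal sketch (not part of the commit) of how the list returned by Object_Detector_Tool.execute could be consumed downstream; the keep_best_per_label helper, the 0.5 cutoff, and the sample values are illustrative assumptions, not octotools API.

# Hypothetical consumer of Object_Detector_Tool.execute() output.
# Each detection is a dict with "label", "confidence score", "box", "saved_image_path".
def keep_best_per_label(detections, min_score=0.5):
    # Keep only the highest-scoring detection per label above the cutoff.
    best = {}
    for det in detections:
        label, score = det["label"], det["confidence score"]
        if score >= min_score and (label not in best or score > best[label]["confidence score"]):
            best[label] = det
    return list(best.values())

sample = [
    {"label": "baseball", "confidence score": 0.87, "box": (10, 10, 50, 50), "saved_image_path": "a.png"},
    {"label": "baseball", "confidence score": 0.42, "box": (60, 60, 90, 90), "saved_image_path": "b.png"},
]
print(keep_best_per_label(sample))  # keeps only the 0.87 detection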
octotools/tools/pubmed_search/__init__.py
ADDED
File without changes
|
octotools/tools/pubmed_search/test.log
ADDED
@@ -0,0 +1,3 @@
|
1 |
+
2025-02-14 19:03:45 06a0c5b0e177 metapub.config[158535] WARNING NCBI_API_KEY was not set.
|
2 |
+
[{'title': 'Safety of health workers during the COVID-19 pandemic and beyond: piloting WHO framework in Iran.', 'abstract': "BACKGROUND: Health Workers Safety (HWS) is a global health priority and essential at all times, in stable situations, in emergencies, in disease epidemics or pandemics. This study aimed to assess HWS during the COVID-19 Pandemic.\nMETHODS: This cross-sectional study was conducted in 2022 in east Azerbaijan province, Iran. HWS was assessed based on 22 indicators suggested by WHO EMRO. We selected 15 PHC facilities and six wards from two hospitals randomly. Data collected (qualitative and quantitative) using national digital health records, staff records, and indicator-specific tools. In addition to measuring the indicator's value, the indicators' feasibility was also assessed. Descriptive and inferential statistics with SPSS-16 were used for data analysis.\nRESULTS: Totally, 325 Health Workers (HWs) (218 from PHC facilities and 107 from hospitals) participated in the study. Most of the participants in PHC facilities and hospitals were Community Health Workers (CHWs) (Moragheb Salamat) (45.4%) and nurses (37.38%), respectively. Most of HWs had completed the full vaccination schedule for Hepatitis B and COVID-19. Personal Protective Equipment (PPE) safety protocols were adhered by most of HWs within a healthcare facility. None of managers had attended nationally certified training for mental health support for health and care workers. Less than 20% of HWs participated in the work burnout prevention courses and most of HWs complained about work overload, or burnout. The job satisfaction level of hospital HWs (60.20%) was significantly higher than that of HWs from PHC facilities (57.18%) (P\u2009<\u20090.001).\nCONCLUSION: Even though the mental health of HWs was not as expected, the indicators related to physical health and occupational health were at a suitable level. Also, there is not a system in PHC to audit the application of safety measures to mitigate the risk of contracting COVID-19. We recommend creating a specific system (precise and detailed) for HWs' safety and applying safety measures in the PHC routine programs.", 'keywords': ['Assessment', 'COVID-19 pandemic', 'Health worker', 'Iran', 'Safety'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39920792'}, {'title': 'Experiences of working as a clinical nurse while pregnant during the coronavirus disease-2019 pandemic: a qualitative study.', 'abstract': 'BACKGROUND: Working as a pregnant clinical nurse might experience a range of challenges, such as significant anatomical and physiological changes as well as emotional and cognitive changes. That might be particularly obvious under the historical background of coronavirus disease-2019 (COVID-19) pandemic. However, a dearth of studies has explored the experiences of working as a pregnant nurse during this special period. This study aimed to explore the experiences of working as a clinical nurse while pregnant during the COVID-19 pandemic.\nMETHODS: A descriptive qualitative design was selected. Purposive sampling, combined with maximum variation strategy and snowball sampling, were utilized to identify and select participants from tertiary-teaching hospitals, specialized hospitals, and community hospitals in Zhejiang Province, southeastern China. 
Online semi-structured individual interviews were used to collect data, and conventional content analysis was used to analyze the data.\nRESULTS: Eleven Chinese nurses with a mean age of 31.8 years, ranging from 26 to 40 years, participated in this study. Four themes and twelve subthemes emerged: (1) still adhering to work as a clinical nurse despite being pregnant during the pandemic; (2) working during pregnancy under pandemic is still an ordinary nurse; (3) still staying in the special life phase as a pregnant mother; and (4) growth and gains as pregnant mother.\nCONCLUSION: The pregnant clinical nurses suffered from various changes and difficulties during the pandemic. Managers, occupational health and other health system leaders, and policymakers should be aware of the importance of establishing a work environment that guarantees safe continued pregnancy. Future studies should focus on the establishment of specific guidelines and manuals regarding how pregnant nurses worked, as well as the development of self-protection interventions during pregnancy. Moreover, research on moral stigma and bullying in nursing during pregnancy deserves further exploration.\nCLINICAL TRIAL NUMBER: Not applicable.', 'keywords': ['COVID-19 pandemic', 'Experiences', 'Nurse', 'Pregnant', 'Qualitative research'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39901239'}, {'title': "Development and psychometric validation of the frontline health workers' occupational risk and characteristics in emergencies index (FORCE-index) - The covid Hospital cohort study.", 'abstract': "OBJECTIVES: A lack of tools for the systematic identification of frontline health workers' changing occupational risks, characteristics, and needs, poses a major barrier to supporting vital personnel to stay in practice through health emergencies and beyond. The current study reports on the development and psychometric evaluation of the Frontline health workers' Occupational Risk and Characteristics in Emergencies index (FORCE-index).\nSTUDY DESIGN: The Covid hospital study is a large, multisite, four-wave, open cohort study of frontline health workers responding to the first four waves of the COVID-19 pandemic (2020-2022).\nMETHODS: 2496 frontline health workers responded to questionnaires assessing various aspects of their work environment. Using exploratory factor analysis, we estimated the latent structure of the FORCE-index at the first and second waves. This structure was evaluated using confirmatory factor analysis at the third and fourth waves. The internal consistency of the instrument's subscales (e.g., factors) was evaluated using omega reliability, Cronbach's alpha coefficient, and mean inter-item correlation.\nRESULTS: A nine-factor solution provided best fit to the data. These factors mapped onto the following aspects of the work environment; competency, stress management, familiarity, workload manageability, work performance, infection safety, personal protective equipment, social safety, and social support. 
Internal consistency for the full FORCE-index and the nine factors was satisfactory.\nCONCLUSIONS: The initial psychometric validation indicates that the FORCE-index is a valid measure which can be used by health authorities, services, and institutions to adequately and systematically assess central aspects of frontline health workers' work environment that are commonly challenged in health emergencies.", 'keywords': ['Covid-19', 'Frontline', 'Health Personnel', 'Health care worker', 'Health emergency', 'Health response', 'Hospital', 'Index', 'Infectious outbreak', 'Job characteristics', 'Occupational Health', 'Occupational diseases', 'Occupational exposure', 'Occupational stress', 'Preparedness', 'Preparedness planning', 'Psychometric', 'Scale', 'Stress', 'Work environment'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39896339'}, {'title': "A descriptive analysis of nurses' self-reported mental health symptoms during the COVID-19 pandemic: An international study.", 'abstract': "AIM: To describe the self-reported mental health of nurses from 35 countries who worked during the COVID-19 pandemic.\nBACKGROUND: There is little occupationally specific data about nurses' mental health worldwide. Studies have documented the impact on nurses' mental health of the COVID-19 pandemic, but few have baseline referents.\nMETHODS: A descriptive, cross-sectional design structured the study. Data reflect a convenience sample of\xa09,387 participants who completed the opt-in survey between July 31, 2022, and October 31, 2023. Descriptive statistics were run to analyze the following variables associated with mental health: Self-reports of mental health symptoms, burnout, personal losses during the pandemic, access to mental health services, and self-care practices used to cope with pandemic-related stressors. Reporting of this study was steered by the STROBE guideline for quantitative studies.\nRESULTS: Anxiety or depression occurred at rates ranging from 23%-61%, with country-specific trends in reporting observed. Approximately 18% of the sample reported experiencing some symptoms of burnout. The majority of nurses' employers did not provide mental health support in the workplace. Most reported more frequently engaging with self-care practices compared with before the pandemic. Notably, 20% of nurses suffered the loss of a family member, 35% lost a friend, and 34% a coworker due to COVID-19. 
Nearly half (48%) reported experiencing public aggression due to their identity as a nurse.\nCONCLUSIONS: The data obtained establish a basis for understanding the specific mental health needs of the nursing workforce globally, highlighting key areas for service development.\nIMPLICATIONS FOR NURSING POLICY: Healthcare organizations and governmental bodies need to develop targeted mental health support programs that are readily accessible to nurses to foster a resilient nursing workforce.", 'keywords': ['COVIDβ19', 'global health', 'health workforce', 'nursing', 'nursing shortage', 'occupational health', 'occupational health nursing', 'pandemics'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39871528'}, {'title': 'Work Psychology and Occupational Health: An Editorial.', 'abstract': "Globally, the COVID-19 pandemic has severely impacted workers' health, particularly their mental well-being [...].", 'keywords': [], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39857553'}, {'title': 'Analysis of HFE impact of COVID-19 on OHS in construction enterprises.', 'abstract': "Human factors are critical to Occupational Health and Safety (OHS) in construction enterprises. However, comprehensive industry-wide recognition remains challenging, underscoring the need for Human Factors Engineering (HFE) research. This study develops an optimized HFE evaluation model based on fundamental HFE principles. Examining COVID-19's significant impact on construction enterprise OHS, this research employs an empirical investigation of 259 cases, utilizing a model that integrates NetLogo's System Dynamics (SD) and Multiple Linear Regression (MLR) to analyze the interactions between human factors and other variables. The findings reveal four key factors influencing human factors: management, material, environmental, and methodological. These factors demonstrate a quadratic parabolic relationship, with peak influence occurring at step 36 of the research period. Twelve of the 20 survey factors exhibit a linear regression relationship with human factors' four sub-factors, with pre-job training (Q<sub>9</sub>) demonstrating multiple influential interactions. The strongest correlation is between pre-job training (Q<sub>9</sub>) and living materials (Q<sub>14</sub>), with a weight coefficient of .325. Psychological counseling (Q<sub>8</sub>) and living materials (Q<sub>14</sub>) show a close relationship (weight coefficient .301). Notably, Q<sub>9</sub> and empirical prevention materials (Q<sub>11</sub>) display a negative correlation with a weight coefficient of -.156. This study's practical significance lies in enabling enterprises to identify key HFE control factors and understand critical sub-factors for mitigating COVID-19's adverse impacts.", 'keywords': ['COVID-19', 'Human factors engineering (HFE)', 'Multiple linear regression (MLR)', 'NetLogo', 'Occupational health and safety (OHS)', 'System dynamics (SD)'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39811363'}, {'title': 'COVID-19 workplace countermeasures that occupational physicians could not change in Japan: a qualitative study.', 'abstract': 'BACKGROUND: During the COVID-19 pandemic, information and circumstances changed from moment to moment, including the accumulation of scientific knowledge, the emergence of variants, social tolerance, and government policy. Therefore, it was important to adapt workplace countermeasures punctually and flexibly based on scientific evidence and according to circumstances. 
However, there has been no assessment of changes in workplace countermeasures. With a view toward preparedness for future pandemics, we surveyed COVID-19 workplace countermeasures that occupational physicians considered as needing to be changed but went unchanged.\nMETHODS: We invited 685 professional occupational physicians certified by Japan Society for Occupational Health to complete an online questionnaire by sending postcards with QR codes. The main questions concerned countermeasures that the participants wanted to change but could not. The survey period was from February 21 to April 28, 2022. The responses were analyzed using the KJ method.\nRESULTS: Of the 168 invitees (24.5%) who responded to the survey, 125 reported countermeasures that needed to be changed (total count: 254). The responses were categorized into basic systems, occupational health services, workplace countermeasures, vaccines, and incidents, with a code count of 7, 8,147, 10, and 82, respectively. The type of countermeasure was 115 for countermeasures to be strengthened (CBS), 110 for measures to be mitigated (CBM), and 29 for neither.\nCONCLUSIONS: Often-mentioned CBS were increased teleworking, strengthened ventilation, smoking cessation, and promotion of vaccines. Often-mentioned CBM were relaxation of protective equipment rules, discontinued environmental disinfection, and shorted isolation and reinstatement. In the early pandemic phases, CBSs were frequently mentioned, whereas CBMs were featured more prominently in the latter phases. The survey revealed countermeasures that occupational physicians thought needed to be changed but were not changed in practice. For future responses to emerging and reemerging infectious diseases, it will be necessary to establish rules compatible with flexible modification of workplace countermeasures in response to changing circumstances.', 'keywords': ['COVID-19', 'Japan', 'Occupational health', 'Occupational physician', 'Workers', 'Workplace', 'Workplace countermeasures'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39780108'}, {'title': 'Rapid COVID-19 Testing of Symptomatic Health Care Personnel: A Strategy for Safely Maintaining the Workforce.', 'abstract': 'Determine performance characteristics and safety outcomes of two rapid COVID-19 screening methods to inform immediate return to work (RTW) decisions while (health care personnel) HCP await results of pending confirmatory laboratory test. Retrospective, occupational health quality improvement study comparing screening with rapid SARS-CoV-2 nucleic acid amplification (NAAT) and antigen test. 531 mildly symptomatic HCP screened over 16 months. Until more accurate affordable NAAT tests become available, antigen test screening alone addresses simultaneous needs to minimize COVID-19 transmission from symptomatic HCP and maintain an adequate workforce.', 'keywords': [], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39739739'}, {'title': 'Satisfaction and Workload as Predictors of Psychological Distress in Professionals of Psychosocial Care Centers During the COVID-19 Pandemic.', 'abstract': 'BACKGROUND AND AIMS: The COVID-19 pandemic significantly impacted the mental health of healthcare professionals, especially those working in Psychosocial Care Centers (CAPS), which are crucial services in the Brazilian mental health system. 
This study aimed to investigate the association between job satisfaction, workload, and psychological distress among CAPS professionals during the pandemic.\nMETHODS: A cross-sectional study was conducted with 53 professionals from seven CAPS. The Workload Impact Scale (IMPACTO-BR) and Job Satisfaction Scale (SATIS-BR), the General Health Questionnaire (GHQ-12), and a sociodemographic questionnaire were used. Descriptive and analytical statistical analyses were performed. Multiple linear regression analysis was conducted to examine the relationship between job satisfaction, workload, and psychological distress.\nRESULTS: Professionals reported moderate satisfaction (3.67 Β± 0.45) and mild workload (1.82 Β± 0.63). One-third of the sample showed scores indicative of psychological distress. Multiple linear regression analysis revealed that workload (p = 0.0025) and low job satisfaction (p = 0.0495) were significantly associated with psychological distress.\nCONCLUSIONS: Low job satisfaction and high professional workload were predictive variables of psychological distress. These findings highlight the need for investments in promoting the quality of life at work for mental health professionals, especially during crises. The implications for human resource management and public policy development emphasize the importance of an integrated approach that considers the well-being of professionals for the effectiveness and sustainability of the psychosocial care model.', 'keywords': ['COVID-19', 'health personnel', 'job satisfaction', 'mental health', 'mental health services', 'occupational health'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39728651'}, {'title': 'Occupational-related risk of testing SARS-CoV-2 positive for publicly employed medical doctors in Sweden: A nationwide cohort study.', 'abstract': 'AIMS: Doctors have an increased risk of SARS-CoV-2 infection caused by exposure to contagious patients. We aimed to identify which clinical specialities among medical doctors had the highest occupation-related risk of testing positive for SARS-CoV-2, utilizing data for all publicly employed medical doctors in Sweden.\nMETHODS: Data regarding positive SARS-CoV-2 test results and employment for publicly employed doctors in Sweden were divided into three observation periods: 1) 1 February to 31 December 2020, 2) 1 January to 30 June 2021 and 3) 1 July 2021 to 31 March 2022. Individuals were stratified according to occupation clinic and compared with clinical occupations with little to no patient contact. The risk of testing positive for SARS-CoV-2 was estimated using Cox proportional hazards regression, with sex, age and vaccination status as covariates.\nRESULTS: The study cohort included all publicly employed doctors in Sweden: 35,028 individuals. In the first period, Infectious Disease doctors had the highest incidence of SARS-CoV-2 positive tests, with an incidence of 20.2 %, compared with 8.7 % in the reference group, and an adjusted hazard ratio of 2.5 (95% confidence interval 2.02-3.04), which decreased during period 2-3. 
Doctors in Geriatric Medicine had an elevated risk throughout the whole study period.\nCONCLUSIONS: Our study shows an association between working in a speciality that involves caring for contagious COVID-19 patients, which raises concerns about infection control measures and routines being insufficient to prevent occupational infection in future pandemics.', 'keywords': ['COVID-19', 'Occupational health', 'SARS-CoV-2', 'healthcare workers', 'medical doctors', 'risk factors'], 'url': 'https://ncbi.nlm.nih.gov/pubmed/39726065'}]
|
3 |
+
Done!
|
octotools/tools/pubmed_search/tool.py
ADDED
@@ -0,0 +1,112 @@
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
from pymed import PubMed
|
4 |
+
from metapub import PubMedFetcher
|
5 |
+
from octotools.tools.base import BaseTool
|
6 |
+
from tenacity import (
|
7 |
+
retry,
|
8 |
+
stop_after_attempt,
|
9 |
+
wait_random_exponential,
|
10 |
+
)
|
11 |
+
|
12 |
+
# Suppress stderr by redirecting it to /dev/null
|
13 |
+
import sys
|
14 |
+
sys.stderr = open(os.devnull, 'w')
|
15 |
+
|
16 |
+
import warnings
|
17 |
+
warnings.filterwarnings("ignore")
|
18 |
+
|
19 |
+
|
20 |
+
class Pubmed_Search_Tool(BaseTool):
|
21 |
+
def __init__(self):
|
22 |
+
super().__init__(
|
23 |
+
tool_name="Pubmed_Search_Tool",
|
24 |
+
tool_description="A tool that searches PubMed Central to retrieve relevant article abstracts based on a given list of text queries. Use this ONLY if you cannot use the other more specific ontology tools.",
|
25 |
+
tool_version="1.0.0",
|
26 |
+
input_types={
|
27 |
+
"queries": "list[str] - list of queries terms for searching PubMed."
|
28 |
+
},
|
29 |
+
output_type="list - List of items matching the search query. Each item consists of the title, abstract, keywords, and URL of the article. If no results found, a string message is returned.",
|
30 |
+
demo_commands=[
|
31 |
+
{
|
32 |
+
"command": 'execution = tool.execute(queries=["scoliosis", "injury"])',
|
33 |
+
"description": "Search for PubMed articles mentioning 'scoliosis' OR 'injury'."
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"command": 'execution = tool.execute(queries=["COVID", "vaccine", "occupational health"])',
|
37 |
+
"description": "Search for PubMed articles mentioning 'COVID' OR 'vaccine' OR 'occupational health'."
|
38 |
+
}
|
39 |
+
],
|
40 |
+
user_metadata={
|
41 |
+
'limitations': "Try to use shorter and more general search queries."
|
42 |
+
}
|
43 |
+
)
|
44 |
+
self.pubmed = PubMed(tool="MyTool", email="[email protected]")
|
45 |
+
self.fetch = PubMedFetcher()
|
46 |
+
|
47 |
+
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(3))
|
48 |
+
def search_query(self, query_str, max_results=10):
|
49 |
+
return self.pubmed.query(query_str, max_results=max_results)
|
50 |
+
|
51 |
+
def execute(self, queries, max_results=10):
|
52 |
+
try:
|
53 |
+
query_str = f"({'[Title/Abstract] OR '.join(queries) + '[Title/Abstract]'}) AND hasabstract[All Fields] AND fha[Filter]"
|
54 |
+
max_results = min(max_results, 50)
|
55 |
+
|
56 |
+
results = self.search_query(query_str, max_results=max_results) # API can only get most recent
|
57 |
+
|
58 |
+
items = []
|
59 |
+
for article in results:
|
60 |
+
try:
|
61 |
+
article = json.loads(article.toJSON())
|
62 |
+
pubmed_id = article['pubmed_id'] # get id using pymed then get content using metapub
|
63 |
+
|
64 |
+
article = self.fetch.article_by_pmid(pubmed_id)
|
65 |
+
items.append({
|
66 |
+
'title': article.title,
|
67 |
+
'abstract': article.abstract,
|
68 |
+
'keywords': article.keywords,
|
69 |
+
'url': article.url
|
70 |
+
})
|
71 |
+
except Exception:  # skip articles that fail to fetch or parse
|
72 |
+
continue
|
73 |
+
|
74 |
+
if len(items) == 0:
|
75 |
+
return "No results found for search query. Try another query or tool."
|
76 |
+
|
77 |
+
return items
|
78 |
+
|
79 |
+
except Exception as e:
|
80 |
+
print(f"Error searching PubMed: {e}")
|
81 |
+
return []
|
82 |
+
|
83 |
+
def get_metadata(self):
|
84 |
+
metadata = super().get_metadata()
|
85 |
+
return metadata
|
86 |
+
|
87 |
+
if __name__ == "__main__":
|
88 |
+
# Test command:
|
89 |
+
"""
|
90 |
+
Run the following commands in the terminal to test the script:
|
91 |
+
|
92 |
+
cd octotools/tools/pubmed_search
|
93 |
+
python tool.py
|
94 |
+
"""
|
95 |
+
|
96 |
+
# Get the directory of the current script
|
97 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
98 |
+
|
99 |
+
# Example usage
|
100 |
+
tool = Pubmed_Search_Tool()
|
101 |
+
|
102 |
+
# Queries
|
103 |
+
queries = ["COVID occupational health"]
|
104 |
+
|
105 |
+
# Execute the tool
|
106 |
+
try:
|
107 |
+
execution = tool.execute(queries=queries)
|
108 |
+
print(execution)
|
109 |
+
except ValueError as e:
|
110 |
+
print(f"Execution failed: {e}")
|
111 |
+
|
112 |
+
print("Done!")
|
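A standalone sketch (not part of the commit) of the query string assembled inside Pubmed_Search_Tool.execute above; build_pubmed_query is an illustrative name and the example terms are arbitrary.

# Mirrors the f-string in execute(): each term is tagged with [Title/Abstract],
# the terms are OR-joined, and the hasabstract / fha filters used by the tool are appended.
def build_pubmed_query(queries):
    tagged = "[Title/Abstract] OR ".join(queries) + "[Title/Abstract]"
    return f"({tagged}) AND hasabstract[All Fields] AND fha[Filter]"

print(build_pubmed_query(["COVID", "occupational health"]))
# -> (COVID[Title/Abstract] OR occupational health[Title/Abstract]) AND hasabstract[All Fields] AND fha[Filter]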
octotools/tools/python_code_generator/__init__.py
ADDED
File without changes
|
octotools/tools/python_code_generator/test.log
ADDED
@@ -0,0 +1,13 @@
|
1 |
+
|
2 |
+
Initializing Python_Code_Generator_Tool with model_string: gpt-4o-mini
|
3 |
+
!! Cache enabled for model: gpt-4o-mini
|
4 |
+
|
5 |
+
Initializing Python_Code_Generator_Tool with model_string: gpt-4o-mini
|
6 |
+
!! Cache enabled for model: gpt-4o-mini
|
7 |
+
{'tool_name': 'Python_Code_Generator_Tool', 'tool_description': 'A tool that generates and executes simple Python code snippets for basic arithmetical calculations and math-related problems. The generated code runs in a highly restricted environment with only basic mathematical operations available.', 'tool_version': '1.0.0', 'input_types': {'query': 'str - A clear, specific description of the arithmetic calculation or math problem to be solved, including any necessary numerical inputs.'}, 'output_type': 'dict - A dictionary containing the generated code, calculation result, and any error messages.', 'demo_commands': [{'command': 'execution = tool.execute(query="Calculate the factorial of 5")', 'description': 'Generate a Python code snippet to calculate the factorial of 5.'}, {'command': 'execution = tool.execute(query="Find the sum of prime numbers up to 50")', 'description': 'Generate a Python code snippet to find the sum of prime numbers up to 50.'}, {'command': 'query="Given the list [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], calculate the sum of squares of odd numbers"\nexecution = tool.execute(query=query)', 'description': 'Generate a Python function for a specific mathematical operation on a given list of numbers.'}], 'require_llm_engine': True, 'user_metadata': {'limitations': ['Restricted to basic Python arithmetic operations and built-in mathematical functions.', 'Cannot use any external libraries or modules, including those in the Python standard library.', 'Limited to simple mathematical calculations and problems.', 'Cannot perform any string processing, data structure manipulation, or complex algorithms.', 'No access to any system resources, file operations, or network requests.', "Cannot use 'import' statements.", 'All calculations must be self-contained within a single function or script.', 'Input must be provided directly in the query string.', 'Output is limited to numerical results or simple lists/tuples of numbers.'], 'best_practices': ['Provide clear and specific queries that describe the desired mathematical calculation.', 'Include all necessary numerical inputs directly in the query string.', 'Keep tasks focused on basic arithmetic, algebraic calculations, or simple mathematical algorithms.', 'Ensure all required numerical data is included in the query.', 'Verify that the query only involves mathematical operations and does not require any data processing or complex algorithms.', 'Review generated code to ensure it only uses basic Python arithmetic operations and built-in math functions.']}}
|
8 |
+
|
9 |
+
###Query: Given the number list: [1, 2, 3, 4, 5], calculate the sum of all the numbers in the list.
|
10 |
+
The sum of all the numbers in the list [1, 2, 3, 4, 5] is: 15
|
11 |
+
|
12 |
+
###Execution Result: {'printed_output': 'The sum of all the numbers in the list [1, 2, 3, 4, 5] is: 15', 'variables': {'numbers': [1, 2, 3, 4, 5], 'total_sum': 15}}
|
13 |
+
Done!
|
octotools/tools/python_code_generator/tool.py
ADDED
@@ -0,0 +1,243 @@
|
1 |
+
# octotools/tools/python_code_generator/tool.py
|
2 |
+
|
3 |
+
import os
|
4 |
+
import re
|
5 |
+
import sys
|
6 |
+
from io import StringIO
|
7 |
+
import contextlib
|
8 |
+
|
9 |
+
|
10 |
+
from octotools.tools.base import BaseTool
|
11 |
+
from octotools.engine.openai import ChatOpenAI
|
12 |
+
|
13 |
+
import signal
|
14 |
+
from contextlib import contextmanager
|
15 |
+
|
16 |
+
# Custom exception for code execution timeout
|
17 |
+
class TimeoutException(Exception):
|
18 |
+
pass
|
19 |
+
|
20 |
+
# Custom context manager for code execution timeout
|
21 |
+
@contextmanager
|
22 |
+
def timeout(seconds):
|
23 |
+
def timeout_handler(signum, frame):
|
24 |
+
raise TimeoutException("Code execution timed out")
|
25 |
+
|
26 |
+
# Set the timeout handler
|
27 |
+
original_handler = signal.signal(signal.SIGALRM, timeout_handler)
|
28 |
+
signal.alarm(seconds)
|
29 |
+
|
30 |
+
try:
|
31 |
+
yield
|
32 |
+
finally:
|
33 |
+
# Restore the original handler and disable the alarm
|
34 |
+
signal.alarm(0)
|
35 |
+
signal.signal(signal.SIGALRM, original_handler)
|
36 |
+
|
37 |
+
|
38 |
+
class Python_Code_Generator_Tool(BaseTool):
|
39 |
+
require_llm_engine = True
|
40 |
+
|
41 |
+
def __init__(self, model_string="gpt-4o-mini"):
|
42 |
+
super().__init__(
|
43 |
+
tool_name="Python_Code_Generator_Tool",
|
44 |
+
tool_description="A tool that generates and executes simple Python code snippets for basic arithmetical calculations and math-related problems. The generated code runs in a highly restricted environment with only basic mathematical operations available.",
|
45 |
+
tool_version="1.0.0",
|
46 |
+
input_types={
|
47 |
+
"query": "str - A clear, specific description of the arithmetic calculation or math problem to be solved, including any necessary numerical inputs."},
|
48 |
+
output_type="dict - A dictionary containing the generated code, calculation result, and any error messages.",
|
49 |
+
demo_commands=[
|
50 |
+
{
|
51 |
+
"command": 'execution = tool.execute(query="Calculate the factorial of 5")',
|
52 |
+
"description": "Generate a Python code snippet to calculate the factorial of 5."
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"command": 'execution = tool.execute(query="Find the sum of prime numbers up to 50")',
|
56 |
+
"description": "Generate a Python code snippet to find the sum of prime numbers up to 50."
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"command": 'query="Given the list [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], calculate the sum of squares of odd numbers"\nexecution = tool.execute(query=query)',
|
60 |
+
"description": "Generate a Python function for a specific mathematical operation on a given list of numbers."
|
61 |
+
},
|
62 |
+
],
|
63 |
+
user_metadata = {
|
64 |
+
"limitations": [
|
65 |
+
"Restricted to basic Python arithmetic operations and built-in mathematical functions.",
|
66 |
+
"Cannot use any external libraries or modules, including those in the Python standard library.",
|
67 |
+
"Limited to simple mathematical calculations and problems.",
|
68 |
+
"Cannot perform any string processing, data structure manipulation, or complex algorithms.",
|
69 |
+
"No access to any system resources, file operations, or network requests.",
|
70 |
+
"Cannot use 'import' statements.",
|
71 |
+
"All calculations must be self-contained within a single function or script.",
|
72 |
+
"Input must be provided directly in the query string.",
|
73 |
+
"Output is limited to numerical results or simple lists/tuples of numbers."
|
74 |
+
],
|
75 |
+
"best_practices": [
|
76 |
+
"Provide clear and specific queries that describe the desired mathematical calculation.",
|
77 |
+
"Include all necessary numerical inputs directly in the query string.",
|
78 |
+
"Keep tasks focused on basic arithmetic, algebraic calculations, or simple mathematical algorithms.",
|
79 |
+
"Ensure all required numerical data is included in the query.",
|
80 |
+
"Verify that the query only involves mathematical operations and does not require any data processing or complex algorithms.",
|
81 |
+
"Review generated code to ensure it only uses basic Python arithmetic operations and built-in math functions."
|
82 |
+
]
|
83 |
+
}
|
84 |
+
)
|
85 |
+
print(f"\nInitializing Python_Code_Generator_Tool with model_string: {model_string}")
|
86 |
+
self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=False) if model_string else None
|
87 |
+
|
88 |
+
@staticmethod
|
89 |
+
def preprocess_code(code):
|
90 |
+
"""
|
91 |
+
Preprocesses the generated code snippet by extracting it from the response.
|
92 |
+
|
93 |
+
Parameters:
|
94 |
+
code (str): The response containing the code snippet.
|
95 |
+
|
96 |
+
Returns:
|
97 |
+
str: The extracted code snippet.
|
98 |
+
"""
|
99 |
+
code = re.search(r"```python(.*)```", code, re.DOTALL).group(1).strip()
|
100 |
+
return code
|
101 |
+
|
102 |
+
@contextlib.contextmanager
|
103 |
+
def capture_output(self):
|
104 |
+
"""
|
105 |
+
Context manager to capture the standard output.
|
106 |
+
|
107 |
+
Yields:
|
108 |
+
StringIO: The captured output.
|
109 |
+
"""
|
110 |
+
new_out = StringIO()
|
111 |
+
old_out = sys.stdout
|
112 |
+
sys.stdout = new_out
|
113 |
+
try:
|
114 |
+
yield sys.stdout
|
115 |
+
finally:
|
116 |
+
sys.stdout = old_out
|
117 |
+
|
118 |
+
def execute_code_snippet(self, code):
|
119 |
+
"""
|
120 |
+
Executes the given Python code snippet.
|
121 |
+
|
122 |
+
Parameters:
|
123 |
+
code (str): The Python code snippet to be executed.
|
124 |
+
|
125 |
+
Returns:
|
126 |
+
dict: A dictionary containing the printed output and local variables.
|
127 |
+
"""
|
128 |
+
# Check for dangerous functions and remove them
|
129 |
+
dangerous_functions = ['exit', 'quit', 'sys.exit']
|
130 |
+
for func in dangerous_functions:
|
131 |
+
if func in code:
|
132 |
+
print(f"Warning: Removing unsafe '{func}' call from code")
|
133 |
+
# Use regex to remove function calls with any arguments
|
134 |
+
code = re.sub(rf'{func}\s*\([^)]*\)', 'break', code)
|
135 |
+
|
136 |
+
try:
|
137 |
+
execution_code = self.preprocess_code(code)
|
138 |
+
|
139 |
+
# Execute with 10-second timeout
|
140 |
+
with timeout(10):
|
141 |
+
try:
|
142 |
+
exec(execution_code)
|
143 |
+
except TimeoutException:
|
144 |
+
print("Error: Code execution exceeded 60 seconds timeout")
|
145 |
+
return {"error": "Execution timed out after 60 seconds"}
|
146 |
+
except Exception as e:
|
147 |
+
print(f"Error executing code: {e}")
|
148 |
+
return {"error": str(e)}
|
149 |
+
|
150 |
+
# Capture the output and local variables
|
151 |
+
local_vars = {}
|
152 |
+
with self.capture_output() as output:
|
153 |
+
exec(execution_code, {}, local_vars)
|
154 |
+
printed_output = output.getvalue().strip()
|
155 |
+
|
156 |
+
# Filter out built-in variables and modules
|
157 |
+
"""
|
158 |
+
only the variables used in the code are returned,
|
159 |
+
excluding built-in variables (which start with '__') and imported modules.
|
160 |
+
"""
|
161 |
+
used_vars = {k: v for k, v in local_vars.items()
|
162 |
+
if not k.startswith('__') and not isinstance(v, type(sys))}
|
163 |
+
|
164 |
+
return {"printed_output": printed_output, "variables": used_vars}
|
165 |
+
|
166 |
+
except Exception as e:
|
167 |
+
print(f"Error executing code: {e}")
|
168 |
+
return {"error": str(e)}
|
169 |
+
|
170 |
+
def execute(self, query):
|
171 |
+
"""
|
172 |
+
Generates and executes Python code based on the provided query.
|
173 |
+
|
174 |
+
Parameters:
|
175 |
+
query (str): A query describing the desired operation.
|
176 |
+
|
177 |
+
Returns:
|
178 |
+
dict: A dictionary containing the executed output, local variables, or any error message.
|
179 |
+
"""
|
180 |
+
|
181 |
+
if not self.llm_engine:
|
182 |
+
raise ValueError("LLM engine not initialized. Please provide a valid model_string when initializing the tool.")
|
183 |
+
|
184 |
+
task_description = """
|
185 |
+
Given a query, generate a Python code snippet that performs the specified operation on the provided data. Please think step by step. Ensure to break down the process into clear, logical steps. Make sure to print the final result in the generated code snippet with a descriptive message explaining what the output represents. The final output should be presented in the following format:
|
186 |
+
|
187 |
+
```python
|
188 |
+
<code snippet>
|
189 |
+
```
|
190 |
+
"""
|
191 |
+
task_description = task_description.strip()
|
192 |
+
full_prompt = f"Task:\n{task_description}\n\nQuery:\n{query}"
|
193 |
+
|
194 |
+
response = self.llm_engine(full_prompt)
|
195 |
+
result_or_error = self.execute_code_snippet(response)
|
196 |
+
return result_or_error
|
197 |
+
|
198 |
+
def get_metadata(self):
|
199 |
+
"""
|
200 |
+
Returns the metadata for the Python_Code_Generator_Tool.
|
201 |
+
|
202 |
+
Returns:
|
203 |
+
dict: A dictionary containing the tool's metadata.
|
204 |
+
"""
|
205 |
+
metadata = super().get_metadata()
|
206 |
+
metadata["require_llm_engine"] = self.require_llm_engine # NOTE: can be removed if not needed
|
207 |
+
return metadata
|
208 |
+
|
209 |
+
|
210 |
+
if __name__ == "__main__":
|
211 |
+
# Test command:
|
212 |
+
"""
|
213 |
+
Run the following commands in the terminal to test the script:
|
214 |
+
|
215 |
+
cd octotools/tools/python_code_generator
|
216 |
+
python tool.py
|
217 |
+
"""
|
218 |
+
|
219 |
+
# Get the directory of the current script
|
220 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
221 |
+
|
222 |
+
# Example usage of the Python_Code_Generator_Tool
|
223 |
+
tool = Python_Code_Generator_Tool()
|
224 |
+
tool = Python_Code_Generator_Tool(model_string="gpt-4o-mini")
|
225 |
+
|
226 |
+
# Get tool metadata
|
227 |
+
metadata = tool.get_metadata()
|
228 |
+
print(metadata)
|
229 |
+
|
230 |
+
# Sample query for generating and executing Python code
|
231 |
+
queries = [
|
232 |
+
"Given the number list: [1, 2, 3, 4, 5], calculate the sum of all the numbers in the list.",
|
233 |
+
]
|
234 |
+
for query in queries:
|
235 |
+
print(f"\n###Query: {query}")
|
236 |
+
# Execute the tool with the sample query
|
237 |
+
try:
|
238 |
+
execution = tool.execute(query=query)
|
239 |
+
print("\n###Execution Result:", execution)
|
240 |
+
except ValueError as e:
|
241 |
+
print(f"Execution failed: {e}")
|
242 |
+
|
243 |
+
print("Done!")
|
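An isolated sketch (not part of the commit) of the SIGALRM-based timeout guard defined at the top of the file above; the 2-second value and the busy loop are illustrative, and SIGALRM is only available on Unix-like systems.

import signal
from contextlib import contextmanager

class TimeoutException(Exception):
    pass

@contextmanager
def timeout(seconds):
    # Raise TimeoutException if the body runs longer than `seconds`.
    def handler(signum, frame):
        raise TimeoutException(f"timed out after {seconds}s")
    original = signal.signal(signal.SIGALRM, handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, original)

try:
    with timeout(2):
        while True:  # spins until the 2-second alarm fires
            pass
except TimeoutException as e:
    print(e)  # -> timed out after 2s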
octotools/tools/relevant_patch_zoomer/__init__.py
ADDED
File without changes
|
octotools/tools/relevant_patch_zoomer/examples/car.png
ADDED
|
octotools/tools/relevant_patch_zoomer/test.log
ADDED
@@ -0,0 +1,10 @@
|
1 |
+
|
2 |
+
Initializing Patch Zoomer Tool with model: gpt-4o
|
3 |
+
!! Cache enabled for model: gpt-4o
|
4 |
+
{'tool_name': 'Relevant_Patch_Zoomer_Tool', 'tool_description': 'A tool that analyzes an image, divides it into 5 regions (4 quarters + center), and identifies the most relevant patches based on a question. The returned patches are zoomed in by a factor of 2.', 'tool_version': '1.0.0', 'input_types': {'image': 'str - The path to the image file.', 'question': 'str - The question about the image content.'}, 'output_type': 'dict - Contains analysis text and list of saved zoomed patch paths.', 'demo_commands': [{'command': 'execution = tool.execute(image="path/to/image.jpg", question="What is the color of the car?")', 'description': "Analyze image and return relevant zoomed patches that show the car's color."}], 'require_llm_engine': True, 'user_metadata': {'best_practices': ['It might be helpful to zoom in on the image first to get a better look at the object(s).', 'It might be helpful if the question requires a close-up view of the object(s), symbols, texts, etc.', 'The tool should be used to provide a high-level analysis first, and then use other tools for fine-grained analysis. For example, you can use Relevant_Patch_Zoomer_Tool first to get a zoomed patch of specific objects, and then use Image_Captioner_Tool to describe the objects in detail.']}}
|
5 |
+
|
6 |
+
Detected Patches:
|
7 |
+
Path: /root/Projects/octotools/octotools/tools/relevant_patch_zoomer/zoomed_patches/car_bottom-right_zoomed_2x.png
|
8 |
+
Description: The bottom-right region of the image: /root/Projects/octotools/octotools/tools/relevant_patch_zoomer/examples/car.png.
|
9 |
+
|
10 |
+
Done!
|
octotools/tools/relevant_patch_zoomer/tool.py
ADDED
@@ -0,0 +1,188 @@
|
1 |
+
import os
|
2 |
+
import cv2
|
3 |
+
from pydantic import BaseModel
|
4 |
+
from octotools.tools.base import BaseTool
|
5 |
+
from octotools.engine.openai import ChatOpenAI
|
6 |
+
|
7 |
+
class PatchZoomerResponse(BaseModel):
|
8 |
+
analysis: str
|
9 |
+
patch: list[str]
|
10 |
+
|
11 |
+
class Relevant_Patch_Zoomer_Tool(BaseTool):
|
12 |
+
require_llm_engine = True
|
13 |
+
|
14 |
+
def __init__(self, model_string="gpt-4o"):
|
15 |
+
super().__init__(
|
16 |
+
tool_name="Relevant_Patch_Zoomer_Tool",
|
17 |
+
tool_description="A tool that analyzes an image, divides it into 5 regions (4 quarters + center), and identifies the most relevant patches based on a question. The returned patches are zoomed in by a factor of 2.",
|
18 |
+
tool_version="1.0.0",
|
19 |
+
input_types={
|
20 |
+
"image": "str - The path to the image file.",
|
21 |
+
"question": "str - The question about the image content.",
|
22 |
+
},
|
23 |
+
output_type="dict - Contains analysis text and list of saved zoomed patch paths.",
|
24 |
+
demo_commands=[
|
25 |
+
{
|
26 |
+
"command": 'execution = tool.execute(image="path/to/image.jpg", question="What is the color of the car?")',
|
27 |
+
"description": "Analyze image and return relevant zoomed patches that show the car's color."
|
28 |
+
}
|
29 |
+
],
|
30 |
+
user_metadata = {
|
31 |
+
"best_practices": [
|
32 |
+
"It might be helpful to zoom in on the image first to get a better look at the object(s).",
|
33 |
+
"It might be helpful if the question requires a close-up view of the object(s), symbols, texts, etc.",
|
34 |
+
"The tool should be used to provide a high-level analysis first, and then use other tools for fine-grained analysis. For example, you can use Relevant_Patch_Zoomer_Tool first to get a zoomed patch of specific objects, and then use Image_Captioner_Tool to describe the objects in detail."
|
35 |
+
]
|
36 |
+
}
|
37 |
+
)
|
38 |
+
self.matching_dict = {
|
39 |
+
"A": "top-left",
|
40 |
+
"B": "top-right",
|
41 |
+
"C": "bottom-left",
|
42 |
+
"D": "bottom-right",
|
43 |
+
"E": "center"
|
44 |
+
}
|
45 |
+
|
46 |
+
print(f"\nInitializing Patch Zoomer Tool with model: {model_string}")
|
47 |
+
self.llm_engine = ChatOpenAI(model_string=model_string, is_multimodal=True) if model_string else None
|
48 |
+
|
49 |
+
def _save_patch(self, image_path, patch, save_path, zoom_factor=2):
|
50 |
+
"""Extract and save a specific patch from the image with 10% margins."""
|
51 |
+
img = cv2.imread(image_path)
|
52 |
+
height, width = img.shape[:2]
|
53 |
+
|
54 |
+
quarter_h = height // 2
|
55 |
+
quarter_w = width // 2
|
56 |
+
|
57 |
+
margin_h = int(quarter_h * 0.1)
|
58 |
+
margin_w = int(quarter_w * 0.1)
|
59 |
+
|
60 |
+
patch_coords = {
|
61 |
+
'A': ((max(0, 0 - margin_w), max(0, 0 - margin_h)),
|
62 |
+
(min(width, quarter_w + margin_w), min(height, quarter_h + margin_h))),
|
63 |
+
'B': ((max(0, quarter_w - margin_w), max(0, 0 - margin_h)),
|
64 |
+
(min(width, width + margin_w), min(height, quarter_h + margin_h))),
|
65 |
+
'C': ((max(0, 0 - margin_w), max(0, quarter_h - margin_h)),
|
66 |
+
(min(width, quarter_w + margin_w), min(height, height + margin_h))),
|
67 |
+
'D': ((max(0, quarter_w - margin_w), max(0, quarter_h - margin_h)),
|
68 |
+
(min(width, width + margin_w), min(height, height + margin_h))),
|
69 |
+
'E': ((max(0, quarter_w//2 - margin_w), max(0, quarter_h//2 - margin_h)),
|
70 |
+
(min(width, quarter_w//2 + quarter_w + margin_w),
|
71 |
+
min(height, quarter_h//2 + quarter_h + margin_h)))
|
72 |
+
}
|
73 |
+
|
74 |
+
(x1, y1), (x2, y2) = patch_coords[patch]
|
75 |
+
patch_img = img[y1:y2, x1:x2]
|
76 |
+
|
77 |
+
zoomed_patch = cv2.resize(patch_img,
|
78 |
+
(patch_img.shape[1] * zoom_factor,
|
79 |
+
patch_img.shape[0] * zoom_factor),
|
80 |
+
interpolation=cv2.INTER_LINEAR)
|
81 |
+
|
82 |
+
os.makedirs(os.path.dirname(save_path), exist_ok=True)
|
83 |
+
cv2.imwrite(save_path, zoomed_patch)
|
84 |
+
return save_path
|
85 |
+
|
86 |
+
def execute(self, image, question, zoom_factor=2):
|
87 |
+
try:
|
88 |
+
if not self.llm_engine:
|
89 |
+
return "Error: LLM engine not initialized. Please provide a valid model_string."
|
90 |
+
|
91 |
+
# Prepare the prompt
|
92 |
+
prompt = f"""
|
93 |
+
Analyze this image to identify the most relevant region(s) for answering the question:
|
94 |
+
|
95 |
+
Question: {question}
|
96 |
+
|
97 |
+
The image is divided into 5 regions:
|
98 |
+
- (A) Top-left quarter
|
99 |
+
- (B) Top-right quarter
|
100 |
+
- (C) Bottom-left quarter
|
101 |
+
- (D) Bottom-right quarter
|
102 |
+
- (E) Center region (1/4 size, overlapping middle section)
|
103 |
+
|
104 |
+
Instructions:
|
105 |
+
1. First describe what you see in each of the five regions.
|
106 |
+
2. Then select the most relevant region(s) to answer the question.
|
107 |
+
3. Choose only the minimum necessary regions - avoid selecting redundant areas that show the same content. For example, if one patch contains the entire object(s), do not select another patch that only shows a part of the same object(s).
|
108 |
+
|
109 |
+
|
110 |
+
Response format:
|
111 |
+
<analysis>: Describe the image and five patches first. Then analyze the question and select the most relevant patch or list of patches.
|
112 |
+
<patch>: List of letters (A-E)
|
113 |
+
"""
|
114 |
+
# Read image and create input data
|
115 |
+
with open(image, 'rb') as file:
|
116 |
+
image_bytes = file.read()
|
117 |
+
input_data = [prompt, image_bytes]
|
118 |
+
|
119 |
+
# Get response from LLM
|
120 |
+
response = self.llm_engine(input_data, response_format=PatchZoomerResponse)
|
121 |
+
|
122 |
+
# Save patches
|
123 |
+
image_dir = os.path.dirname(image)
|
124 |
+
image_name = os.path.splitext(os.path.basename(image))[0]
|
125 |
+
|
126 |
+
# Update the return structure
|
127 |
+
patch_info = []
|
128 |
+
for patch in response.patch:
|
129 |
+
patch_name = self.matching_dict[patch]
|
130 |
+
save_path = os.path.join(self.output_dir,
|
131 |
+
f"{image_name}_{patch_name}_zoomed_{zoom_factor}x.png")
|
132 |
+
saved_path = self._save_patch(image, patch, save_path, zoom_factor)
|
133 |
+
save_path = os.path.abspath(saved_path)
|
134 |
+
patch_info.append({
|
135 |
+
"path": save_path,
|
136 |
+
"description": f"The {self.matching_dict[patch]} region of the image: {image}."
|
137 |
+
})
|
138 |
+
|
139 |
+
return {
|
140 |
+
"analysis": response.analysis,
|
141 |
+
"patches": patch_info
|
142 |
+
}
|
143 |
+
|
144 |
+
except Exception as e:
|
145 |
+
print(f"Error in patch zooming: {e}")
|
146 |
+
return None
|
147 |
+
|
148 |
+
def get_metadata(self):
|
149 |
+
return super().get_metadata()
|
150 |
+
|
151 |
+
if __name__ == "__main__":
|
152 |
+
# Test command:
|
153 |
+
"""
|
154 |
+
Run the following commands in the terminal to test the script:
|
155 |
+
|
156 |
+
cd octotools/tools/relevant_patch_zoomer
|
157 |
+
python tool.py
|
158 |
+
"""
|
159 |
+
|
160 |
+
# Get the directory of the current script
|
161 |
+
script_dir = os.path.dirname(os.path.abspath(__file__))
|
162 |
+
|
163 |
+
# Example usage of the Relevant_Patch_Zoomer_Tool
|
164 |
+
tool = Relevant_Patch_Zoomer_Tool()
|
165 |
+
tool.set_custom_output_dir(f"{script_dir}/zoomed_patches")
|
166 |
+
|
167 |
+
# Get tool metadata
|
168 |
+
metadata = tool.get_metadata()
|
169 |
+
print(metadata)
|
170 |
+
|
171 |
+
# Construct the full path to the image using the script's directory
|
172 |
+
relative_image_path = "examples/car.png"
|
173 |
+
image_path = os.path.join(script_dir, relative_image_path)
|
174 |
+
question = "What is the color of the car?"
|
175 |
+
|
176 |
+
# Execute the tool
|
177 |
+
try:
|
178 |
+
result = tool.execute(image=image_path, question=question)
|
179 |
+
if result:
|
180 |
+
print("\nDetected Patches:")
|
181 |
+
for patch in result['patches']:
|
182 |
+
print(f"Path: {patch['path']}")
|
183 |
+
print(f"Description: {patch['description']}")
|
184 |
+
print()
|
185 |
+
except Exception as e:
|
186 |
+
print(f"Execution failed: {e}")
|
187 |
+
|
188 |
+
print("Done!")
|
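A small sketch (not part of the commit) that recomputes the five regions used by Relevant_Patch_Zoomer_Tool._save_patch with the same 10% margin logic, written without OpenCV so the geometry can be checked on its own; patch_regions and the 640x480 example are illustrative assumptions.

def patch_regions(width, height, margin_ratio=0.1):
    # A-D are the four quarters and E is a same-sized center crop; each region is
    # expanded by a 10% margin and clamped to the image bounds, as in _save_patch.
    qh, qw = height // 2, width // 2
    mh, mw = int(qh * margin_ratio), int(qw * margin_ratio)
    return {
        "A": ((0, 0), (min(width, qw + mw), min(height, qh + mh))),              # top-left
        "B": ((max(0, qw - mw), 0), (width, min(height, qh + mh))),              # top-right
        "C": ((0, max(0, qh - mh)), (min(width, qw + mw), height)),              # bottom-left
        "D": ((max(0, qw - mw), max(0, qh - mh)), (width, height)),              # bottom-right
        "E": ((max(0, qw // 2 - mw), max(0, qh // 2 - mh)),
              (min(width, qw // 2 + qw + mw), min(height, qh // 2 + qh + mh))),  # center
    }

for name, coords in patch_regions(640, 480).items():
    print(name, coords)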