recorded query data, user data, and step data
This view is limited to 50 files because it contains too many changes; see the raw diff for the full change set.
- .gitignore +2 -0
- app.py +195 -42
- demo_solver_cache/20250217_062225_8ce3e482/query_image.jpg +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_1.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_10.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_11.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_12.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_13.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_14.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_15.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_16.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_17.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_18.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_19.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_2.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_20.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_3.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_4.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_5.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_6.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_7.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_8.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_9.png +0 -0
- demo_solver_cache/20250217_062225_8ce3e482/user_feedback.json +0 -22
- demo_solver_cache/20250217_063316_09285db1/query_image.jpg +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_1.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_10.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_11.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_12.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_13.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_14.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_15.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_16.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_17.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_18.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_19.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_2.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_20.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_3.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_4.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_5.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_6.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_7.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_8.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_9.png +0 -0
- demo_solver_cache/20250217_063316_09285db1/user_feedback.json +0 -12
- demo_solver_cache/20250217_183323_b0e58b32/query_image.jpg +0 -0
- demo_solver_cache/20250217_183323_b0e58b32/user_feedback.json +0 -32
- feedback_dataset/feedback-20250217_212246.json +0 -0
- feedback_dataset/feedback-20250217_212401.json +0 -10
.gitignore
CHANGED
@@ -175,4 +175,6 @@ detected_objects/
 
 # [Gradio]
 demo_solver_cache/
+solver_cache/
 backups/
+data/
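
For orientation before the app.py diff: the newly ignored solver_cache/ directory is where the demo now records per-query data locally, and huggingface_hub.CommitScheduler mirrors that folder to a dataset repo by committing whatever appears under its folder_path in the background (every five minutes by default). The snippet below is a minimal standalone sketch of that mechanism, not code from this commit; the toy query_metadata.json write at the end is illustrative, and it assumes a write-enabled token in HUGGINGFACE_TOKEN.

import json
import os
import time
import uuid
from pathlib import Path

from huggingface_hub import CommitScheduler

DATASET_DIR = Path("solver_cache")              # same folder name the demo uses
DATASET_DIR.mkdir(parents=True, exist_ok=True)

# Background job: periodically commits everything under folder_path to the dataset repo.
scheduler = CommitScheduler(
    repo_id="lupantech/OctoTools-Gradio-Demo-User-Data",  # dataset repo referenced in app.py
    repo_type="dataset",
    folder_path=DATASET_DIR,      # local folder being mirrored
    path_in_repo="solver_cache",  # destination path inside the dataset repo
    every=5,                      # minutes between background commits (library default)
    token=os.getenv("HUGGINGFACE_TOKEN"),
)

# Any file written under DATASET_DIR is picked up by the next scheduled commit.
query_id = time.strftime("%Y%m%d_%H%M%S") + "_" + str(uuid.uuid4())[:8]
query_dir = DATASET_DIR / query_id
query_dir.mkdir(parents=True, exist_ok=True)
with (query_dir / "query_metadata.json").open("w") as f:
    json.dump({"query_id": query_id, "query_text": "toy example"}, f, indent=4)

Because the scheduler mirrors the folder to the dataset repo, the .gitignore entries above keep the per-query artifacts out of the Space's own git history without losing them.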
app.py
CHANGED
@@ -22,41 +22,125 @@ from octotools.models.memory import Memory
 from octotools.models.executor import Executor
 from octotools.models.utils import make_json_serializable
 
-from utils import save_feedback
 
-
-########### Test Huggingface Dataset ###########
 from pathlib import Path
 from huggingface_hub import CommitScheduler
 
-# Add these near the top of the file with other constants
-DATASET_DIR = Path("feedback_dataset")
-DATASET_DIR.mkdir(parents=True, exist_ok=True)
-DATASET_PATH = DATASET_DIR / f"feedback-{time.strftime('%Y%m%d_%H%M%S')}.json"
-
 # Get Huggingface token from environment variable
 HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
+########### Test Huggingface Dataset ###########
+# Update the HuggingFace dataset constants
+DATASET_DIR = Path("solver_cache") # the directory to save the dataset
+DATASET_DIR.mkdir(parents=True, exist_ok=True)
+
+global QUERY_ID
+QUERY_ID = None
+
 scheduler = CommitScheduler(
     repo_id="lupantech/OctoTools-Gradio-Demo-User-Data",
     repo_type="dataset",
     folder_path=DATASET_DIR,
-    path_in_repo="
+    path_in_repo="solver_cache", # Update path in repo
     token=HF_TOKEN
 )
 
-
-
-
-
-
-
-
-
-
-
-
-
+
+def save_query_data(query_id: str, query: str, image_path: str) -> None:
+    """Save query data to Huggingface dataset"""
+    # Save query metadata
+    query_cache_dir = DATASET_DIR / query_id
+    query_cache_dir.mkdir(parents=True, exist_ok=True)
+    query_file = query_cache_dir / "query_metadata.json"
+
+    query_metadata = {
+        "query_id": query_id,
+        "query_text": query,
+        "datetime": time.strftime("%Y%m%d_%H%M%S"),
+        "image_path": image_path if image_path else None
+    }
+
+    print(f"Saving query metadata to {query_file}")
+    with query_file.open("w") as f:
+        json.dump(query_metadata, f, indent=4)
+
+    # # NOTE: As we are using the same name for the query cache directory as the dataset directory,
+    # # NOTE: we don't need to copy the content from the query cache directory to the query directory.
+    # # Copy all content from root_cache_dir to query_dir
+    # import shutil
+    # shutil.copytree(args.root_cache_dir, query_data_dir, dirs_exist_ok=True)
+
+
+def save_feedback(query_id: str, feedback_type: str, feedback_text: str = None) -> None:
+    """
+    Save user feedback to the query directory.
+
+    Args:
+        query_id: Unique identifier for the query
+        feedback_type: Type of feedback ('upvote', 'downvote', or 'comment')
+        feedback_text: Optional text feedback from user
+    """
+
+    feedback_data_dir = DATASET_DIR / query_id
+    feedback_data_dir.mkdir(parents=True, exist_ok=True)
+
+    feedback_data = {
+        "query_id": query_id,
+        "feedback_type": feedback_type,
+        "feedback_text": feedback_text,
+        "datetime": time.strftime("%Y%m%d_%H%M%S")
+    }
+
+    # Save feedback in the query directory
+    feedback_file = feedback_data_dir / "feedback.json"
+    print(f"Saving feedback to {feedback_file}")
+
+    # If feedback file exists, update it
+    if feedback_file.exists():
+        with feedback_file.open("r") as f:
+            existing_feedback = json.load(f)
+        # Convert to list if it's a single feedback entry
+        if not isinstance(existing_feedback, list):
+            existing_feedback = [existing_feedback]
+        existing_feedback.append(feedback_data)
+        feedback_data = existing_feedback
+
+    # Write feedback data
+    with feedback_file.open("w") as f:
+        json.dump(feedback_data, f, indent=4)
+
+
+def save_steps_data(query_id: str, memory: Memory) -> None:
+    """Save steps data to Huggingface dataset"""
+    steps_file = DATASET_DIR / query_id / "all_steps.json"
+
+    memory_actions = memory.get_actions()
+    memory_actions = make_json_serializable(memory_actions) # NOTE: make the memory actions serializable
+    print("Memory actions: ", memory_actions)
+
+    with steps_file.open("w") as f:
+        json.dump(memory_actions, f, indent=4)
+
+
+def save_module_data(query_id: str, key: str, value: Any) -> None:
+    """Save module data to Huggingface dataset"""
+    try:
+        key = key.replace(" ", "_").lower()
+        module_file = DATASET_DIR / query_id / f"{key}.json"
+        value = make_json_serializable(value) # NOTE: make the value serializable
+        with module_file.open("a") as f:
+            json.dump(value, f, indent=4)
+    except Exception as e:
+        print(f"Warning: Failed to save as JSON: {e}")
+        # Fallback to saving as text file
+        text_file = DATASET_DIR / query_id / f"{key}.txt"
+        try:
+            with text_file.open("a") as f:
+                f.write(str(value) + "\n")
+            print(f"Successfully saved as text file: {text_file}")
+        except Exception as e:
+            print(f"Error: Failed to save as text file: {e}")
+
 ########### End of Test Huggingface Dataset ###########
 
 class Solver:
@@ -72,7 +156,7 @@ class Solver:
         verbose: bool = True,
         max_steps: int = 10,
         max_time: int = 60,
-
+        query_cache_dir: str = "solver_cache"
     ):
         self.planner = planner
         self.memory = memory
@@ -83,7 +167,7 @@ class Solver:
         self.verbose = verbose
         self.max_steps = max_steps
        self.max_time = max_time
-        self.
+        self.query_cache_dir = query_cache_dir
 
         self.output_types = output_types.lower().split(',')
         assert all(output_type in ["base", "final", "direct"] for output_type in self.output_types), "Invalid output type. Supported types are 'base', 'final', 'direct'."
@@ -109,14 +193,14 @@ class Solver:
             # os.makedirs(os.path.join(self.root_cache_dir, 'images'), exist_ok=True)
             # img_path = os.path.join(self.root_cache_dir, 'images', str(uuid.uuid4()) + '.jpg')
 
-            img_path = os.path.join(self.
+            img_path = os.path.join(self.query_cache_dir, 'query_image.jpg')
             user_image.save(img_path)
         else:
             img_path = None
 
         # Set tool cache directory
-
-        self.executor.set_query_cache_dir(
+        _tool_cache_dir = os.path.join(self.query_cache_dir, "tool_cache") # NOTE: This is the directory for tool cache
+        self.executor.set_query_cache_dir(_tool_cache_dir) # NOTE: set query cache directory
 
         # Step 1: Display the received inputs
         if user_image:
@@ -145,6 +229,13 @@ class Solver:
                 metadata={"title": "🔍 Query Analysis"}))
             yield messages
 
+            # Save the query analysis data
+            query_analysis_data = {
+                "query_analysis": query_analysis,
+                "time": round(time.time() - start_time, 5)
+            }
+            save_module_data(QUERY_ID, "step_0_query_analysis", query_analysis_data)
+
         # Step 5: Execution loop (similar to your step-by-step solver)
         while step_count < self.max_steps and (time.time() - start_time) < self.max_time:
             step_count += 1
@@ -158,6 +249,14 @@ class Solver:
                 user_query, img_path, query_analysis, self.memory, step_count, self.max_steps
             )
             context, sub_goal, tool_name = self.planner.extract_context_subgoal_and_tool(next_step)
+            step_data = {
+                "step_count": step_count,
+                "context": context,
+                "sub_goal": sub_goal,
+                "tool_name": tool_name,
+                "time": round(time.time() - start_time, 5)
+            }
+            save_module_data(QUERY_ID, f"step_{step_count}_action_prediction", step_data)
 
             # Display the step information
             messages.append(ChatMessage(
@@ -183,6 +282,21 @@ class Solver:
             result = self.executor.execute_tool_command(tool_name, command)
             result = make_json_serializable(result)
 
+            # Save the command generation data
+            command_generation_data = {
+                "explanation": explanation,
+                "command": command,
+                "time": round(time.time() - start_time, 5)
+            }
+            save_module_data(QUERY_ID, f"step_{step_count}_command_generation", command_generation_data)
+
+            # Save the command execution data
+            command_execution_data = {
+                "result": result,
+                "time": round(time.time() - start_time, 5)
+            }
+            save_module_data(QUERY_ID, f"step_{step_count}_command_execution", command_execution_data)
+
             messages.append(ChatMessage(
                 role="assistant",
                 content=f"{json.dumps(result, indent=4)}",
@@ -194,6 +308,14 @@ class Solver:
             stop_verification = self.planner.verificate_memory(user_query, img_path, query_analysis, self.memory)
             conclusion = self.planner.extract_conclusion(stop_verification)
 
+            # Save the context verification data
+            context_verification_data = {
+                "stop_verification": stop_verification,
+                "conclusion": conclusion,
+                "time": round(time.time() - start_time, 5)
+            }
+            save_module_data(QUERY_ID, f"step_{step_count}_context_verification", context_verification_data)
+
             messages.append(ChatMessage(
                 role="assistant",
                 content=f"🛑 Step {step_count} Conclusion: {conclusion}"))
@@ -208,15 +330,29 @@ class Solver:
             messages.append(ChatMessage(role="assistant", content=f"🎯 Final Output:\n{final_output}"))
             yield messages
 
+            # Save the final output data
+            final_output_data = {
+                "final_output": final_output,
+                "time": round(time.time() - start_time, 5)
+            }
+            save_module_data(QUERY_ID, "final_output", final_output_data)
+
         if 'direct' in self.output_types:
             direct_output = self.planner.generate_direct_output(user_query, img_path, self.memory)
             messages.append(ChatMessage(role="assistant", content=f"🔹 Direct Output:\n{direct_output}"))
             yield messages
 
+            # Save the direct output data
+            direct_output_data = {
+                "direct_output": direct_output,
+                "time": round(time.time() - start_time, 5)
+            }
+            save_module_data(QUERY_ID, "direct_output", direct_output_data)
+
         # Step 8: Completion Message
         messages.append(ChatMessage(role="assistant", content="✅ Problem-solving process completed."))
         yield messages
-
+
 
 def parse_arguments():
     parser = argparse.ArgumentParser(description="Run the OctoTools demo with specified parameters.")
@@ -230,7 +366,8 @@ def parse_arguments():
         help="Comma-separated list of required outputs (base,final,direct)"
     )
     parser.add_argument("--enabled_tools", default="Generalist_Solution_Generator_Tool", help="List of enabled tools.")
-    parser.add_argument("--root_cache_dir", default="
+    parser.add_argument("--root_cache_dir", default="solver_cache", help="Path to solver cache directory.")
+    parser.add_argument("--query_id", default=None, help="Query ID.")
     parser.add_argument("--verbose", type=bool, default=True, help="Enable verbose output.")
 
     # NOTE: Add new arguments
@@ -245,18 +382,28 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
     Streams responses from `solver.stream_solve_user_problem` for real-time UI updates.
     """
 
-    # Generate
+    # Generate Unique Query ID (Date and first 8 characters of UUID)
     query_id = time.strftime("%Y%m%d_%H%M%S") + "_" + str(uuid.uuid4())[:8] # e.g, 20250217_062225_612f2474
     print(f"Query ID: {query_id}")
 
+    # NOTE: update the global variable to save the query ID
+    global QUERY_ID
+    QUERY_ID = query_id
+
     # Create a directory for the query ID
-
-    os.makedirs(
-    args.root_cache_dir = query_dir
+    query_cache_dir = os.path.join(DATASET_DIR.name, query_id) # NOTE
+    os.makedirs(query_cache_dir, exist_ok=True)
 
     if api_key is None:
         return [["assistant", "⚠️ Error: OpenAI API Key is required."]]
 
+    # Save the query data
+    save_query_data(
+        query_id=query_id,
+        query=user_query,
+        image_path=os.path.join(query_cache_dir, 'query_image.jpg') if user_image else None
+    )
+
     # # Initialize Tools
     # enabled_tools = args.enabled_tools.split(",") if args.enabled_tools else []
 
@@ -284,7 +431,7 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
     # Instantiate Executor
     executor = Executor(
         llm_engine_name=llm_model_engine,
-
+        query_cache_dir=query_cache_dir, # NOTE
         enable_signal=False,
         api_key=api_key
     )
@@ -300,16 +447,23 @@ def solve_problem_gradio(user_query, user_image, max_steps=10, max_time=60, api_
         verbose=args.verbose,
         max_steps=max_steps,
        max_time=max_time,
-
+        query_cache_dir=query_cache_dir # NOTE
     )
 
     if solver is None:
         return [["assistant", "⚠️ Error: Solver is not initialized. Please restart the application."]]
 
+
     messages = [] # Initialize message list
     for message_batch in solver.stream_solve_user_problem(user_query, user_image, api_key, messages):
         yield [msg for msg in message_batch] # Ensure correct format for Gradio Chatbot
 
+    # Save steps
+    save_steps_data(
+        query_id=query_id,
+        memory=memory
+    )
+
 
 def main(args):
     #################### Gradio Interface ####################
@@ -325,8 +479,8 @@ def main(args):
 
     [Website](https://octotools.github.io/) |
     [Github](https://github.com/octotools/octotools) |
-    [arXiv](https://arxiv.org/abs/2502.
-    [Paper](https://arxiv.org/pdf/2502.
+    [arXiv](https://arxiv.org/abs/2502.11271) |
+    [Paper](https://arxiv.org/pdf/2502.11271) |
     [Tool Cards](https://octotools.github.io/#tool-cards) |
     [Example Visualizations](https://octotools.github.io/#visualization) |
     [Discord](https://discord.gg/NMJx66DC)
@@ -424,20 +578,20 @@ def main(args):
 
         # Update the button click handlers
         upvote_btn.click(
-            fn=lambda: save_feedback(
+            fn=lambda: save_feedback(QUERY_ID, "upvote"),
             inputs=[],
             outputs=[]
         )
 
         downvote_btn.click(
-            fn=lambda: save_feedback(
+            fn=lambda: save_feedback(QUERY_ID, "downvote"),
             inputs=[],
             outputs=[]
         )
 
         # Add handler for comment submission
         comment_textbox.submit(
-            fn=lambda comment: save_feedback(
+            fn=lambda comment: save_feedback(QUERY_ID, "comment", comment),
             inputs=[comment_textbox],
             outputs=[]
         )
@@ -481,9 +635,6 @@ def main(args):
 if __name__ == "__main__":
     args = parse_arguments()
 
-    # Manually set enabled tools
-    # args.enabled_tools = "Generalist_Solution_Generator_Tool"
-
     # All tools
     all_tools = [
         "Generalist_Solution_Generator_Tool",
@@ -504,5 +655,7 @@ if __name__ == "__main__":
     ]
     args.enabled_tools = ",".join(all_tools)
 
+    # NOTE: Use the same name for the query cache directory as the dataset directory
+    args.root_cache_dir = DATASET_DIR.name
     main(args)
 
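
After a run, each query leaves a directory solver_cache/<query_id>/ containing query_metadata.json from save_query_data, step_*_*.json files from save_module_data, all_steps.json from save_steps_data, final_output.json / direct_output.json, and feedback.json once a user votes or comments. The helper below is a hypothetical convenience for inspecting one such directory offline; load_query_record is not part of this commit, and it deliberately tolerates the append-mode writes in save_module_data, which can leave several JSON documents concatenated in a single file.

import json
from pathlib import Path

DATASET_DIR = Path("solver_cache")  # directory the demo writes per-query records into

def load_query_record(query_id: str) -> dict:
    """Collect the JSON artifacts recorded for one query (illustrative helper, not in app.py)."""
    record = {}
    for json_file in sorted((DATASET_DIR / query_id).glob("*.json")):
        text = json_file.read_text()
        try:
            # query_metadata.json, all_steps.json, feedback.json, step_*_*.json, ...
            record[json_file.stem] = json.loads(text)
        except json.JSONDecodeError:
            # save_module_data appends with json.dump, so repeated writes can leave
            # concatenated JSON documents in one file; keep the raw text in that case.
            record[json_file.stem] = text
    return record

if __name__ == "__main__":
    # Query IDs follow the format printed by solve_problem_gradio, e.g. 20250217_062225_8ce3e482.
    print(sorted(load_query_record("20250217_062225_8ce3e482").keys()))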
demo_solver_cache/20250217_062225_8ce3e482/query_image.jpg
DELETED
Binary file (42 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_1.png
DELETED
Binary file (7.75 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_10.png
DELETED
Binary file (7.6 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_11.png
DELETED
Binary file (7.77 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_12.png
DELETED
Binary file (7.71 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_13.png
DELETED
Binary file (7.6 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_14.png
DELETED
Binary file (7.47 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_15.png
DELETED
Binary file (8.05 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_16.png
DELETED
Binary file (7.86 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_17.png
DELETED
Binary file (7.88 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_18.png
DELETED
Binary file (7.76 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_19.png
DELETED
Binary file (8.02 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_2.png
DELETED
Binary file (7.65 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_20.png
DELETED
Binary file (8.03 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_3.png
DELETED
Binary file (7.92 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_4.png
DELETED
Binary file (7.71 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_5.png
DELETED
Binary file (7.6 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_6.png
DELETED
Binary file (7.82 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_7.png
DELETED
Binary file (7.53 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_8.png
DELETED
Binary file (7.67 kB)

demo_solver_cache/20250217_062225_8ce3e482/query_image_baseball_9.png
DELETED
Binary file (7.41 kB)
demo_solver_cache/20250217_062225_8ce3e482/user_feedback.json
DELETED
@@ -1,22 +0,0 @@
-[
-    {
-        "timestamp": "20250217_062307",
-        "feedback_type": "upvote",
-        "comment": null
-    },
-    {
-        "timestamp": "20250217_062315",
-        "feedback_type": "downvote",
-        "comment": null
-    },
-    {
-        "timestamp": "20250217_062322",
-        "feedback_type": "upvote",
-        "comment": null
-    },
-    {
-        "timestamp": "20250217_062333",
-        "feedback_type": "It is helpful!",
-        "comment": null
-    }
-]
demo_solver_cache/20250217_063316_09285db1/query_image.jpg
DELETED
Binary file (42 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_1.png
DELETED
Binary file (7.75 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_10.png
DELETED
Binary file (7.6 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_11.png
DELETED
Binary file (7.77 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_12.png
DELETED
Binary file (7.71 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_13.png
DELETED
Binary file (7.6 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_14.png
DELETED
Binary file (7.47 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_15.png
DELETED
Binary file (8.05 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_16.png
DELETED
Binary file (7.86 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_17.png
DELETED
Binary file (7.88 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_18.png
DELETED
Binary file (7.76 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_19.png
DELETED
Binary file (8.02 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_2.png
DELETED
Binary file (7.65 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_20.png
DELETED
Binary file (8.03 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_3.png
DELETED
Binary file (7.92 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_4.png
DELETED
Binary file (7.71 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_5.png
DELETED
Binary file (7.6 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_6.png
DELETED
Binary file (7.82 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_7.png
DELETED
Binary file (7.53 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_8.png
DELETED
Binary file (7.67 kB)

demo_solver_cache/20250217_063316_09285db1/tools/query_image_baseball_9.png
DELETED
Binary file (7.41 kB)
demo_solver_cache/20250217_063316_09285db1/user_feedback.json
DELETED
@@ -1,12 +0,0 @@
-[
-    {
-        "timestamp": "20250217_063350",
-        "feedback_type": "upvote",
-        "comment": null
-    },
-    {
-        "timestamp": "20250217_063359",
-        "feedback_type": "Thanks! It is interesting!",
-        "comment": null
-    }
-]
demo_solver_cache/20250217_183323_b0e58b32/query_image.jpg
DELETED
Binary file (42 kB)
demo_solver_cache/20250217_183323_b0e58b32/user_feedback.json
DELETED
@@ -1,32 +0,0 @@
-[
-    {
-        "timestamp": "20250217_190313",
-        "feedback_type": "upvote",
-        "comment": null
-    },
-    {
-        "timestamp": "20250217_190319",
-        "feedback_type": "downvote",
-        "comment": null
-    },
-    {
-        "timestamp": "20250217_190321",
-        "feedback_type": "upvote",
-        "comment": null
-    },
-    {
-        "timestamp": "20250217_190322",
-        "feedback_type": "downvote",
-        "comment": null
-    },
-    {
-        "timestamp": "20250217_190338",
-        "feedback_type": "Thanks! It is interesting!",
-        "comment": null
-    },
-    {
-        "timestamp": "20250217_190341",
-        "feedback_type": "Thanks! It is interesting!",
-        "comment": null
-    }
-]
feedback_dataset/feedback-20250217_212246.json
DELETED
File without changes
feedback_dataset/feedback-20250217_212401.json
DELETED
@@ -1,10 +0,0 @@
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212450"}
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212452"}
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "It is good!", "comment": null, "datetime": "20250217_212459"}
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212523"}
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212524"}
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212524"}
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "downvote", "comment": null, "datetime": "20250217_212524"}
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "It is good!", "comment": null, "datetime": "20250217_212526"}
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "upvote", "comment": null, "datetime": "20250217_212619"}
-{"query_id": "20250217_212439_f48ed6ff", "feedback_type": "It is good!", "comment": null, "datetime": "20250217_212650"}