TeeA committed on
Commit
af430db
·
1 Parent(s): 5e4b407

text-based query with metadata information

Browse files
Files changed (2) hide show
  1. .gitignore +44 -1
  2. app.py +19 -14
.gitignore CHANGED
@@ -1,2 +1,45 @@
1
  .venv/*
2
- .env/*
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  .venv/*
2
+ .env/*
3
+ # Python cache files
4
+ __pycache__/
5
+ *.py[cod]
6
+ # Distribution / packaging
7
+ .Python
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ # Installer logs
20
+ pip-log.txt
21
+ pip-delete-this-directory.txt
22
+ # Unit test / coverage reports
23
+ .coverage
24
+ .coverage.*
25
+ .hypothesis/
26
+ .tox/
27
+ # Pytest cache
28
+ .pytest_cache/
29
+ # Jupyter Notebook checkpoints
30
+ .ipynb_checkpoints
31
+ # PyCharm project files
32
+ .idea/
33
+ # VS Code settings
34
+ .vscode/
35
+ # Environment variables
36
+ .env
37
+ .gradio/
38
+ # Local configuration files
39
+ *.local
40
+ # MacOS specific files
41
+ .DS_Store
42
+ # Windows specific files
43
+ Thumbs.db
44
+ # Logs
45
+ *.log
app.py CHANGED
@@ -275,7 +275,7 @@ def extract_step_metadata(file_path):
275
 
276
  # Extract FILE_DESCRIPTION
277
  desc_match = re.search(
278
- r"FILE_DESCRIPTION\s*\(\s*\((.*?)\),\s*\'(.*?)\'\);", content, re.DOTALL
279
  )
280
  if desc_match:
281
  metadata["Description"] = desc_match.group(1).replace("'", "")
@@ -320,11 +320,11 @@ def parse_3d_file(original_filepath: str):
320
  if original_filepath.endswith((".3dxml", ".3DXML")):
321
  meta = extract_header_from_3dxml(original_filepath)
322
  text = dict_to_markdown(meta)
323
- return f"Parsed metadata: {text}"
324
  elif original_filepath.endswith((".step", ".STEP")):
325
  meta = extract_step_metadata(original_filepath)
326
  text = dict_to_markdown(meta)
327
- return f"Parsed metadata: {text}"
328
  logger.warning(f"No metadata found in the file {original_filepath}")
329
  return "No metadata found!"
330
 
@@ -332,9 +332,10 @@ def parse_3d_file(original_filepath: str):
332
  def render_3D_metadata(
333
  original_filepath: str, obj_path: str, embedding_dict: dict
334
  ) -> Tuple[str, str]:
335
- return parse_3d_file(original_filepath=original_filepath), embedding_dict.get(
336
- obj_path, {}
337
- ).get("description", "No description found!")
 
338
 
339
 
340
  #######################################################################################################################
@@ -470,12 +471,7 @@ async def embedding_3d_object(obj_path: str) -> Dict[str, Any]:
470
  image_embedding = await aget_image_embedding_from_np_image(
471
  np_image=aggregated_image
472
  )
473
- text_embedding = await text_embedding_model.aget_text_embedding(text=description)
474
- return {
475
- "description": description,
476
- "image_embedding": image_embedding,
477
- "text_embedding": text_embedding,
478
- }
479
 
480
 
481
  BASE_SAMPLE_DIR = "/Users/tridoan/Spartan/Datum/service-ai/poc/3D/gradio_cache/"
@@ -505,11 +501,20 @@ async def accumulate_and_embedding(input_files, file_list, embedding_dict):
505
  logger.info("Processing new upload file:", file_path)
506
  obj_path = convert_to_obj(file_path)
507
  embeddings = await embedding_3d_object(obj_path)
 
508
  if obj_path not in embedding_dict:
509
  embedding_dict[obj_path] = {}
 
 
 
 
 
 
 
 
510
  embedding_dict[obj_path]["description"] = embeddings["description"]
511
  embedding_dict[obj_path]["image_embedding"] = embeddings["image_embedding"]
512
- embedding_dict[obj_path]["text_embedding"] = embeddings["text_embedding"]
513
 
514
  return all_files, gr.update(choices=all_files), embedding_dict
515
 
@@ -679,4 +684,4 @@ with gr.Blocks() as demo:
679
  )
680
 
681
  if __name__ == "__main__":
682
- demo.launch(share=True)
 
275
 
276
  # Extract FILE_DESCRIPTION
277
  desc_match = re.search(
278
+ r"FILE_DESCRIPTION\s*\(\s*\((.*?)\),\s*'([^']*)'\);", content, re.DOTALL
279
  )
280
  if desc_match:
281
  metadata["Description"] = desc_match.group(1).replace("'", "")
 
320
  if original_filepath.endswith((".3dxml", ".3DXML")):
321
  meta = extract_header_from_3dxml(original_filepath)
322
  text = dict_to_markdown(meta)
323
+ return text
324
  elif original_filepath.endswith((".step", ".STEP")):
325
  meta = extract_step_metadata(original_filepath)
326
  text = dict_to_markdown(meta)
327
+ return text
328
  logger.warning(f"No metadata found in the file {original_filepath}")
329
  return "No metadata found!"
330
 
 
332
  def render_3D_metadata(
333
  original_filepath: str, obj_path: str, embedding_dict: dict
334
  ) -> Tuple[str, str]:
335
+ return (
336
+ embedding_dict.get(obj_path, {}).get("metadata", "No metadata found!"),
337
+ embedding_dict.get(obj_path, {}).get("description", "No description found!"),
338
+ )
339
 
340
 
341
  #######################################################################################################################
 
471
  image_embedding = await aget_image_embedding_from_np_image(
472
  np_image=aggregated_image
473
  )
474
+ return {"description": description, "image_embedding": image_embedding}
 
 
 
 
 
475
 
476
 
477
  BASE_SAMPLE_DIR = "/Users/tridoan/Spartan/Datum/service-ai/poc/3D/gradio_cache/"
 
501
  logger.info("Processing new upload file:", file_path)
502
  obj_path = convert_to_obj(file_path)
503
  embeddings = await embedding_3d_object(obj_path)
504
+ metadata = parse_3d_file(original_filepath=file_path)
505
  if obj_path not in embedding_dict:
506
  embedding_dict[obj_path] = {}
507
+ text_embedding = await text_embedding_model.aget_text_embedding(
508
+ text="The 3D object is: "
509
+ + embeddings["description"]
510
+ + f".\n {'n' * 20}\nMetadata: "
511
+ + metadata
512
+ )
513
+ # store embeddings and metadata
514
+ embedding_dict[obj_path]["metadata"] = metadata
515
  embedding_dict[obj_path]["description"] = embeddings["description"]
516
  embedding_dict[obj_path]["image_embedding"] = embeddings["image_embedding"]
517
+ embedding_dict[obj_path]["text_embedding"] = text_embedding
518
 
519
  return all_files, gr.update(choices=all_files), embedding_dict
520
 
 
684
  )
685
 
686
  if __name__ == "__main__":
687
+ demo.launch(share=True, debug=True)