timeki committed on
Commit
3fbe6fe
·
1 Parent(s): da3f47f

Update logging.py

Browse files
Files changed (1) hide show
  1. climateqa/logging.py +4 -4
climateqa/logging.py CHANGED
@@ -8,9 +8,9 @@ import pandas as pd
8
  import io
9
  from typing import TypedDict, List
10
  from climateqa.constants import DOCUMENT_METADATA_DEFAULT_VALUES
 
11
 
12
-
13
- def serialize_docs(docs:list)->list:
14
  """Convert document objects to a simplified format compatible with Hugging Face datasets.
15
 
16
  This function processes document objects by extracting their page content and metadata,
@@ -27,13 +27,13 @@ def serialize_docs(docs:list)->list:
27
  for doc in docs:
28
  # Make sure we have a clean doc format
29
  new_doc = {
30
- "page_content": doc.get("page_content", ""),
31
  "metadata": {}
32
  }
33
 
34
  # Ensure all metadata fields exist with defaults if missing
35
  for field, default_value in DOCUMENT_METADATA_DEFAULT_VALUES.items():
36
- new_value = (doc.get("metadata", {}).get(field, default_value))
37
  try:
38
  new_doc["metadata"][field] = type(default_value)(new_value)
39
  except:
 
8
  import io
9
  from typing import TypedDict, List
10
  from climateqa.constants import DOCUMENT_METADATA_DEFAULT_VALUES
11
+ from langchain_core.documents import Document
12
 
13
+ def serialize_docs(docs:list[Document])->list:
 
14
  """Convert document objects to a simplified format compatible with Hugging Face datasets.
15
 
16
  This function processes document objects by extracting their page content and metadata,
 
27
  for doc in docs:
28
  # Make sure we have a clean doc format
29
  new_doc = {
30
+ "page_content": doc.page_content,
31
  "metadata": {}
32
  }
33
 
34
  # Ensure all metadata fields exist with defaults if missing
35
  for field, default_value in DOCUMENT_METADATA_DEFAULT_VALUES.items():
36
+ new_value = doc.metadata.get(field, default_value)
37
  try:
38
  new_doc["metadata"][field] = type(default_value)(new_value)
39
  except: