Commit
·
a11ffc5
1
Parent(s):
5461e28
Fix json file parse (#4004)
Browse files
### What problem does this PR solve?
Fix JSON file parsing.
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
---------
Signed-off-by: jinhai <[email protected]>
deepdoc/parser/json_parser.py
CHANGED
|
@@ -4,6 +4,7 @@
|
|
| 4 |
|
| 5 |
import json
|
| 6 |
from typing import Any
|
|
|
|
| 7 |
from rag.nlp import find_codec
|
| 8 |
class RAGFlowJsonParser:
|
| 9 |
def __init__(
|
|
@@ -53,7 +54,7 @@ class RAGFlowJsonParser:
|
|
| 53 |
|
| 54 |
def _json_split(
|
| 55 |
self,
|
| 56 |
-
data
|
| 57 |
current_path: list[str] | None,
|
| 58 |
chunks: list[dict] | None,
|
| 59 |
) -> list[dict]:
|
|
@@ -86,13 +87,14 @@ class RAGFlowJsonParser:
|
|
| 86 |
|
| 87 |
def split_json(
|
| 88 |
self,
|
| 89 |
-
json_data
|
| 90 |
convert_lists: bool = False,
|
| 91 |
) -> list[dict]:
|
| 92 |
"""Splits JSON into a list of JSON chunks"""
|
| 93 |
|
| 94 |
if convert_lists:
|
| 95 |
-
|
|
|
|
| 96 |
else:
|
| 97 |
chunks = self._json_split(json_data, None, None)
|
| 98 |
|
|
|
|
| 4 |
|
| 5 |
import json
|
| 6 |
from typing import Any
|
| 7 |
+
|
| 8 |
from rag.nlp import find_codec
|
| 9 |
class RAGFlowJsonParser:
|
| 10 |
def __init__(
|
|
|
|
| 54 |
|
| 55 |
def _json_split(
|
| 56 |
self,
|
| 57 |
+
data,
|
| 58 |
current_path: list[str] | None,
|
| 59 |
chunks: list[dict] | None,
|
| 60 |
) -> list[dict]:
|
|
|
|
| 87 |
|
| 88 |
def split_json(
|
| 89 |
self,
|
| 90 |
+
json_data,
|
| 91 |
convert_lists: bool = False,
|
| 92 |
) -> list[dict]:
|
| 93 |
"""Splits JSON into a list of JSON chunks"""
|
| 94 |
|
| 95 |
if convert_lists:
|
| 96 |
+
preprocessed_data = self._list_to_dict_preprocessing(json_data)
|
| 97 |
+
chunks = self._json_split(preprocessed_data, None, None)
|
| 98 |
else:
|
| 99 |
chunks = self._json_split(json_data, None, None)
|
| 100 |
|