Kevin Hu
committed on
Commit
·
03a3e60
1
Parent(s):
d624a4e
trivial (#2650)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
deepdoc/parser/txt_parser.py
CHANGED
@@ -10,13 +10,13 @@
|
|
10 |
# See the License for the specific language governing permissions and
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
-
from deepdoc.parser.utils import
|
14 |
from rag.nlp import num_tokens_from_string
|
15 |
|
16 |
|
17 |
class RAGFlowTxtParser:
|
18 |
def __call__(self, fnm, binary=None, chunk_token_num=128, delimiter="\n!?;。;!?"):
|
19 |
-
txt =
|
20 |
return self.parser_txt(txt, chunk_token_num, delimiter)
|
21 |
|
22 |
@classmethod
|
|
|
10 |
# See the License for the specific language governing permissions and
|
11 |
# limitations under the License.
|
12 |
#
|
13 |
+
from deepdoc.parser.utils import get_text
|
14 |
from rag.nlp import num_tokens_from_string
|
15 |
|
16 |
|
17 |
class RAGFlowTxtParser:
|
18 |
def __call__(self, fnm, binary=None, chunk_token_num=128, delimiter="\n!?;。;!?"):
|
19 |
+
txt = get_text(fnm, binary)
|
20 |
return self.parser_txt(txt, chunk_token_num, delimiter)
|
21 |
|
22 |
@classmethod
|