dashi6174 committed on
Commit
c46aa24
·
1 Parent(s): 9d66cd6

Support parsing of code files (#789)

Browse files

### What problem does this PR solve?

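Adds common source-code file extensions (py, js, java, c, cpp, h, php, go, ts, sh, cs, kt) to the extension pattern in `filename_type` (api/utils/file_utils.py) that maps files to `FileType.DOC`,
and to the extension pattern in `chunk` (rag/app/naive.py) that previously matched only `txt` and `md`, so code files are accepted and parsed through that same branch.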

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Files changed (2) hide show
  1. api/utils/file_utils.py +1 -1
  2. rag/app/naive.py +1 -1
api/utils/file_utils.py CHANGED
@@ -156,7 +156,7 @@ def filename_type(filename):
156
  return FileType.PDF.value
157
 
158
  if re.match(
159
- r".*\.(doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md)$", filename):
160
  return FileType.DOC.value
161
 
162
  if re.match(
 
156
  return FileType.PDF.value
157
 
158
  if re.match(
159
+ r".*\.(doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename):
160
  return FileType.DOC.value
161
 
162
  if re.match(
rag/app/naive.py CHANGED
@@ -136,7 +136,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
136
  excel_parser = ExcelParser()
137
  sections = [(excel_parser.html(binary), "")]
138
 
139
- elif re.search(r"\.(txt|md)$", filename, re.IGNORECASE):
140
  callback(0.1, "Start to parse.")
141
  txt = ""
142
  if binary:
 
136
  excel_parser = ExcelParser()
137
  sections = [(excel_parser.html(binary), "")]
138
 
139
+ elif re.search(r"\.(txt|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename, re.IGNORECASE):
140
  callback(0.1, "Start to parse.")
141
  txt = ""
142
  if binary: