Kevin Hu
committed on
Commit
·
58ecd6d
1
Parent(s):
cf493b7
make excel parsing configurable (#2517)
Browse files
### What problem does this PR solve?
#2516
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- rag/app/naive.py +4 -1
- rag/llm/chat_model.py +1 -0
rag/app/naive.py
CHANGED
|
@@ -221,7 +221,10 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
|
|
| 221 |
elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
|
| 222 |
callback(0.1, "Start to parse.")
|
| 223 |
excel_parser = ExcelParser()
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
|
| 227 |
callback(0.1, "Start to parse.")
|
|
|
|
| 221 |
elif re.search(r"\.xlsx?$", filename, re.IGNORECASE):
|
| 222 |
callback(0.1, "Start to parse.")
|
| 223 |
excel_parser = ExcelParser()
|
| 224 |
+
if parser_config.get("html4excel"):
|
| 225 |
+
sections = [(l, "") for l in excel_parser.html(binary, 12) if l]
|
| 226 |
+
else:
|
| 227 |
+
sections = [(l, "") for l in excel_parser(binary) if l]
|
| 228 |
|
| 229 |
elif re.search(r"\.(txt|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt|sql)$", filename, re.IGNORECASE):
|
| 230 |
callback(0.1, "Start to parse.")
|
rag/llm/chat_model.py
CHANGED
|
@@ -689,6 +689,7 @@ class BedrockChat(Base):
|
|
| 689 |
|
| 690 |
yield num_tokens_from_string(ans)
|
| 691 |
|
|
|
|
| 692 |
class GeminiChat(Base):
|
| 693 |
|
| 694 |
def __init__(self, key, model_name,base_url=None):
|
|
|
|
| 689 |
|
| 690 |
yield num_tokens_from_string(ans)
|
| 691 |
|
| 692 |
+
|
| 693 |
class GeminiChat(Base):
|
| 694 |
|
| 695 |
def __init__(self, key, model_name,base_url=None):
|