balibabu
committed on
Commit
·
0b0058a
1
Parent(s):
aaf24a6
feat: Configurable for excel, html table or row based text #2516 (#2538)
Browse files
### What problem does this PR solve?
feat: Make Excel parsing configurable (HTML table or row-based text) #2516
### Type of change
- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):
web/src/components/chunk-method-modal/index.tsx
CHANGED
@@ -24,6 +24,7 @@ import { useFetchParserListOnMount } from './hooks';
|
|
24 |
import { useTranslate } from '@/hooks/common-hooks';
|
25 |
import Delimiter from '../delimiter';
|
26 |
import EntityTypesItem from '../entity-types-item';
|
|
|
27 |
import LayoutRecognize from '../layout-recognize';
|
28 |
import ParseConfiguration, {
|
29 |
showRaptorParseConfiguration,
|
@@ -104,6 +105,9 @@ const ChunkMethodModal: React.FC<IProps> = ({
|
|
104 |
|
105 |
const showEntityTypes = selectedTag === 'knowledge_graph';
|
106 |
|
|
|
|
|
|
|
107 |
const afterClose = () => {
|
108 |
form.resetFields();
|
109 |
};
|
@@ -279,6 +283,7 @@ const ChunkMethodModal: React.FC<IProps> = ({
|
|
279 |
<Delimiter></Delimiter>
|
280 |
</>
|
281 |
)}
|
|
|
282 |
{showRaptorParseConfiguration(selectedTag) && (
|
283 |
<ParseConfiguration></ParseConfiguration>
|
284 |
)}
|
|
|
24 |
import { useTranslate } from '@/hooks/common-hooks';
|
25 |
import Delimiter from '../delimiter';
|
26 |
import EntityTypesItem from '../entity-types-item';
|
27 |
+
import ExcelToHtml from '../excel-to-html';
|
28 |
import LayoutRecognize from '../layout-recognize';
|
29 |
import ParseConfiguration, {
|
30 |
showRaptorParseConfiguration,
|
|
|
105 |
|
106 |
const showEntityTypes = selectedTag === 'knowledge_graph';
|
107 |
|
108 |
+
const showExcelToHtml =
|
109 |
+
selectedTag === 'naive' && documentExtension === 'xlsx';
|
110 |
+
|
111 |
const afterClose = () => {
|
112 |
form.resetFields();
|
113 |
};
|
|
|
283 |
<Delimiter></Delimiter>
|
284 |
</>
|
285 |
)}
|
286 |
+
{showExcelToHtml && <ExcelToHtml></ExcelToHtml>}
|
287 |
{showRaptorParseConfiguration(selectedTag) && (
|
288 |
<ParseConfiguration></ParseConfiguration>
|
289 |
)}
|
web/src/components/excel-to-html.tsx
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { useTranslate } from '@/hooks/common-hooks';
|
2 |
+
import { Form, Switch } from 'antd';
|
3 |
+
|
4 |
+
/**
 * Form field that toggles the "html4excel" parser option.
 *
 * Renders an antd `Form.Item` wrapping a `Switch`. The value is stored in the
 * enclosing form under the nested path `['parser_config', 'html4excel']` and
 * starts as `false`. `valuePropName="checked"` binds the form value to the
 * Switch's `checked` prop. Label and tooltip text are looked up with `t()`
 * from the 'knowledgeDetails' translation namespace (keys `html4excel` and
 * `html4excelTip`).
 *
 * NOTE(review): presumably must be rendered inside an antd `<Form>` for the
 * field to register — confirm against the call sites.
 */
const ExcelToHtml = () => {
|
5 |
+
const { t } = useTranslate('knowledgeDetails');
|
6 |
+
return (
|
7 |
+
<Form.Item
|
8 |
+
name={['parser_config', 'html4excel']}
|
9 |
+
label={t('html4excel')}
|
10 |
+
initialValue={false}
|
11 |
+
valuePropName="checked"
|
12 |
+
tooltip={t('html4excelTip')}
|
13 |
+
>
|
14 |
+
<Switch />
|
15 |
+
</Form.Item>
|
16 |
+
);
|
17 |
+
};
|
18 |
+
|
19 |
+
export default ExcelToHtml;
|
web/src/locales/en.ts
CHANGED
@@ -150,6 +150,8 @@ export default {
|
|
150 |
topK: 'Top-K',
|
151 |
topKTip: `K chunks will be fed into rerank models.`,
|
152 |
delimiter: `Delimiter`,
|
|
|
|
|
153 |
},
|
154 |
knowledgeConfiguration: {
|
155 |
titleDescription:
|
|
|
150 |
topK: 'Top-K',
|
151 |
topKTip: `K chunks will be fed into rerank models.`,
|
152 |
delimiter: `Delimiter`,
|
153 |
+
html4excel: 'Excel to HTML',
|
154 |
+
html4excelTip: `Excel will be parsed into HTML table or not. If it's FALSE, every row in Excel will be formed as a chunk.`,
|
155 |
},
|
156 |
knowledgeConfiguration: {
|
157 |
titleDescription:
|
web/src/locales/zh-traditional.ts
CHANGED
@@ -146,6 +146,8 @@ export default {
|
|
146 |
topK: 'Top-K',
|
147 |
topKTip: `K塊將被送入Rerank型號。`,
|
148 |
delimiter: `分段標識符`,
|
|
|
|
|
149 |
},
|
150 |
knowledgeConfiguration: {
|
151 |
titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',
|
|
|
146 |
topK: 'Top-K',
|
147 |
topKTip: `K塊將被送入Rerank型號。`,
|
148 |
delimiter: `分段標識符`,
|
149 |
+
html4excel: '表格轉HTML',
|
150 |
+
html4excelTip: `Excel 是否會被解析為 HTML 表格。如果為 FALSE,Excel 中的每一行都會形成一個區塊。`,
|
151 |
},
|
152 |
knowledgeConfiguration: {
|
153 |
titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',
|
web/src/locales/zh.ts
CHANGED
@@ -147,6 +147,8 @@ export default {
|
|
147 |
topK: 'Top-K',
|
148 |
topKTip: `K块将被送入Rerank型号。`,
|
149 |
delimiter: `分段标识符`,
|
|
|
|
|
150 |
},
|
151 |
knowledgeConfiguration: {
|
152 |
titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',
|
|
|
147 |
topK: 'Top-K',
|
148 |
topKTip: `K块将被送入Rerank型号。`,
|
149 |
delimiter: `分段标识符`,
|
150 |
+
html4excel: '表格转HTML',
|
151 |
+
html4excelTip: `Excel 是否将被解析为 HTML 表。如果为 FALSE,Excel 中的每一行都将形成一个块。`,
|
152 |
},
|
153 |
knowledgeConfiguration: {
|
154 |
titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',
|
web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import Delimiter from '@/components/delimiter';
|
2 |
import EntityTypesItem from '@/components/entity-types-item';
|
|
|
3 |
import LayoutRecognize from '@/components/layout-recognize';
|
4 |
import MaxTokenNumber from '@/components/max-token-number';
|
5 |
import ParseConfiguration, {
|
@@ -124,6 +125,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
|
|
124 |
<MaxTokenNumber></MaxTokenNumber>
|
125 |
<Delimiter></Delimiter>
|
126 |
<LayoutRecognize></LayoutRecognize>
|
|
|
127 |
</>
|
128 |
)}
|
129 |
{showRaptorParseConfiguration(parserId) && (
|
|
|
1 |
import Delimiter from '@/components/delimiter';
|
2 |
import EntityTypesItem from '@/components/entity-types-item';
|
3 |
+
import ExcelToHtml from '@/components/excel-to-html';
|
4 |
import LayoutRecognize from '@/components/layout-recognize';
|
5 |
import MaxTokenNumber from '@/components/max-token-number';
|
6 |
import ParseConfiguration, {
|
|
|
125 |
<MaxTokenNumber></MaxTokenNumber>
|
126 |
<Delimiter></Delimiter>
|
127 |
<LayoutRecognize></LayoutRecognize>
|
128 |
+
<ExcelToHtml></ExcelToHtml>
|
129 |
</>
|
130 |
)}
|
131 |
{showRaptorParseConfiguration(parserId) && (
|