balibabu committed on
Commit
0b0058a
·
1 Parent(s): aaf24a6

feat: Configurable for Excel, HTML table or row-based text #2516 (#2538)

Browse files

### What problem does this PR solve?

feat: Configurable for Excel, HTML table or row-based text #2516

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

web/src/components/chunk-method-modal/index.tsx CHANGED
@@ -24,6 +24,7 @@ import { useFetchParserListOnMount } from './hooks';
24
  import { useTranslate } from '@/hooks/common-hooks';
25
  import Delimiter from '../delimiter';
26
  import EntityTypesItem from '../entity-types-item';
 
27
  import LayoutRecognize from '../layout-recognize';
28
  import ParseConfiguration, {
29
  showRaptorParseConfiguration,
@@ -104,6 +105,9 @@ const ChunkMethodModal: React.FC<IProps> = ({
104
 
105
  const showEntityTypes = selectedTag === 'knowledge_graph';
106
 
 
 
 
107
  const afterClose = () => {
108
  form.resetFields();
109
  };
@@ -279,6 +283,7 @@ const ChunkMethodModal: React.FC<IProps> = ({
279
  <Delimiter></Delimiter>
280
  </>
281
  )}
 
282
  {showRaptorParseConfiguration(selectedTag) && (
283
  <ParseConfiguration></ParseConfiguration>
284
  )}
 
24
  import { useTranslate } from '@/hooks/common-hooks';
25
  import Delimiter from '../delimiter';
26
  import EntityTypesItem from '../entity-types-item';
27
+ import ExcelToHtml from '../excel-to-html';
28
  import LayoutRecognize from '../layout-recognize';
29
  import ParseConfiguration, {
30
  showRaptorParseConfiguration,
 
105
 
106
  const showEntityTypes = selectedTag === 'knowledge_graph';
107
 
108
+ const showExcelToHtml =
109
+ selectedTag === 'naive' && documentExtension === 'xlsx';
110
+
111
  const afterClose = () => {
112
  form.resetFields();
113
  };
 
283
  <Delimiter></Delimiter>
284
  </>
285
  )}
286
+ {showExcelToHtml && <ExcelToHtml></ExcelToHtml>}
287
  {showRaptorParseConfiguration(selectedTag) && (
288
  <ParseConfiguration></ParseConfiguration>
289
  )}
web/src/components/excel-to-html.tsx ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useTranslate } from '@/hooks/common-hooks';
2
+ import { Form, Switch } from 'antd';
3
+
4
+ const ExcelToHtml = () => {
5
+ const { t } = useTranslate('knowledgeDetails');
6
+ return (
7
+ <Form.Item
8
+ name={['parser_config', 'html4excel']}
9
+ label={t('html4excel')}
10
+ initialValue={false}
11
+ valuePropName="checked"
12
+ tooltip={t('html4excelTip')}
13
+ >
14
+ <Switch />
15
+ </Form.Item>
16
+ );
17
+ };
18
+
19
+ export default ExcelToHtml;
web/src/locales/en.ts CHANGED
@@ -150,6 +150,8 @@ export default {
150
  topK: 'Top-K',
151
  topKTip: `K chunks will be fed into rerank models.`,
152
  delimiter: `Delimiter`,
 
 
153
  },
154
  knowledgeConfiguration: {
155
  titleDescription:
 
150
  topK: 'Top-K',
151
  topKTip: `K chunks will be fed into rerank models.`,
152
  delimiter: `Delimiter`,
153
+ html4excel: 'Excel to HTML',
154
+ html4excelTip: `Excel will be parsed into HTML table or not. If it's FALSE, every row in Excel will be formed as a chunk.`,
155
  },
156
  knowledgeConfiguration: {
157
  titleDescription:
web/src/locales/zh-traditional.ts CHANGED
@@ -146,6 +146,8 @@ export default {
146
  topK: 'Top-K',
147
  topKTip: `K塊將被送入Rerank型號。`,
148
  delimiter: `分段標識符`,
 
 
149
  },
150
  knowledgeConfiguration: {
151
  titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',
 
146
  topK: 'Top-K',
147
  topKTip: `K塊將被送入Rerank型號。`,
148
  delimiter: `分段標識符`,
149
+ html4excel: '表格轉HTML',
150
+ html4excelTip: `Excel 是否會被解析為 HTML 表格。如果為 FALSE,Excel 中的每一行都會形成一個區塊。`,
151
  },
152
  knowledgeConfiguration: {
153
  titleDescription: '在這裡更新您的知識庫詳細信息,尤其是解析方法。',
web/src/locales/zh.ts CHANGED
@@ -147,6 +147,8 @@ export default {
147
  topK: 'Top-K',
148
  topKTip: `K块将被送入Rerank型号。`,
149
  delimiter: `分段标识符`,
 
 
150
  },
151
  knowledgeConfiguration: {
152
  titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',
 
147
  topK: 'Top-K',
148
  topKTip: `K块将被送入Rerank型号。`,
149
  delimiter: `分段标识符`,
150
+ html4excel: '表格转HTML',
151
+ html4excelTip: `Excel 是否将被解析为 HTML 表。如果为 FALSE,Excel 中的每一行都将形成一个块。`,
152
  },
153
  knowledgeConfiguration: {
154
  titleDescription: '在这里更新您的知识库详细信息,尤其是解析方法。',
web/src/pages/add-knowledge/components/knowledge-setting/configuration.tsx CHANGED
@@ -1,5 +1,6 @@
1
  import Delimiter from '@/components/delimiter';
2
  import EntityTypesItem from '@/components/entity-types-item';
 
3
  import LayoutRecognize from '@/components/layout-recognize';
4
  import MaxTokenNumber from '@/components/max-token-number';
5
  import ParseConfiguration, {
@@ -124,6 +125,7 @@ const ConfigurationForm = ({ form }: { form: FormInstance }) => {
124
  <MaxTokenNumber></MaxTokenNumber>
125
  <Delimiter></Delimiter>
126
  <LayoutRecognize></LayoutRecognize>
 
127
  </>
128
  )}
129
  {showRaptorParseConfiguration(parserId) && (
 
1
  import Delimiter from '@/components/delimiter';
2
  import EntityTypesItem from '@/components/entity-types-item';
3
+ import ExcelToHtml from '@/components/excel-to-html';
4
  import LayoutRecognize from '@/components/layout-recognize';
5
  import MaxTokenNumber from '@/components/max-token-number';
6
  import ParseConfiguration, {
 
125
  <MaxTokenNumber></MaxTokenNumber>
126
  <Delimiter></Delimiter>
127
  <LayoutRecognize></LayoutRecognize>
128
+ <ExcelToHtml></ExcelToHtml>
129
  </>
130
  )}
131
  {showRaptorParseConfiguration(parserId) && (