Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -40,7 +40,7 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
|
|
40 |
if splitter_selection == LABEL_TEXTSPLITTER:
|
41 |
text_splitter = CharacterTextSplitter(
|
42 |
chunk_size=length,
|
43 |
-
chunk_overlap=
|
44 |
length_function=length_function,
|
45 |
strip_whitespace=False,
|
46 |
is_separator_regex=False,
|
@@ -49,7 +49,7 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
|
|
49 |
elif splitter_selection == LABEL_RECURSIVE:
|
50 |
text_splitter = RecursiveCharacterTextSplitter(
|
51 |
chunk_size=length,
|
52 |
-
chunk_overlap=
|
53 |
length_function=length_function,
|
54 |
strip_whitespace=False,
|
55 |
separators=separators,
|
@@ -59,7 +59,7 @@ def chunk(text, length, splitter_selection, separators_str, length_unit_selectio
|
|
59 |
|
60 |
unoverlapped_text_splits = unoverlap_list(text_splits)
|
61 |
|
62 |
-
output = [((split[0], 0) if split[1] else (split[0], str(i+1))) for i, split in enumerate(unoverlapped_text_splits)]
|
63 |
print(output)
|
64 |
return output
|
65 |
|
@@ -138,10 +138,10 @@ with gr.Blocks(theme=gr.themes.Soft(text_size='lg', font=["monospace"], primary_
|
|
138 |
info="How should we measure our chunk lengths?",
|
139 |
)
|
140 |
slider_count = gr.Slider(
|
141 |
-
20, 500, value=200, label="Chunk length π", info="In the chosen unit."
|
142 |
)
|
143 |
chunk_overlap = gr.Slider(
|
144 |
-
0, 30, value=10, label="Overlap between chunks", info="In the chosen unit."
|
145 |
)
|
146 |
out = gr.HighlightedText(
|
147 |
label="Output",
|
|
|
40 |
if splitter_selection == LABEL_TEXTSPLITTER:
|
41 |
text_splitter = CharacterTextSplitter(
|
42 |
chunk_size=length,
|
43 |
+
chunk_overlap=10,
|
44 |
length_function=length_function,
|
45 |
strip_whitespace=False,
|
46 |
is_separator_regex=False,
|
|
|
49 |
elif splitter_selection == LABEL_RECURSIVE:
|
50 |
text_splitter = RecursiveCharacterTextSplitter(
|
51 |
chunk_size=length,
|
52 |
+
chunk_overlap=10,
|
53 |
length_function=length_function,
|
54 |
strip_whitespace=False,
|
55 |
separators=separators,
|
|
|
59 |
|
60 |
unoverlapped_text_splits = unoverlap_list(text_splits)
|
61 |
|
62 |
+
output = [((split[0], '0') if split[1] else (split[0], str(i+1))) for i, split in enumerate(unoverlapped_text_splits)]
|
63 |
print(output)
|
64 |
return output
|
65 |
|
|
|
138 |
info="How should we measure our chunk lengths?",
|
139 |
)
|
140 |
slider_count = gr.Slider(
|
141 |
+
20, 500, value=200, step=1, label="Chunk length π", info="In the chosen unit."
|
142 |
)
|
143 |
chunk_overlap = gr.Slider(
|
144 |
+
0, 30, value=10, step=1, label="Overlap between chunks", info="In the chosen unit."
|
145 |
)
|
146 |
out = gr.HighlightedText(
|
147 |
label="Output",
|