m-ric HF staff committed on
Commit
eb84d7e
•
1 Parent(s): 9501bef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -4,20 +4,29 @@ from langchain.text_splitter import (
4
  RecursiveCharacterTextSplitter,
5
  )
6
 
 
 
 
7
  def extract_separators_from_string(separator_str):
8
  try:
9
  separators = separators_str[1:-1].split(", ")
10
  return [separator.replace('"', "").replace("'", "") for separator in separators]
11
- except:
 
12
  raise gr.Error(f"""
13
  Did not succeed in extracting seperators from string: {separator_str}.
14
  Please type it in the correct format: "['separator_1', 'separator_2', etc]"
15
  """)
 
 
 
 
 
16
 
17
  def chunk(text, length, splitter_selection, separators_str):
18
  separators = extract_separators_from_string(separators_str)
19
 
20
- if splitter_selection == "LangChain's CharacterTextSplitter":
21
  text_splitter = CharacterTextSplitter(
22
  separator="",
23
  chunk_size=length,
@@ -27,7 +36,7 @@ def chunk(text, length, splitter_selection, separators_str):
27
  )
28
  splits = text_splitter.create_documents([text])
29
  text_splits = [split.page_content for split in splits]
30
- elif splitter_selection == "Langchain's RecursiveCharacterTextSplitter":
31
  text_splitter = RecursiveCharacterTextSplitter(
32
  chunk_size=length,
33
  chunk_overlap=0,
@@ -71,12 +80,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
71
  with gr.Row():
72
  split_selection = gr.Dropdown(
73
  choices=[
74
- "LangChain's CharacterTextSplitter",
75
- "Langchain's RecursiveCharacterTextSplitter",
76
  ],
77
- value="LangChain's CharacterTextSplitter",
78
  label="Chunking method ",
79
- info="How should we split our chunks?",
80
  )
81
  separator_selection = gr.Textbox(
82
  value=["\n\n", "\n", ".", " ", ""],
 
4
  RecursiveCharacterTextSplitter,
5
  )
6
 
7
+ LABEL_TEXTSPLITTER = "LangChain's CharacterTextSplitter"
8
+ LABEL_RECURSIVE = "Langchain's RecursiveCharacterTextSplitter"
9
+
10
  def extract_separators_from_string(separator_str):
11
  try:
12
  separators = separators_str[1:-1].split(", ")
13
  return [separator.replace('"', "").replace("'", "") for separator in separators]
14
+ except Exception as e:
15
+ print(e)
16
  raise gr.Error(f"""
17
  Did not succeed in extracting seperators from string: {separator_str}.
18
  Please type it in the correct format: "['separator_1', 'separator_2', etc]"
19
  """)
20
+
21
+ def change_split_selection(text, slider_count, split_selection, separator_selection):
22
+ separator_selection.update(interactive=(split_selection==LABEL_RECURSIVE))
23
+ return chunk(text, slider_count, split_selection, separator_selection)
24
+
25
 
26
  def chunk(text, length, splitter_selection, separators_str):
27
  separators = extract_separators_from_string(separators_str)
28
 
29
+ if splitter_selection == LABEL_TEXTSPLITTER:
30
  text_splitter = CharacterTextSplitter(
31
  separator="",
32
  chunk_size=length,
 
36
  )
37
  splits = text_splitter.create_documents([text])
38
  text_splits = [split.page_content for split in splits]
39
+ elif splitter_selection == LABEL_RECURSIVE:
40
  text_splitter = RecursiveCharacterTextSplitter(
41
  chunk_size=length,
42
  chunk_overlap=0,
 
80
  with gr.Row():
81
  split_selection = gr.Dropdown(
82
  choices=[
83
+ LABEL_TEXTSPLITTER,
84
+ LABEL_RECURSIVE,
85
  ],
86
+ value=LABEL_TEXTSPLITTER,
87
  label="Chunking method ",
 
88
  )
89
  separator_selection = gr.Textbox(
90
  value=["\n\n", "\n", ".", " ", ""],