Paula Leonova
committed on
Commit
·
c5fa7c2
1
Parent(s):
eb0efc1
Add expanders for uploading files
Browse files
app.py
CHANGED
@@ -36,33 +36,52 @@ else:
|
|
36 |
input_glabels = ''
|
37 |
|
38 |
|
39 |
-
|
40 |
with st.form(key='my_form'):
|
41 |
-
|
42 |
-
|
43 |
-
('Free form text', 'CSV')
|
44 |
-
)
|
45 |
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
gen_keywords = st.radio(
|
53 |
-
"Generate keywords from text?",
|
54 |
-
('Yes', 'No')
|
55 |
-
)
|
56 |
|
57 |
if text_input == display_text and display_text != '':
|
58 |
text_input = example_text
|
59 |
|
|
|
|
|
|
|
60 |
labels = st.text_input('Enter possible topic labels, which can be either keywords and/or general themes (comma-separated):',input_labels, max_chars=1000)
|
61 |
labels = list(set([x.strip() for x in labels.strip().split(',') if len(x.strip()) > 0]))
|
62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
glabels = st.text_input('If available, enter ground truth topic labels to evaluate results, otherwise leave blank (comma-separated):',input_glabels, max_chars=1000)
|
64 |
glabels = list(set([x.strip() for x in glabels.strip().split(',') if len(x.strip()) > 0]))
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
threshold_value = st.slider(
|
67 |
'Select a threshold cutoff for matching percentage (used for ground truth label evaluation)',
|
68 |
0.0, 1.0, (0.5))
|
@@ -122,7 +141,7 @@ if submit_button or example_button:
|
|
122 |
summary = []
|
123 |
|
124 |
st.markdown("_Once the original text is broken into smaller chunks (totaling no more than 1024 tokens, \
|
125 |
-
with complete
|
126 |
and then combined at the very end to generate the final summary._")
|
127 |
|
128 |
for num_chunk, text_chunk in enumerate(text_chunks):
|
|
|
36 |
input_glabels = ''
|
37 |
|
38 |
|
|
|
39 |
with st.form(key='my_form'):
|
40 |
+
st.markdown("##### Step 1: Upload Text")
|
41 |
+
text_input = st.text_area("Input any text you want to summarize & classify here (keep in mind very long text will take a while to process):", display_text)
|
|
|
|
|
42 |
|
43 |
+
text_csv_expander = st.expander(label=f'Want to upload multiple texts at once? Expand to upload your text files below.', expanded=False)
|
44 |
+
with text_csv_expander:
|
45 |
+
uploaded_text_file = st.file_uploader(label="Upload file(s) that end with the .txt suffix",
|
46 |
+
accept_multiple_files=True,
|
47 |
+
type = 'txt')
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
if text_input == display_text and display_text != '':
|
50 |
text_input = example_text
|
51 |
|
52 |
+
|
53 |
+
st.text("\n\n\n")
|
54 |
+
st.markdown("##### Step 2: Enter Labels")
|
55 |
labels = st.text_input('Enter possible topic labels, which can be either keywords and/or general themes (comma-separated):',input_labels, max_chars=1000)
|
56 |
labels = list(set([x.strip() for x in labels.strip().split(',') if len(x.strip()) > 0]))
|
57 |
+
|
58 |
+
labels_csv_expander = st.expander(label=f'Prefer to upload a list of labels instead? Click here to upload your CSV file.',expanded=False)
|
59 |
+
with labels_csv_expander:
|
60 |
+
uploaded_labels_file = st.file_uploader("Choose a CSV file with one column and no header, where each cell is a separate label",
|
61 |
+
key='labels_uploader')
|
62 |
+
|
63 |
+
gen_keywords = st.radio(
|
64 |
+
"Generate keywords from text (independent from the above labels)?",
|
65 |
+
('Yes', 'No')
|
66 |
+
)
|
67 |
+
|
68 |
+
st.text("\n\n\n")
|
69 |
+
st.markdown("##### Step 3: Provide Ground Truth Labels (_Optional_)")
|
70 |
glabels = st.text_input('If available, enter ground truth topic labels to evaluate results, otherwise leave blank (comma-separated):',input_glabels, max_chars=1000)
|
71 |
glabels = list(set([x.strip() for x in glabels.strip().split(',') if len(x.strip()) > 0]))
|
72 |
|
73 |
+
|
74 |
+
glabels_csv_expander = st.expander(label=f'Have a file with labels for the text? Click here to upload your CSV file.', expanded=False)
|
75 |
+
with glabels_csv_expander:
|
76 |
+
st.write("Option A:")
|
77 |
+
uploaded_onetext_glabels_file = st.file_uploader("Choose a CSV file with one column and no header, where each cell is a separate label",
|
78 |
+
key = 'onetext_glabels_uplaoder')
|
79 |
+
st.write("Option B:")
|
80 |
+
uploaded_multitext_glabels_file = st.file_uploader('Choose a CSV file with two columns "title" and "label", with the cells in the title column matching the name of the files uploaded in step #1.',
|
81 |
+
key = 'multitext_glabels_uplaoder')
|
82 |
+
|
83 |
+
|
84 |
+
|
85 |
threshold_value = st.slider(
|
86 |
'Select a threshold cutoff for matching percentage (used for ground truth label evaluation)',
|
87 |
0.0, 1.0, (0.5))
|
|
|
141 |
summary = []
|
142 |
|
143 |
st.markdown("_Once the original text is broken into smaller chunks (totaling no more than 1024 tokens, \
|
144 |
+
with complete sentences), each block of text is then summarized separately using BART NLI \
|
145 |
and then combined at the very end to generate the final summary._")
|
146 |
|
147 |
for num_chunk, text_chunk in enumerate(text_chunks):
|