Paula Leonova
committed on
Commit
·
89f1955
1
Parent(s):
44ef896
Update summary section to include multiple text inputs
Browse files
app.py
CHANGED
@@ -48,7 +48,7 @@ with st.form(key='my_form'):
|
|
48 |
accept_multiple_files=True, key = 'text_uploader',
|
49 |
type = 'txt')
|
50 |
st.write("__Option B:__")
|
51 |
-
uploaded_csv_text_files = st.file_uploader(label='Upload a CSV file with columns: "title" and "text"',
|
52 |
accept_multiple_files=False, key = 'csv_text_uploader',
|
53 |
type = 'csv')
|
54 |
|
@@ -145,7 +145,7 @@ if submit_button or example_button:
|
|
145 |
|
146 |
|
147 |
if len(text_input) != 0:
|
148 |
-
text_df = pd.DataFrame.from_dict({'title': ['
|
149 |
|
150 |
|
151 |
with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
|
@@ -185,6 +185,7 @@ if submit_button or example_button:
|
|
185 |
title_element = ['title']
|
186 |
kw_column_list = ['keyword', 'score']
|
187 |
kw_df = kw_df[kw_df['score'] > 0.25][title_element + kw_column_list].sort_values(title_element + ['score'], ascending=False).reset_index().drop(columns='index')
|
|
|
188 |
st.dataframe(kw_df)
|
189 |
st.download_button(
|
190 |
label="Download data as CSV",
|
@@ -195,30 +196,38 @@ if submit_button or example_button:
|
|
195 |
|
196 |
|
197 |
st.markdown("### Summary")
|
198 |
-
with st.spinner(f'Generating summaries for {text_chunk_counter}
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
st.error('Enter some text and at least one possible topic to see label predictions.')
|
223 |
else:
|
224 |
st.markdown("### Top Label Predictions on Summary vs Full Text")
|
|
|
48 |
accept_multiple_files=True, key = 'text_uploader',
|
49 |
type = 'txt')
|
50 |
st.write("__Option B:__")
|
51 |
+
uploaded_csv_text_files = st.file_uploader(label='Upload a CSV file with two columns: "title" and "text"',
|
52 |
accept_multiple_files=False, key = 'csv_text_uploader',
|
53 |
type = 'csv')
|
54 |
|
|
|
145 |
|
146 |
|
147 |
if len(text_input) != 0:
|
148 |
+
text_df = pd.DataFrame.from_dict({'title': ['Submitted Text'], 'text': [text_input]})
|
149 |
|
150 |
|
151 |
with st.spinner('Breaking up text into more reasonable chunks (transformers cannot exceed a 1024 token max)...'):
|
|
|
185 |
title_element = ['title']
|
186 |
kw_column_list = ['keyword', 'score']
|
187 |
kw_df = kw_df[kw_df['score'] > 0.25][title_element + kw_column_list].sort_values(title_element + ['score'], ascending=False).reset_index().drop(columns='index')
|
188 |
+
|
189 |
st.dataframe(kw_df)
|
190 |
st.download_button(
|
191 |
label="Download data as CSV",
|
|
|
196 |
|
197 |
|
198 |
st.markdown("### Summary")
|
199 |
+
with st.spinner(f'Generating summaries for {len(text_df)} texts consisting of a total of {text_chunk_counter} chunks (this may take a minute)...'):
|
200 |
+
sum_dict = dict()
|
201 |
+
for i, key in enumerate(text_chunks_lib):
|
202 |
+
with st.expander(label=f'({i+1}/{len(text_df)}) Expand to see intermediate summary generation details for: {key}', expanded=False):
|
203 |
+
# for key in text_chunks_lib:
|
204 |
+
summary = []
|
205 |
+
for num_chunk, text_chunk in enumerate(text_chunks_lib[key]):
|
206 |
+
chunk_summary = md.summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens=300, minimum_tokens=20)
|
207 |
+
summary.append(chunk_summary)
|
208 |
+
|
209 |
+
st.markdown(f"###### Original Text Chunk {num_chunk+1}/{len(text_chunks)}" )
|
210 |
+
st.markdown(text_chunk)
|
211 |
+
st.markdown(f"###### Partial Summary {num_chunk+1}/{len(text_chunks)}")
|
212 |
+
st.markdown(chunk_summary)
|
213 |
+
|
214 |
+
# Combine all the summaries into a list and compress into one document, again
|
215 |
+
final_summary = "\n\n".join(list(summary))
|
216 |
+
sum_dict[key] = [final_summary]
|
217 |
+
|
218 |
+
sum_df = pd.DataFrame.from_dict(sum_dict).reset_index().T
|
219 |
+
# sum_df.columns = ['title', 'summary_text']
|
220 |
+
|
221 |
+
st.dataframe(sum_df)
|
222 |
+
st.download_button(
|
223 |
+
label="Download data as CSV",
|
224 |
+
data=sum_df.to_csv().encode('utf-8'),
|
225 |
+
file_name='title_summary.csv',
|
226 |
+
mime='title_summary/csv',
|
227 |
+
)
|
228 |
+
|
229 |
+
if (len(text_input) == 0 or len(labels) == 0
|
230 |
+
or uploaded_labels_file is None or uploaded_text_files is None or uploaded_csv_text_files is None):
|
231 |
st.error('Enter some text and at least one possible topic to see label predictions.')
|
232 |
else:
|
233 |
st.markdown("### Top Label Predictions on Summary vs Full Text")
|