Paula Leonova
committed on
Commit
·
71541e6
1
Parent(s):
32dc2d3
Update summarization to use BART Large CNN
Browse files
app.py
CHANGED
@@ -22,7 +22,7 @@ ex_long_text = example_long_text_load()
|
|
22 |
######## App Description ##########
|
23 |
###################################
|
24 |
st.markdown("### Long Text Summarization & Multi-Label Classification")
|
25 |
-
st.write("This app summarizes and then classifies your long text(s) with multiple labels using [BART Large MNLI](https://huggingface.co/facebook/bart-large-mnli). The keywords are generated using [KeyBERT](https://github.com/MaartenGr/KeyBERT).")
|
26 |
st.write("__Inputs__: User enters their own custom text(s) and labels.")
|
27 |
st.write("__Outputs__: A summary of the text, likelihood match score for each label and a downloadable csv of the results. \
|
28 |
Includes additional options to generate a list of keywords and/or evaluate results against a list of ground truth labels, if available.")
|
@@ -246,7 +246,7 @@ if submit_button or example_button:
|
|
246 |
# for key in text_chunks_lib:
|
247 |
summary = []
|
248 |
for num_chunk, text_chunk in enumerate(text_chunks_lib[key]):
|
249 |
-
chunk_summary = md.summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens=
|
250 |
summary.append(chunk_summary)
|
251 |
|
252 |
st.markdown(f"###### Original Text Chunk {num_chunk+1}/{len(text_chunks)}" )
|
|
|
22 |
######## App Description ##########
|
23 |
###################################
|
24 |
st.markdown("### Long Text Summarization & Multi-Label Classification")
|
25 |
+
st.write("This app summarizes and then classifies your long text(s) with multiple labels using [BART Large CNN](https://huggingface.co/facebook/bart-large-cnn) for the summarization task and [BART Large MNLI](https://huggingface.co/facebook/bart-large-mnli) for the multi-labels matching. The keywords are independently generated using [KeyBERT](https://github.com/MaartenGr/KeyBERT) and not used in any downstream tasks.")
|
26 |
st.write("__Inputs__: User enters their own custom text(s) and labels.")
|
27 |
st.write("__Outputs__: A summary of the text, likelihood match score for each label and a downloadable csv of the results. \
|
28 |
Includes additional options to generate a list of keywords and/or evaluate results against a list of ground truth labels, if available.")
|
|
|
246 |
# for key in text_chunks_lib:
|
247 |
summary = []
|
248 |
for num_chunk, text_chunk in enumerate(text_chunks_lib[key]):
|
249 |
+
chunk_summary = md.summarizer_gen(summarizer, sequence=text_chunk, maximum_tokens=400, minimum_tokens=100)
|
250 |
summary.append(chunk_summary)
|
251 |
|
252 |
st.markdown(f"###### Original Text Chunk {num_chunk+1}/{len(text_chunks)}" )
|
models.py
CHANGED
@@ -50,7 +50,7 @@ def keyword_gen(kw_model, sequence:str):
|
|
50 |
# Reference: https://huggingface.co/facebook/bart-large-mnli
|
51 |
@st.cache(allow_output_mutation=True)
|
52 |
def load_summary_model():
|
53 |
-
model_name = "facebook/bart-large-
|
54 |
summarizer = pipeline(task='summarization', model=model_name)
|
55 |
return summarizer
|
56 |
|
|
|
50 |
# Reference: https://huggingface.co/facebook/bart-large-mnli
|
51 |
@st.cache(allow_output_mutation=True)
|
52 |
def load_summary_model():
|
53 |
+
model_name = "facebook/bart-large-cnn"
|
54 |
summarizer = pipeline(task='summarization', model=model_name)
|
55 |
return summarizer
|
56 |
|