brian-challenge / fill_in_summary.py
Christian Koch
fix missing import, some cleanup
cc3c391
raw
history blame
2.19 kB
from transformers import pipeline
class FillInSummary:
    """Organize summarization and the subsequent fill-in task.

    Attributes are documented here rather than inline:

    summarizer
        Summarization pipeline built on ``facebook/bart-large-cnn``.
    ner
        Named-entity-recognition pipeline using the library's default model
        with ``aggregation_strategy='simple'``.
    """

    def __init__(self):
        """Initialize the class with a summarizer and a NER model."""
        # Refer to https://huggingface.co/docs/transformers/v4.18.0/en/main_classes/pipelines#transformers.SummarizationPipeline
        # for further information about configuration.
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        # Using default model: https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english
        self.ner = pipeline("ner", aggregation_strategy='simple')

    def summarize(self, text: str, *, max_length: int = 400, min_length: int = 100) -> str:
        """Summarize the given text.

        Parameters
        ----------
        text : str
            Text to be summarized. Must not exceed BART's maximal input length.
        max_length : int, optional
            Forwarded to the summarizer as ``max_length`` (default 400).
        min_length : int, optional
            Forwarded to the summarizer as ``min_length`` (default 100).

        Returns
        -------
        str
            The ``'summary_text'`` of the first result returned by the
            summarizer.
        """
        # Refer to https://huggingface.co/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig
        # for further configuration of the parameters passed below.
        output: list = self.summarizer(
            text,
            max_length=max_length,
            min_length=min_length,
            do_sample=False)
        return output[0]['summary_text']

    def blank_ne_out(self, text: str) -> dict:
        """Blank out named entities.

        Transforms 'X did this.' to {
            'text': '_ did this.',
            'ner': [{
                'end': 1,
                'entity_group': 'ORG',
                'score': 0.73085225,
                'start': 0,
                'word': 'X'
            }]}

        Parameters
        ----------
        text : str
            Summarized text.

        Returns
        -------
        dict
            Contains the blanked-out text under ``'text'`` and the unmodified
            list of recognized named entities under ``'ner'``.
        """
        ner_list: list = self.ner(text)
        output_str: str = text
        for entity in ner_list:
            start, end = entity['start'], entity['end']
            # Replace the span with the same number of underscores so the
            # string length is preserved and the offsets of the remaining
            # entities stay valid.
            output_str = output_str[:start] + "_" * (end - start) + output_str[end:]
        return {
            'text': output_str,
            'ner': ner_list
        }