|
from transformers import MBartForConditionalGeneration, MBart50Tokenizer |
|
|
|
# Cache of loaded (tokenizer, model) pairs, keyed by checkpoint name, so that
# repeated calls do not re-download / re-deserialize the weights every time.
_SUMMARIZER_CACHE = {}


def _load_summarizer(model_name):
    """Return a cached ``(tokenizer, model)`` pair for *model_name*.

    Loading a pretrained checkpoint is by far the most expensive step, so it
    is done at most once per process per checkpoint.
    """
    if model_name not in _SUMMARIZER_CACHE:
        tokenizer = MBart50Tokenizer.from_pretrained(model_name)
        model = MBartForConditionalGeneration.from_pretrained(model_name)
        _SUMMARIZER_CACHE[model_name] = (tokenizer, model)
    return _SUMMARIZER_CACHE[model_name]


def summarize_text(text, max_length=150, min_length=30, num_beams=4):
    """Summarize *text* with an mBART-50 model using beam search.

    Args:
        text: Input text to summarize; truncated to 1024 tokens.
        max_length: Maximum length (in tokens) of the generated summary.
        min_length: Minimum length (in tokens) of the generated summary.
        num_beams: Number of beams for beam search.

    Returns:
        The decoded summary string (special tokens stripped).
    """
    # NOTE(review): this checkpoint is a many-to-many *translation* model, not
    # a summarization fine-tune — confirm it is the intended choice.
    model_name = "facebook/mbart-large-50-many-to-many-mmt"

    tokenizer, model = _load_summarizer(model_name)

    # Coerce generation knobs in case callers pass strings (e.g. from a form).
    max_length = int(max_length)
    min_length = int(min_length)
    num_beams = int(num_beams)

    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)

    summary_ids = model.generate(
        inputs["input_ids"],
        # Pass the attention mask so padding (if any) is ignored; omitting it
        # triggers a warning and can degrade generation quality.
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        min_length=min_length,
        num_beams=num_beams,
        length_penalty=2.0,
        early_stopping=True
    )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary
|
|
|
|
|
|
|
def simple_summarize_text(text):
    """Summarize *text* with the lightweight Falconsai summarization pipeline.

    Returns a summary between 30 and 50 tokens, generated greedily
    (``do_sample=False``).
    """
    from transformers import pipeline

    summarizer = pipeline("summarization", model="Falconsai/text_summarization")
    results = summarizer(text, max_length=50, min_length=30, do_sample=False)
    return results[0]['summary_text']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def example_summarize_text(model_name, text):
    """Demo helper: summarize *text* and print the result.

    BUG FIX: the original called ``summarize_text(model_name, text)``, which
    bound ``model_name`` to the ``text`` parameter and ``text`` to
    ``max_length`` — so ``int(text)`` raised ``ValueError`` for any real text.

    Args:
        model_name: Currently unused; ``summarize_text`` hard-codes its own
            checkpoint. Kept for backward compatibility with existing callers.
        text: The text to summarize.
    """
    summary = summarize_text(text)
    print("Summary:", summary)
|
|
|
|
|
|
|
|
|
|