HF-LLM-Intent-Detection / src /E_Summarization.py
georgeek's picture
Transfer
5ecde30
from functools import lru_cache

from transformers import MBartForConditionalGeneration, MBart50Tokenizer
@lru_cache(maxsize=None)
def _load_mbart(model_name):
    """Load the mBART tokenizer and model for *model_name*, memoized per name.

    Loading the checkpoint is by far the most expensive step, so the pair is
    kept for the lifetime of the process instead of being rebuilt per call.
    """
    tokenizer = MBart50Tokenizer.from_pretrained(model_name)
    model = MBartForConditionalGeneration.from_pretrained(model_name)
    return tokenizer, model


def summarize_text(text, max_length=150, min_length=30, num_beams=4):
    """Generate text from *text* with the mBART-50 many-to-many checkpoint.

    Args:
        text: Input text. It is truncated to 1024 tokens by the tokenizer.
        max_length: Maximum length passed to ``generate`` (coerced to ``int``).
        min_length: Minimum length passed to ``generate`` (coerced to ``int``).
        num_beams: Beam-search width passed to ``generate`` (coerced to ``int``).

    Returns:
        The first generated sequence decoded with special tokens removed, or
        an empty string when *text* is an empty / whitespace-only string.

    Raises:
        ValueError, TypeError: If a length/beam argument cannot be converted
            to ``int``.
    """
    # Coerce first so bad arguments fail before the expensive model load.
    max_length = int(max_length)
    min_length = int(min_length)
    num_beams = int(num_beams)

    # Nothing to summarize: skip loading the model entirely.
    if isinstance(text, str) and not text.strip():
        return ""

    # NOTE(review): this checkpoint is published as a translation model; the
    # mBART-50 docs set ``tokenizer.src_lang`` and ``forced_bos_token_id`` for
    # generation -- confirm whether that should be configured here.
    tokenizer, model = _load_mbart("facebook/mbart-large-50-many-to-many-mmt")

    # Tokenize the input text
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)

    # Generate the summary; forward the attention mask produced by the
    # tokenizer rather than letting ``generate`` infer one.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        min_length=min_length,
        num_beams=num_beams,
        length_penalty=2.0,
        early_stopping=True,
    )

    # Decode the summary
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
@lru_cache(maxsize=None)
def _load_simple_summarizer():
    """Build the Falconsai summarization pipeline once and reuse it."""
    # Imported lazily so the pipeline machinery is only pulled in when used.
    from transformers import pipeline

    return pipeline("summarization", model="Falconsai/text_summarization")


# Simple summarization function
def simple_summarize_text(text):
    """Summarize *text* with the ``Falconsai/text_summarization`` pipeline.

    The pipeline is called with ``max_length=50``, ``min_length=30`` and
    sampling disabled.

    Args:
        text: Input text to summarize.

    Returns:
        The ``summary_text`` field of the first pipeline result, or an empty
        string when *text* is an empty / whitespace-only string.
    """
    # Nothing to summarize: avoid building the pipeline at all.
    if isinstance(text, str) and not text.strip():
        return ""

    summarizer = _load_simple_summarizer()
    results = summarizer(text, max_length=50, min_length=30, do_sample=False)
    return results[0]['summary_text']
# Example text to summarize
#user_text = 'Cat o sa mai astept sa imi deblocati cartela ca nu pot vorbi in Spania si toti prietenii mei asteapta sa ii sun de sarbatori. Deci cand rezolvati problema mea cu cartela?'
#model_name = "facebook/mbart-large-cc25"
def example_summarize_text(model_name, text):
    """Summarize *text* with ``summarize_text`` and print the result.

    Args:
        model_name: Kept for backward compatibility with existing callers.
            It is currently ignored because ``summarize_text`` takes no model
            argument and selects its own checkpoint.
        text: The text to summarize.
    """
    # Model for multi-language summarization.
    # Only the text is forwarded: summarize_text's first positional parameter
    # is the text and its second is max_length, so passing model_name first
    # would summarize the model name and try to int() the text.
    summary = summarize_text(text)
    print("Summary:", summary)
#example_summarize_text(model_name, user_text)
#simple_summarize_text(user_text)