# olivercareyncl's picture
# Create app.py
# 78036fd verified
# raw
# history blame
# 2.87 kB
import gradio as gr
from transformers import pipeline
import networkx as nx
import numpy as np
import re
import nltk
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Download the sentence-tokenizer data used by sent_tokenize(). Recent NLTK
# releases load the "punkt_tab" resource instead of the pickled "punkt"
# models, so request both to stay compatible with old and new NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')

# Load Transformer model for abstractive summarization. Created once at
# import time and shared by every request handled by this process.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Extractive summarization using TextRank
def extractive_summarization(text, num_sentences=3):
    """Return an extractive (TextRank-style) summary of *text*.

    Sentences are vectorised with TF-IDF, connected in a graph whose edge
    weights are their pairwise cosine similarities, and scored with PageRank.
    The ``num_sentences`` highest-scoring sentences are kept and returned in
    the order they appear in *text*, joined by single spaces.

    Args:
        text: Raw input text.
        num_sentences: Number of sentences to keep in the summary.

    Returns:
        The summary string. *text* is returned unchanged when it contains no
        more than ``num_sentences`` sentences.
    """
    sentences = sent_tokenize(text)
    if len(sentences) <= num_sentences:
        return text  # If text is short, return as is

    try:
        sentence_vectors = TfidfVectorizer(stop_words="english").fit_transform(sentences)
    except ValueError:
        # Raised for an empty vocabulary (every sentence consists only of
        # stop words / punctuation). Nothing can be ranked, so fall back to
        # the leading sentences instead of crashing the request.
        return " ".join(sentences[:num_sentences])

    similarity_matrix = cosine_similarity(sentence_vectors)
    scores = nx.pagerank(nx.from_numpy_array(similarity_matrix))

    # Rank sentence indices by score (stable sort: ties keep document order),
    # then re-sort the winners by position so the summary reads in the same
    # order as the source text.
    ranked_indices = sorted(range(len(sentences)), key=scores.get, reverse=True)
    selected = sorted(ranked_indices[:num_sentences])
    return " ".join(sentences[i] for i in selected)
# Abstractive summarization using BART
def abstractive_summarization(text, length):
    """Return an abstractive summary of *text* from the BART pipeline.

    Args:
        text: Raw input text.
        length: One of ``"short"``, ``"medium"`` or ``"long"``; selects the
            ``max_length`` passed to the model (50, 100 or 150).

    Returns:
        The generated summary, or a fixed notice string when *text* has
        fewer than 30 whitespace-separated words.

    Raises:
        KeyError: If *length* is not one of the three supported values.
    """
    if len(text.split()) < 30:
        return "Text is too short for summarization."

    max_length = {"short": 50, "medium": 100, "long": 150}[length]
    result = summarizer(
        text,
        max_length=max_length,
        min_length=30,
        do_sample=False,
        # Inputs longer than the model's input window would otherwise raise
        # inside the model; truncate them to the supported length instead.
        truncation=True,
    )
    return result[0]['summary_text']
# Main function
def summarize_text(text, method, length):
    """Dispatch *text* to the summarizer selected by *method*.

    ``"Abstractive (BART)"`` routes to ``abstractive_summarization``; any
    other value routes to ``extractive_summarization`` with a sentence count
    of 2, 4 or 6 for a *length* of ``"short"``, ``"medium"`` or ``"long"``.
    """
    if method != "Abstractive (BART)":
        sentence_budget = {"short": 2, "medium": 4, "long": 6}
        return extractive_summarization(text, sentence_budget[length])
    return abstractive_summarization(text, length)
# Function to process file upload
def process_file(file):
    """Return the text content of an uploaded file, decoded as UTF-8.

    The value handed to this callback depends on the Gradio version and the
    ``File`` component's ``type`` setting, so several shapes are accepted:

    * ``None`` (the upload was cleared) -> empty string;
    * a filesystem path (``str`` / ``os.PathLike``);
    * an object whose ``.name`` is the path of an existing file (the
      content is read from that path, not from the object itself);
    * raw ``bytes`` / ``bytearray``;
    * any other object exposing ``.read()``.

    Args:
        file: The uploaded file in one of the forms listed above.

    Returns:
        The decoded text. A leading UTF-8 byte-order mark, if present, is
        dropped.

    Raises:
        UnicodeDecodeError: If the content is not valid UTF-8.
        OSError: If a given path cannot be opened.
    """
    import os  # local import: the module has no top-level ``import os``

    if file is None:
        return ""

    if isinstance(file, (bytes, bytearray)):
        raw = bytes(file)
    else:
        if isinstance(file, (str, os.PathLike)):
            path = file
        else:
            # Upload wrappers may expose the real location via ``.name``
            # while the object itself is not positioned on the data.
            name = getattr(file, "name", None)
            path = name if isinstance(name, str) and os.path.isfile(name) else None

        if path is not None:
            with open(path, "rb") as handle:
                raw = handle.read()
        else:
            raw = file.read()

    if isinstance(raw, str):
        # The object was opened in text mode; nothing left to decode.
        return raw
    # "utf-8-sig" decodes plain UTF-8 too, and strips a BOM when present.
    return raw.decode("utf-8-sig")
# UI with Gradio
# Layout: title and description, a row with the two option selectors, then
# the text box, file upload, action button and read-only output.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 📄 AI-Powered Text Summarizer")
    gr.Markdown("Summarize long articles, news, and research papers using advanced NLP models.")

    with gr.Row():
        method_choice = gr.Radio(["Abstractive (BART)", "Extractive (TextRank)"], label="Summarization Type", value="Abstractive (BART)")
        length_choice = gr.Radio(["short", "medium", "long"], label="Summary Length", value="medium")

    text_input = gr.Textbox(lines=8, placeholder="Paste long text here...", label="Input Text")
    file_input = gr.File(label="Or Upload a .txt file")
    summarize_button = gr.Button("Summarize ✨")
    summary_output = gr.Textbox(lines=6, label="Summarized Text", interactive=False)

    # Uploading a file copies its text into the input box; the button then
    # summarizes whatever the input box currently holds.
    file_input.change(process_file, inputs=file_input, outputs=text_input)
    summarize_button.click(summarize_text, inputs=[text_input, method_choice, length_choice], outputs=summary_output)

# Launch app
if __name__ == "__main__":
    iface.launch()