"""Streamlit app that produces an extractive summary of an article.

Sentences are compared pairwise with cosine similarity over word-count
vectors, the resulting matrix is turned into a graph, and PageRank scores
decide which sentences make up the summary. The summary can optionally be
translated with googletrans.
"""

import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
from googletrans import Translator

# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')

# Display name -> language code passed to the translator. Insertion order is
# also the order of the options shown in the select box.
LANGUAGE_CODES = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
}


def read_article(article):
    """Split *article* into sentences, dropping very short ones.

    Args:
        article: Raw article text.

    Returns:
        A list of the sentences longer than 10 characters, in document order.
    """
    return [
        sentence
        for sentence in nltk.sent_tokenize(article)
        if len(sentence) > 10  # Filter out very short sentences
    ]


def sentence_similarity(sent1, sent2, stopwords):
    """Return the cosine similarity of two sentences.

    Each sentence is tokenized, lower-cased, and reduced to its alphanumeric
    tokens that are not in *stopwords*; the similarity is computed on the
    resulting word-count vectors.

    Args:
        sent1: First sentence.
        sent2: Second sentence.
        stopwords: Collection of lower-case words to ignore.

    Returns:
        ``1 - cosine_distance`` of the two count vectors, or ``0.0`` when
        either sentence has no countable word left.
    """

    def content_words(sentence):
        lowered = (
            token.lower()
            for token in nltk.word_tokenize(sentence)
            if token.isalnum()
        )
        return [word for word in lowered if word not in stopwords]

    words1 = content_words(sent1)
    words2 = content_words(sent2)

    # An all-zero vector makes cosine_distance divide by zero and yield NaN,
    # which would then poison the similarity matrix and the PageRank run.
    if not words1 or not words2:
        return 0.0

    # Map each distinct word to a vector position once instead of scanning a
    # list with .index() for every token.
    position = {
        word: slot
        for slot, word in enumerate(dict.fromkeys(words1 + words2))
    }
    vector1 = [0] * len(position)
    vector2 = [0] * len(position)
    for word in words1:
        vector1[position[word]] += 1
    for word in words2:
        vector2[position[word]] += 1

    return 1 - cosine_distance(vector1, vector2)


def build_similarity_matrix(sentences, stopwords):
    """Build the pairwise sentence similarity matrix.

    Args:
        sentences: List of sentences.
        stopwords: Collection of words ignored when comparing sentences.

    Returns:
        A square NumPy array where entry ``[i][j]`` is the similarity of
        sentences ``i`` and ``j``; the diagonal is left at zero.
    """
    count = len(sentences)
    similarity_matrix = np.zeros((count, count))
    for i in range(count):
        # Similarity is symmetric, so compute each pair once and mirror it.
        for j in range(i + 1, count):
            score = sentence_similarity(sentences[i], sentences[j], stopwords)
            similarity_matrix[i][j] = score
            similarity_matrix[j][i] = score
    return similarity_matrix


def generate_summary(article, top_n=5):
    """Summarize *article* by joining its highest-ranked sentences.

    Args:
        article: Raw article text.
        top_n: Maximum number of sentences to include in the summary.

    Returns:
        The top-ranked sentences joined with single spaces, highest PageRank
        score first, or an empty string when no sentence survives filtering.
    """
    sentences = read_article(article)
    if not sentences:
        return ""

    stop_words = set(stopwords.words('english'))
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)

    # Create a graph from the similarity matrix
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)

    # Use the PageRank algorithm to rank the sentences
    scores = nx.pagerank(sentence_similarity_graph)

    # Sort the sentences by score, best first
    ranked_sentences = sorted(
        ((scores[i], sentence) for i, sentence in enumerate(sentences)),
        reverse=True,
    )

    # Get the top N sentences as the summary
    return " ".join(sentence for _, sentence in ranked_sentences[:top_n])


# Streamlit web app
st.title("Article Summarizer")

user_article = st.text_area("Enter your article here:")
translate = st.checkbox("Translate Summary")
target_language = st.selectbox("Select Target Language", list(LANGUAGE_CODES))

if st.button("Summarize"):
    # Treat whitespace-only input the same as no input.
    if user_article and user_article.strip():
        summary = generate_summary(user_article)
        st.subheader("Summary:")
        st.write(summary)

        if translate:
            target_language_code = LANGUAGE_CODES[target_language]
            try:
                translator = Translator()
                translated_summary = translator.translate(
                    summary, dest=target_language_code
                )
            except Exception as exc:
                # UI boundary: report the failure instead of crashing the
                # run, so the untranslated summary above stays visible.
                st.error(f"Translation failed: {exc}")
            else:
                st.subheader("Translated Summary:")
                st.write(translated_summary.text)
    else:
        st.warning("Please enter an article to summarize.")