from collections import Counter

import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
from googletrans import Translator


def _ensure_nltk_resource(resource_path, package):
    """Download an NLTK package only if it cannot be found locally.

    Streamlit re-executes this script on every user interaction, so an
    unconditional ``nltk.download`` would be repeated on each rerun.
    """
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package)


# Download NLTK resources
# NOTE(review): recent NLTK releases may additionally require the
# 'punkt_tab' package for sentence tokenization -- confirm against the
# installed NLTK version.
_ensure_nltk_resource('tokenizers/punkt', 'punkt')
_ensure_nltk_resource('corpora/stopwords', 'stopwords')


def read_article(article):
    """Split *article* into sentences, dropping those of 10 characters or fewer."""
    sentences = nltk.sent_tokenize(article)
    # filter very short sentences
    return [sentence for sentence in sentences if len(sentence) > 10]


def _term_counts(sentence, stop_words):
    """Count the lowercased alphanumeric tokens of *sentence* that are not stopwords."""
    tokens = (
        token.lower()
        for token in nltk.word_tokenize(sentence)
        if token.isalnum()
    )
    return Counter(token for token in tokens if token not in stop_words)


def sentence_similarity(sent1, sent2, stopwords):
    """Return the cosine similarity of two sentences' word-count vectors.

    Args:
        sent1: First sentence.
        sent2: Second sentence.
        stopwords: Collection of lowercase words to ignore. (The name
            shadows the imported corpus module inside this function; it is
            kept so existing keyword callers continue to work.)

    Returns:
        ``1 - cosine_distance`` of the two count vectors, or ``0.0`` when
        either sentence has no countable (non-stopword, alphanumeric) token.
    """
    counts1 = _term_counts(sent1, stopwords)
    counts2 = _term_counts(sent2, stopwords)

    # A sentence made only of stopwords/punctuation yields an all-zero
    # vector; cosine_distance would then divide by a zero norm and produce
    # NaN, which corrupts the PageRank edge weights downstream.
    if not counts1 or not counts2:
        return 0.0

    vocabulary = list(set(counts1) | set(counts2))
    vector1 = [counts1[word] for word in vocabulary]
    vector2 = [counts2[word] for word in vocabulary]
    return 1 - cosine_distance(vector1, vector2)


def build_similarity_matrix(sentences, stopwords):
    """Build the pairwise sentence-similarity matrix.

    Args:
        sentences: Sequence of sentence strings.
        stopwords: Collection of words ignored when comparing sentences.

    Returns:
        A square ``numpy`` array of shape ``(len(sentences), len(sentences))``
        with a zero diagonal and ``sentence_similarity`` scores elsewhere.
    """
    count = len(sentences)
    similarity_matrix = np.zeros((count, count))
    for i in range(count):
        # Similarity is symmetric, so score each unordered pair once and
        # mirror the value instead of tokenizing both orders.
        for j in range(i + 1, count):
            score = sentence_similarity(sentences[i], sentences[j], stopwords)
            similarity_matrix[i][j] = score
            similarity_matrix[j][i] = score
    return similarity_matrix


def generate_summary(article, top_n=5):
    """Summarize *article* by ranking its sentences with PageRank.

    Args:
        article: Text to summarize.
        top_n: Maximum number of sentences to keep.

    Returns:
        The ``top_n`` highest-scoring sentences joined by single spaces, in
        descending score order; an empty string when the article contains
        no sentence longer than 10 characters.
    """
    sentences = read_article(article)
    if not sentences:
        # Nothing to rank; skip loading stopwords and building the graph.
        return ""

    stop_words = set(stopwords.words('english'))
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    scores = nx.pagerank(sentence_similarity_graph)

    ranked_sentences = sorted(
        ((scores[i], sentence) for i, sentence in enumerate(sentences)),
        reverse=True,
    )
    return " ".join(sentence for _, sentence in ranked_sentences[:top_n])


# Set page configuration and styles
st.set_page_config(page_title="Article Summarizer", page_icon="✍️")
st.title("We summarize and translate your articles")
st.markdown(
    """ """,
    unsafe_allow_html=True,
)

# Input text area
user_article = st.text_area("✒️ Enter your article here:", height=100)

# Display label -> language code passed to the translator. The same table
# feeds the selectbox, so the options and the codes cannot drift apart.
LANGUAGE_CODES = {
    "🇺🇸 English": "en",
    "🇫🇷 French": "fr",
    "🇪🇸 Spanish": "es",
    "🇩🇪 German": "de",
}

# Translation options
translate = st.checkbox("🌐 Translate Summary")
if translate:
    target_language = st.selectbox("🌎 Select Target Language", list(LANGUAGE_CODES))

# Summarize button
if st.button("Summarize"):
    # Whitespace-only input is treated the same as no input.
    if user_article.strip():
        summary = generate_summary(user_article)
        st.subheader("📄 Summary:")
        st.write(summary)

        # Translation logic
        if translate:
            target_language_code = LANGUAGE_CODES[target_language]
            translator = Translator()
            try:
                translated_text = translator.translate(
                    summary, dest=target_language_code
                ).text
            except Exception as exc:
                # UI boundary: the translation call depends on an external
                # service, so report the failure in the page instead of
                # letting the script crash with a traceback.
                st.error(f"🚫 Translation failed: {exc}")
            else:
                st.subheader("🌐 Translated Summary:")
                st.write(translated_text)
    else:
        st.warning("🚫 Please enter an article to summarize.")