import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
# NB: assumes a googletrans release with a synchronous translate(),
# e.g. googletrans==3.1.0a0 or 4.0.0rc1.
from googletrans import Translator

# Fetch the sentence tokenizer and stopword list on first run.
nltk.download('punkt')
nltk.download('punkt_tab')  # needed by sent_tokenize on NLTK 3.9+
nltk.download('stopwords')
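
# Extractive-summarization pipeline: split the article into sentences, score
# every sentence pair by cosine similarity, run PageRank over the resulting
# graph (the TextRank approach), and keep the top-ranked sentences as the summary.
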
def read_article(article):
    """Split the article into sentences, keeping only those longer than 10 characters."""
    sentences = nltk.sent_tokenize(article)
    sentences = [sentence for sentence in sentences if len(sentence) > 10]
    return sentences

def sentence_similarity(sent1, sent2, stop_words):
    """Cosine similarity between the word-count vectors of two sentences."""
    # (The parameter is named stop_words to avoid shadowing the nltk.corpus.stopwords module.)
    words1 = nltk.word_tokenize(sent1)
    words2 = nltk.word_tokenize(sent2)

    # Keep only alphanumeric tokens, lowercased for matching.
    words1 = [word.lower() for word in words1 if word.isalnum()]
    words2 = [word.lower() for word in words2 if word.isalnum()]

    all_words = list(set(words1 + words2))

    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)

    # Build count vectors, skipping stopwords.
    for word in words1:
        if word in stop_words:
            continue
        vector1[all_words.index(word)] += 1

    for word in words2:
        if word in stop_words:
            continue
        vector2[all_words.index(word)] += 1

    # Guard against all-zero vectors (e.g. sentences made up entirely of
    # stopwords), where cosine_distance would divide by zero and return NaN.
    if not any(vector1) or not any(vector2):
        return 0.0

    return 1 - cosine_distance(vector1, vector2)
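
# For reference, the value returned above is the cosine similarity of the two
# count vectors, (v1 . v2) / (||v1|| * ||v2||): 1.0 for sentences with the same
# content words, 0.0 for sentences sharing none.
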
def build_similarity_matrix(sentences, stop_words):
    """Pairwise sentence-similarity matrix; the diagonal is left at zero."""
    similarity_matrix = np.zeros((len(sentences), len(sentences)))

    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i == j:
                continue
            similarity_matrix[i][j] = sentence_similarity(sentences[i], sentences[j], stop_words)

    return similarity_matrix
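
# Note: the matrix is symmetric because cosine similarity is, so the double
# loop above does roughly twice the necessary work; fine for short articles.
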
def generate_summary(article, top_n=5):
    """Extractive summary: rank sentences with PageRank and keep the top_n."""
    sentences = read_article(article)
    stop_words = set(stopwords.words('english'))
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)

    # Treat the similarity matrix as a weighted graph and score each
    # sentence (node) with PageRank.
    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
    scores = nx.pagerank(sentence_similarity_graph)

    ranked_sentences = sorted(((scores[i], sentence) for i, sentence in enumerate(sentences)), reverse=True)

    summary = " ".join([sentence for _, sentence in ranked_sentences[:top_n]])
    return summary
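
# A minimal usage sketch (hypothetical text, not part of the Streamlit flow):
#
#   text = "Sentence one about the topic. Sentence two adds detail. ..."
#   print(generate_summary(text, top_n=2))
#
# Note the joined summary keeps rank order, not the original article order.
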
st.set_page_config(page_title="Article Summarizer", page_icon="✍️")
st.title("We summarize and translate your articles")

st.markdown(
    """
    <style>
    .stApp {
        background-color: orange;
        color: #333;
    }
    .stButton button {
        background-color: black;
        color: white;
        border-radius: 5px;
    }
    .stTextInput input {
        color: #333;
        border: 1px solid #008CBA;
        border-radius: 5px;
    }
    .stText {
        color: #333;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

user_article = st.text_area("✍️ Enter your article here:", height=100)

translate = st.checkbox("🌐 Translate Summary")
if translate:
    target_language = st.selectbox("🌐 Select Target Language", ["🇺🇸 English", "🇫🇷 French", "🇪🇸 Spanish", "🇩🇪 German"])
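
# (A dict such as {"🇺🇸 English": "en", "🇫🇷 French": "fr", ...} would be an
# equivalent, shorter way to express the label-to-code mapping used below.)
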
if st.button("Summarize"):
    if user_article:
        summary = generate_summary(user_article)
        st.subheader("📝 Summary:")
        st.write(summary)

        if translate:
            # Map the displayed label to the ISO 639-1 code googletrans expects.
            if target_language == "🇺🇸 English":
                target_language_code = "en"
            elif target_language == "🇫🇷 French":
                target_language_code = "fr"
            elif target_language == "🇪🇸 Spanish":
                target_language_code = "es"
            elif target_language == "🇩🇪 German":
                target_language_code = "de"

            translator = Translator()
            translated_summary = translator.translate(summary, dest=target_language_code)
            st.subheader("📝 Translated Summary:")
            st.write(translated_summary.text)
    else:
        st.warning("🚫 Please enter an article to summarize.")