File size: 4,051 Bytes
8a890e9 c13fd5d d9f1d02 8a890e9 c13fd5d 9682bdc c13fd5d c85d7fb c13fd5d ded9c3a aab69e8 c13fd5d 5f1623c c13fd5d 8a890e9 9682bdc 8a890e9 9682bdc d9f1d02 8de644b 8a890e9 c13fd5d 8a890e9 9682bdc d9f1d02 4a3dc04 9682bdc 4a3dc04 8de644b 4a3dc04 8a890e9 9682bdc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
from googletrans import Translator
# Download the NLTK resources used by the tokenizers and the stop-word list.
# Newer NLTK releases load the sentence tokenizer from "punkt_tab" rather than
# the pickled "punkt" model, so both are requested to keep
# nltk.sent_tokenize / nltk.word_tokenize working across NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')
# Function to read and preprocess the article
def read_article(article, min_length=10):
    """Split ``article`` into sentences and drop the very short ones.

    Args:
        article: Raw article text.
        min_length: A sentence is kept only if it is strictly longer than
            this many characters. Defaults to 10.

    Returns:
        A list of the remaining sentence strings, in their original order.
    """
    return [
        sentence
        for sentence in nltk.sent_tokenize(article)
        if len(sentence) > min_length
    ]
# Function to compute sentence similarity based on cosine similarity
def sentence_similarity(sent1, sent2, stopwords):
    """Return the cosine similarity of two sentences' word-count vectors.

    Each sentence is tokenized with ``nltk.word_tokenize``. A token is counted
    only if ``str.isalnum()`` is true for it and its lower-cased form is not
    in ``stopwords``.

    Args:
        sent1: First sentence.
        sent2: Second sentence.
        stopwords: Collection of lower-case words to ignore. Membership is
            tested once per token.

    Returns:
        ``1 - cosine_distance(vector1, vector2)``, or ``0.0`` when either
        sentence has no countable words left after filtering.
    """
    def content_words(sentence):
        # Lower-cased alphanumeric tokens that are not stop words.
        return [
            token.lower()
            for token in nltk.word_tokenize(sentence)
            if token.isalnum() and token.lower() not in stopwords
        ]

    words1 = content_words(sent1)
    words2 = content_words(sent2)

    # A sentence with nothing to count gives an all-zero vector, and the
    # cosine distance would then divide by zero and return NaN. Treat such a
    # sentence as sharing nothing with the other one.
    if not words1 or not words2:
        return 0.0

    # Map each distinct word to a vector position once, instead of searching
    # a list for every token.
    position = {word: i for i, word in enumerate(set(words1 + words2))}
    vector1 = [0] * len(position)
    vector2 = [0] * len(position)
    for word in words1:
        vector1[position[word]] += 1
    for word in words2:
        vector2[position[word]] += 1

    return 1 - cosine_distance(vector1, vector2)
# Function to create a similarity matrix of sentences
def build_similarity_matrix(sentences, stopwords):
    """Build the square matrix of pairwise sentence similarities.

    Args:
        sentences: Sequence of sentence strings.
        stopwords: Words to ignore, forwarded to ``sentence_similarity``.

    Returns:
        A ``len(sentences) x len(sentences)`` NumPy array in which entry
        ``[i][j]`` holds the similarity of sentences ``i`` and ``j``. The
        diagonal is left at 0 because a sentence is never compared to itself.
    """
    count = len(sentences)
    similarity_matrix = np.zeros((count, count))
    # Cosine similarity does not depend on argument order, so each unordered
    # pair is scored once and written to both mirrored cells.
    for i in range(count):
        for j in range(i + 1, count):
            score = sentence_similarity(sentences[i], sentences[j], stopwords)
            similarity_matrix[i][j] = score
            similarity_matrix[j][i] = score
    return similarity_matrix
# Function to generate the article summary
def generate_summary(article, top_n=5):
    """Build an extractive summary from the highest-ranked sentences.

    The article is split with ``read_article``, the sentences are compared
    pairwise with ``build_similarity_matrix`` (ignoring English stop words),
    and PageRank is run over the graph built from that matrix.

    Args:
        article: Article text to summarize.
        top_n: Upper bound on the number of sentences taken from the ranking.

    Returns:
        The selected sentences joined by single spaces, highest score first.
    """
    candidates = read_article(article)
    english_stop_words = set(stopwords.words('english'))
    matrix = build_similarity_matrix(candidates, english_stop_words)

    # One graph node per sentence index, built from the similarity matrix.
    graph = nx.from_numpy_array(matrix)
    rank_by_index = nx.pagerank(graph)

    # Pair every sentence with its score, then order best-first.
    scored = [(rank_by_index[idx], text) for idx, text in enumerate(candidates)]
    scored.sort(reverse=True)

    chosen = [text for _score, text in scored[:top_n]]
    return " ".join(chosen)
# Streamlit web app with improved styling
# Page metadata and heading.
# NOTE(review): the page_icon value looks like a mis-decoded emoji rather than
# an intentional character -- confirm against the original file.
st.set_page_config(page_title="Article Summarizer", page_icon="π")
st.title("Article Summarizer")

# Stylesheet injected into the page: app background, button colours and
# text colours.
_CUSTOM_CSS = """
<style>
.stApp {
background-color: #f7f7f7;
}
.stButton button {
background-color: #008CBA;
color: white;
}
.stTextInput input {
color: #333;
}
.stText {
color: #333;
}
</style>
"""

# unsafe_allow_html is needed so the <style> tag is rendered as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Display name offered in the select box -> language code handed to the
# translator.
LANGUAGE_CODES = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
}

user_article = st.text_area("Enter your article here:", height=200)
translate = st.checkbox("Translate Summary")

# Only ask for a target language when translation is switched on.
target_language = None
if translate:
    target_language = st.selectbox("Select Target Language", list(LANGUAGE_CODES))

if st.button("Summarize"):
    # Whitespace-only input is treated the same as no input.
    if user_article.strip():
        summary = generate_summary(user_article)
        if summary:
            st.subheader("Summary:")
            st.write(summary)
            if translate:
                translator = Translator()
                translated_summary = translator.translate(
                    summary, dest=LANGUAGE_CODES[target_language]
                )
                st.subheader("Translated Summary:")
                st.write(translated_summary.text)
        else:
            # generate_summary returns an empty string when no sentence
            # survives its filtering, so there is nothing to show or translate.
            st.warning("The article is too short to summarize. Please enter a longer text.")
    else:
        st.warning("Please enter an article to summarize.")
|