File size: 4,051 Bytes
8a890e9
c13fd5d
 
 
 
 
d9f1d02
8a890e9
c13fd5d
 
 
 
 
 
 
 
 
 
 
 
 
9682bdc
c13fd5d
 
 
 
c85d7fb
c13fd5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ded9c3a
aab69e8
c13fd5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f1623c
c13fd5d
 
 
8a890e9
 
9682bdc
 
8a890e9
9682bdc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9f1d02
8de644b
 
 
8a890e9
 
c13fd5d
 
8a890e9
 
9682bdc
d9f1d02
4a3dc04
 
 
 
 
 
 
 
9682bdc
4a3dc04
 
8de644b
4a3dc04
8a890e9
9682bdc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
from googletrans import Translator

# Download the NLTK resources this app relies on: tokenizer models for
# nltk.sent_tokenize / nltk.word_tokenize and the stopword lists.
nltk.download('punkt')
# Newer NLTK releases load the tokenizer from 'punkt_tab' instead of 'punkt';
# fetching both keeps the app working across NLTK versions.
nltk.download('punkt_tab')
nltk.download('stopwords')

# Function to read and preprocess the article
def read_article(article):
    """Split *article* into sentences, dropping very short ones.

    Args:
        article: Raw article text.

    Returns:
        A list of the sentences produced by ``nltk.sent_tokenize`` whose
        length is greater than 10 characters, in their original order.
    """
    kept = []
    for candidate in nltk.sent_tokenize(article):
        # Fragments of 10 characters or fewer are discarded as noise.
        if len(candidate) <= 10:
            continue
        kept.append(candidate)
    return kept

# Function to compute sentence similarity based on cosine similarity
def sentence_similarity(sent1, sent2, stopwords):
    """Return the cosine similarity of two sentences' bag-of-words vectors.

    Each sentence is tokenized with ``nltk.word_tokenize``; only alphanumeric
    tokens are kept, lowercased, and tokens found in *stopwords* are ignored.
    The remaining words are counted into two vectors over the combined
    vocabulary.

    Args:
        sent1: First sentence.
        sent2: Second sentence.
        stopwords: Collection of lowercase words to ignore. (The parameter
            name shadows the imported ``stopwords`` corpus inside this
            function only.)

    Returns:
        ``1 - cosine_distance(vector1, vector2)``, or ``0.0`` when either
        sentence has no countable words.
    """
    def _content_words(sentence):
        # Lowercased alphanumeric tokens of the sentence, minus stopwords.
        lowered = (
            token.lower()
            for token in nltk.word_tokenize(sentence)
            if token.isalnum()
        )
        return [word for word in lowered if word not in stopwords]

    words1 = _content_words(sent1)
    words2 = _content_words(sent2)

    # An all-zero vector has no direction: the cosine distance would divide
    # zero by zero and yield NaN, which would then leak into the similarity
    # matrix. Treat such sentences as sharing nothing with any other.
    if not words1 or not words2:
        return 0.0

    # Map each vocabulary word to its vector slot once, instead of doing a
    # linear list.index() search for every word occurrence.
    positions = {word: slot for slot, word in enumerate(set(words1 + words2))}

    vector1 = [0] * len(positions)
    vector2 = [0] * len(positions)

    for word in words1:
        vector1[positions[word]] += 1

    for word in words2:
        vector2[positions[word]] += 1

    return 1 - cosine_distance(vector1, vector2)

# Function to create a similarity matrix of sentences
def build_similarity_matrix(sentences, stopwords):
    """Build the pairwise sentence-similarity matrix.

    Args:
        sentences: List of sentences to compare.
        stopwords: Collection of words passed through to
            ``sentence_similarity`` to be ignored.

    Returns:
        A square NumPy array of shape ``(len(sentences), len(sentences))``
        where entry ``[i][j]`` is the similarity of sentence ``i`` to
        sentence ``j``; the diagonal is left at zero.
    """
    count = len(sentences)
    matrix = np.zeros((count, count))

    for row, first in enumerate(sentences):
        for col, second in enumerate(sentences):
            # A sentence is never compared with itself.
            if row != col:
                matrix[row][col] = sentence_similarity(first, second, stopwords)

    return matrix

# Function to generate the article summary
def generate_summary(article, top_n=5):
    """Produce an extractive summary of *article*.

    The article is split into sentences, a sentence-similarity graph is
    built from them, and PageRank scores on that graph are used to pick
    the sentences for the summary.

    Args:
        article: Raw article text.
        top_n: Maximum number of sentences to include in the summary.

    Returns:
        The ``top_n`` highest-scoring sentences joined by single spaces,
        ordered from highest to lowest score.
    """
    sentences = read_article(article)
    english_stopwords = set(stopwords.words('english'))

    # Graph whose edge weights are the pairwise sentence similarities.
    graph = nx.from_numpy_array(
        build_similarity_matrix(sentences, english_stopwords)
    )
    scores = nx.pagerank(graph)

    # Pair every sentence with its score and order best-first.
    scored = [(scores[position], text) for position, text in enumerate(sentences)]
    scored.sort(reverse=True)

    best = scored[:top_n]
    return " ".join(text for _, text in best)

# Streamlit web app with improved styling.
# The page icon is the "page facing up" emoji; the previous literal was a
# mis-decoded (mojibake) rendering of that emoji's UTF-8 bytes.
st.set_page_config(page_title="Article Summarizer", page_icon="📄")
st.title("Article Summarizer")

# Custom CSS to style the app
st.markdown(
    """
    <style>
    .stApp {
        background-color: #f7f7f7;
    }
    .stButton button {
        background-color: #008CBA;
        color: white;
    }
    .stTextInput input {
        color: #333;
    }
    .stText {
        color: #333;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# Display name -> language code handed to the translator.
LANGUAGE_CODES = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
}

user_article = st.text_area("Enter your article here:", height=200)
translate = st.checkbox("Translate Summary")

if translate:
    target_language = st.selectbox("Select Target Language", list(LANGUAGE_CODES))

if st.button("Summarize"):
    # Whitespace-only input is treated the same as no input.
    if user_article and user_article.strip():
        summary = generate_summary(user_article)
        st.subheader("Summary:")
        st.write(summary)

        # Skip translation when there is nothing to translate.
        if translate and summary:
            try:
                translated_summary = Translator().translate(
                    summary, dest=LANGUAGE_CODES[target_language]
                )
            except Exception as exc:
                # The translator calls a remote service; report any failure
                # in the UI instead of crashing the app with a traceback.
                st.error(f"Translation failed: {exc}")
            else:
                st.subheader("Translated Summary:")
                st.write(translated_summary.text)
    else:
        st.warning("Please enter an article to summarize.")