File size: 3,599 Bytes
8a890e9
c13fd5d
 
 
 
 
d9f1d02
8a890e9
c13fd5d
 
 
 
 
 
 
 
 
 
 
 
 
aab69e8
c13fd5d
 
 
 
f0c7269
c13fd5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0c7269
946ddbf
aab69e8
c13fd5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a890e9
 
c13fd5d
8a890e9
c13fd5d
d9f1d02
aab69e8
8a890e9
 
c13fd5d
 
8a890e9
 
d9f1d02
 
4a3dc04
 
 
 
 
 
 
 
 
 
 
aab69e8
4a3dc04
8a890e9
aab69e8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
from googletrans import Translator

# Download NLTK resources
# 'punkt' is the sentence/word tokenizer model used by older NLTK releases;
# newer releases load the same tokenizer from 'punkt_tab' instead, so both are
# requested to keep sent_tokenize/word_tokenize working across versions.
# nltk.download() reports an unavailable resource instead of raising.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Function to read and preprocess the article
def read_article(article):
    """Split *article* into sentences and drop the very short ones.

    The text is segmented with ``nltk.sent_tokenize``; only sentences whose
    length exceeds 10 characters are returned, in their original order.
    """
    return [
        candidate
        for candidate in nltk.sent_tokenize(article)
        if len(candidate) > 10  # anything this short is treated as noise
    ]

# Function to compute sentence similarity based on cosine similarity
def sentence_similarity(sent1, sent2, stopwords):
    """Return the cosine similarity between two sentences.

    Each sentence is tokenized with ``nltk.word_tokenize``; tokens are
    lower-cased and tokens that are not purely alphanumeric are dropped.
    Words contained in *stopwords* are ignored. The remaining words are
    counted into two bag-of-words vectors over a shared vocabulary, and the
    cosine similarity of those vectors is returned.

    Args:
        sent1: First sentence (string).
        sent2: Second sentence (string).
        stopwords: Collection of lower-case words to ignore. Note that inside
            this function the name refers to this argument, not to the
            ``nltk.corpus.stopwords`` module imported at file level.

    Returns:
        ``1 - cosine_distance(vector1, vector2)``, or ``0.0`` when either
        sentence has no countable words (its vector would be all zeros and
        the cosine would be undefined).
    """
    words1 = [word.lower() for word in nltk.word_tokenize(sent1) if word.isalnum()]
    words2 = [word.lower() for word in nltk.word_tokenize(sent2) if word.isalnum()]

    # Assign every distinct non-stopword a vector slot. A dict gives O(1)
    # slot lookup and a deterministic (first-seen) ordering.
    slots = {}
    for word in words1 + words2:
        if word not in stopwords and word not in slots:
            slots[word] = len(slots)

    vector1 = [0] * len(slots)
    vector2 = [0] * len(slots)

    for word in words1:
        slot = slots.get(word)
        if slot is not None:
            vector1[slot] += 1

    for word in words2:
        slot = slots.get(word)
        if slot is not None:
            vector2[slot] += 1

    # A zero vector has no direction: cosine_distance would divide by zero
    # and yield NaN, which poisons the similarity matrix built from these
    # values. Treat such sentences as sharing nothing with any other.
    if not any(vector1) or not any(vector2):
        return 0.0

    return 1 - cosine_distance(vector1, vector2)

# Function to create a similarity matrix of sentences
def build_similarity_matrix(sentences, stopwords):
    """Build the pairwise sentence-similarity matrix.

    Args:
        sentences: Sequence of sentence strings.
        stopwords: Collection of words passed through to
            ``sentence_similarity`` to be ignored when comparing.

    Returns:
        A ``len(sentences) x len(sentences)`` NumPy array in which entry
        ``[i][j]`` is the similarity of sentence ``i`` and sentence ``j``.
        The diagonal stays 0 because a sentence is never compared with
        itself.
    """
    count = len(sentences)
    similarity_matrix = np.zeros((count, count))

    # Cosine similarity does not depend on argument order, so each unordered
    # pair is scored once and written to both mirrored cells instead of being
    # tokenized and compared twice.
    for i, first in enumerate(sentences):
        for j in range(i + 1, count):
            score = sentence_similarity(first, sentences[j], stopwords)
            similarity_matrix[i, j] = score
            similarity_matrix[j, i] = score

    return similarity_matrix

# Function to generate the article summary
def generate_summary(article, top_n=5):
    """Summarize *article* by picking its highest-ranked sentences.

    The article is split into sentences with ``read_article``, a pairwise
    similarity matrix is built using the English NLTK stopword list, and the
    matrix is turned into a graph whose nodes are scored with PageRank.

    Args:
        article: The text to summarize.
        top_n: Maximum number of sentences to include in the summary.

    Returns:
        The ``top_n`` best-scoring sentences joined by single spaces, ordered
        from highest to lowest score.
    """
    english_stopwords = set(stopwords.words('english'))
    sentences = read_article(article)

    # Graph nodes are sentence indices; edge weights are the similarities.
    graph = nx.from_numpy_array(build_similarity_matrix(sentences, english_stopwords))
    scores = nx.pagerank(graph)

    # Pair every sentence with its score and order the pairs best-first.
    scored = [(scores[index], text) for index, text in enumerate(sentences)]
    scored.sort(reverse=True)

    best = (text for _, text in scored[:top_n])
    return " ".join(best)

# Streamlit web app
# Maps each language name offered in the select box to the language code
# handed to the translator. The select box options are derived from this
# table so the two can never drift apart.
LANGUAGE_CODES = {
    "English": "en",
    "French": "fr",
    "Spanish": "es",
    "German": "de",
}

st.title("Article Summarizer")
user_article = st.text_area("Enter your article here:")
translate = st.checkbox("Translate Summary")
target_language = st.selectbox("Select Target Language", list(LANGUAGE_CODES))

if st.button("Summarize"):
    # Whitespace-only input contains nothing to summarize, so it is treated
    # the same as an empty text area.
    if user_article and user_article.strip():
        summary = generate_summary(user_article)
        st.subheader("Summary:")
        st.write(summary)

        # Skip translation when there is no summary text to send.
        if translate and summary:
            target_language_code = LANGUAGE_CODES[target_language]
            translator = Translator()
            try:
                translated_summary = translator.translate(summary, dest=target_language_code)
            except Exception as error:
                # Top-level UI boundary: the translation goes through an
                # external service, so a failure is shown to the user instead
                # of crashing the app after the summary was already produced.
                st.error(f"Translation failed: {error}")
            else:
                st.subheader("Translated Summary:")
                st.write(translated_summary.text)
    else:
        st.warning("Please enter an article to summarize.")