import streamlit as st
import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx
from googletrans import Translator
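# Note: googletrans is an unofficial Google Translate client and its API has changed
# between releases (some versions are async-only), so pinning a known-working version
# such as googletrans==4.0.0rc1 is advisable if translation misbehaves.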

# Download NLTK resources
nltk.download('punkt')
nltk.download('stopwords')
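# Newer NLTK releases may also require the 'punkt_tab' resource for sentence
# tokenization; uncomment the next line if sent_tokenize raises a LookupError.
# nltk.download('punkt_tab')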

def read_article(article):
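    """Split the article into sentences and drop very short fragments."""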
    sentences = nltk.sent_tokenize(article)
    sentences = [sentence for sentence in sentences if len(sentence) > 10]  # filter very short sentences
    return sentences

def sentence_similarity(sent1, sent2, stop_words):
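    """Return the cosine similarity of two sentences using bag-of-words counts, ignoring stopwords."""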
    words1 = nltk.word_tokenize(sent1)
    words2 = nltk.word_tokenize(sent2)

    words1 = [word.lower() for word in words1 if word.isalnum()]
    words2 = [word.lower() for word in words2 if word.isalnum()]

    all_words = list(set(words1 + words2))

    vector1 = [0] * len(all_words)
    vector2 = [0] * len(all_words)

    for word in words1:
        if word in stop_words:
            continue
        vector1[all_words.index(word)] += 1

    for word in words2:
        if word in stop_words:
            continue
        vector2[all_words.index(word)] += 1

    # Guard against all-zero vectors (e.g., both sentences contain only stopwords),
    # which would otherwise make cosine_distance divide by zero.
    if not any(vector1) or not any(vector2):
        return 0.0

    return 1 - cosine_distance(vector1, vector2)

def build_similarity_matrix(sentences, stop_words):
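    """Build a matrix of pairwise sentence similarities (diagonal left at zero)."""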
    similarity_matrix = np.zeros((len(sentences), len(sentences)))

    for i in range(len(sentences)):
        for j in range(len(sentences)):
            if i == j:  
                continue
            similarity_matrix[i][j] = sentence_similarity(sentences[i], sentences[j], stop_words)

    return similarity_matrix

def generate_summary(article, top_n=5):
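    """TextRank-style summary: rank sentences with PageRank over a
    sentence-similarity graph and return the top_n highest-scoring ones."""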
    sentences = read_article(article)
    stop_words = set(stopwords.words('english'))
    sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)

    sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)

    scores = nx.pagerank(sentence_similarity_graph)

    ranked_sentences = sorted(((scores[i], sentence) for i, sentence in enumerate(sentences)), reverse=True)

    summary = " ".join([sentence for _, sentence in ranked_sentences[:top_n]])
    return summary

# Set page configuration and styles
st.set_page_config(page_title="Article Summarizer", page_icon="✍️")
st.title("We summarize and translate your articles")

st.markdown(
    """
    <style>
    .stApp {
        background-color: orange;
        color: #333;
    }
    .stButton button {
        background-color: black;
        color: white;
        border-radius: 5px;
    }
    .stTextInput input {
        color: #333;
        border: 1px solid #008CBA;
        border-radius: 5px;
    }
    .stText {
        color: #333;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# Input text area
user_article = st.text_area("✒️ Enter your article here:", height=100)

# Translation options
translate = st.checkbox("🌐 Translate Summary")
if translate:
    target_language = st.selectbox("🌎 Select Target Language", ["🇺🇸 English", "🇫🇷 French", "🇪🇸 Spanish", "🇩🇪 German"])

# Summarize button
if st.button("Summarize"):
    if user_article:
        summary = generate_summary(user_article)
        st.subheader("📄 Summary:")
        st.write(summary)

        # Translation logic
        if translate:
            if target_language == "🇺🇸 English":
                target_language_code = "en"
            elif target_language == "🇫🇷 French":
                target_language_code = "fr"
            elif target_language == "🇪🇸 Spanish":
                target_language_code = "es"
            elif target_language == "🇩🇪 German":
                target_language_code = "de"

            translator = Translator()
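            # In googletrans 4.0.0rc1, translate() is synchronous and returns an object
            # whose .text attribute holds the translated string; other releases may differ.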
            translated_summary = translator.translate(summary, dest=target_language_code)
            st.subheader("🌐 Translated Summary:")
            st.write(translated_summary.text)
    else:
        st.warning("🚫 Please enter an article to summarize.")