Ahmad-Moiz committed on
Commit
b29e8af
·
1 Parent(s): 9748ff4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -12
app.py CHANGED
@@ -6,17 +6,14 @@ import numpy as np
6
  import networkx as nx
7
  from googletrans import Translator
8
 
9
- # Download NLTK resources
10
  nltk.download('punkt')
11
  nltk.download('stopwords')
12
 
13
- # Function to read and preprocess the article
14
  def read_article(article):
15
  sentences = nltk.sent_tokenize(article)
16
  sentences = [sentence for sentence in sentences if len(sentence) > 10] # filter very short sentences
17
  return sentences
18
 
19
- # Function to compute sentence similarity based on cosine similarity
20
  def sentence_similarity(sent1, sent2, stopwords):
21
  words1 = nltk.word_tokenize(sent1)
22
  words2 = nltk.word_tokenize(sent2)
@@ -41,42 +38,34 @@ def sentence_similarity(sent1, sent2, stopwords):
41
 
42
  return 1 - cosine_distance(vector1, vector2)
43
 
44
- # Function to create a similarity matrix of sentences
45
  def build_similarity_matrix(sentences, stopwords):
46
  similarity_matrix = np.zeros((len(sentences), len(sentences)))
47
 
48
  for i in range(len(sentences)):
49
  for j in range(len(sentences)):
50
- if i == j: # Skip comparing a sentence to itself
51
  continue
52
  similarity_matrix[i][j] = sentence_similarity(sentences[i], sentences[j], stopwords)
53
 
54
  return similarity_matrix
55
 
56
- # Function to generate the article summary
57
  def generate_summary(article, top_n=5):
58
  sentences = read_article(article)
59
  stop_words = set(stopwords.words('english'))
60
  sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)
61
 
62
- # Create a graph from the similarity matrix
63
  sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
64
 
65
- # Use the PageRank algorithm to rank the sentences
66
  scores = nx.pagerank(sentence_similarity_graph)
67
 
68
- # Sort the sentences by score
69
  ranked_sentences = sorted(((scores[i], sentence) for i, sentence in enumerate(sentences)), reverse=True)
70
 
71
- # Get the top N sentences as the summary
72
  summary = " ".join([sentence for _, sentence in ranked_sentences[:top_n]])
73
  return summary
74
 
75
- # Streamlit web app with improved styling and icons
76
  st.set_page_config(page_title="Article Summarizer", page_icon="✍️")
77
  st.title("Article Summarizer")
78
 
79
- # Custom CSS to style the app with icons
80
  st.markdown(
81
  """
82
  <style>
 
6
  import networkx as nx
7
  from googletrans import Translator
8
 
 
9
  nltk.download('punkt')
10
  nltk.download('stopwords')
11
 
 
12
  def read_article(article):
13
  sentences = nltk.sent_tokenize(article)
14
  sentences = [sentence for sentence in sentences if len(sentence) > 10] # filter very short sentences
15
  return sentences
16
 
 
17
  def sentence_similarity(sent1, sent2, stopwords):
18
  words1 = nltk.word_tokenize(sent1)
19
  words2 = nltk.word_tokenize(sent2)
 
38
 
39
  return 1 - cosine_distance(vector1, vector2)
40
 
 
41
  def build_similarity_matrix(sentences, stopwords):
42
  similarity_matrix = np.zeros((len(sentences), len(sentences)))
43
 
44
  for i in range(len(sentences)):
45
  for j in range(len(sentences)):
46
+ if i == j:
47
  continue
48
  similarity_matrix[i][j] = sentence_similarity(sentences[i], sentences[j], stopwords)
49
 
50
  return similarity_matrix
51
 
 
52
  def generate_summary(article, top_n=5):
53
  sentences = read_article(article)
54
  stop_words = set(stopwords.words('english'))
55
  sentence_similarity_matrix = build_similarity_matrix(sentences, stop_words)
56
 
 
57
  sentence_similarity_graph = nx.from_numpy_array(sentence_similarity_matrix)
58
 
 
59
  scores = nx.pagerank(sentence_similarity_graph)
60
 
 
61
  ranked_sentences = sorted(((scores[i], sentence) for i, sentence in enumerate(sentences)), reverse=True)
62
 
 
63
  summary = " ".join([sentence for _, sentence in ranked_sentences[:top_n]])
64
  return summary
65
 
 
66
  st.set_page_config(page_title="Article Summarizer", page_icon="✍️")
67
  st.title("Article Summarizer")
68
 
 
69
  st.markdown(
70
  """
71
  <style>