murtaza2801 committed on
Commit
ad80a31
·
verified ·
1 Parent(s): be615af

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +90 -0
  2. utils.py +145 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ nltk.download('punkt_tab')
3
+ nltk.download('averaged_perceptron_tagger_eng')
4
+ import streamlit as st
5
+ from streamlit_lottie import st_lottie
6
+ import requests
7
+ import time
8
+ from utils import (
9
+ get_bing_news_articles,
10
+ analyze_sentiment,
11
+ extract_topics,
12
+ comparative_analysis,
13
+ convert_text_to_hindi_tts,
14
+ )
15
+ from collections import Counter
16
+ # Load Lottie Animation
17
def load_lottie_url(url):
    """Fetch a Lottie animation JSON from *url*.

    Returns the parsed JSON object, or ``None`` when the fetch fails for
    any reason (network error, non-200 status, or a body that is not
    valid JSON) so the caller can degrade gracefully.
    """
    try:
        # Bound the request so a slow/unreachable CDN cannot hang app startup.
        r = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # Body was not valid JSON.
        return None
22
+
23
# Fetch the dashboard animation once at startup; may be None on network failure.
lottie_animation = load_lottie_url("https://lottie.host/d02e4bd8-cd9c-401e-b143-17fc0ad924a8/o2dLZzU9oO.json")

# UI Layout
if lottie_animation is not None:
    # st_lottie raises on None input, so only render when the fetch succeeded.
    st_lottie(lottie_animation, height=200)
st.markdown("<h1 style='text-align: center; color: #4CAF50;'>Sentiment Analysis Dashboard</h1>", unsafe_allow_html=True)


st.title("News Summarization & Sentiment Analysis with Hindi TTS")
st.write("Enter a company name to fetch news articles, analyze sentiment, and generate a Hindi summary.")

company = st.text_input("Company Name", "Enter Any Company Name")

if st.button("Generate Report"):
    with st.spinner("Fetching news articles..."):
        articles = get_bing_news_articles(company, num_articles=10)

    if not articles:
        st.error("No articles found or there was an error fetching the articles.")
    else:
        # Process each article: perform sentiment analysis on title + summary.
        for article in articles:
            combined_text = article["title"]
            if article["summary"]:
                combined_text += ". " + article["summary"]
            sentiment, scores = analyze_sentiment(combined_text)
            article["sentiment"] = sentiment
            article["sentiment_scores"] = scores
            # Topics are still extracted but not used in the final summary.
            article["topics"] = extract_topics(combined_text)
            # Small pause between articles; keeps the spinner visible and
            # throttles per-article work.
            time.sleep(0.5)

        # Display individual article details.
        st.subheader("Extracted Articles")
        for idx, article in enumerate(articles, start=1):
            st.markdown(f"**Article {idx}:**")
            st.write("Title:", article["title"])
            st.write("Summary:", article["summary"])
            st.write("Source:", article["source"])
            st.write("URL:", article["url"])
            st.write("Sentiment:", article["sentiment"])
            st.markdown("---")

        # Perform comparative analysis for internal metrics
        # (sentiment distribution, coverage insights).
        analysis = comparative_analysis(articles)
        st.subheader("Comparative Analysis")
        st.write("**Sentiment Distribution:**", analysis["Sentiment Distribution"])
        st.write("**Coverage Differences:**", analysis["Coverage Differences"])

        # Create a final Hindi summary report that aggregates all the articles.
        total_articles = len(articles)
        dist = analysis["Sentiment Distribution"]
        final_summary = (
            f"कुल {total_articles} लेखों में से, {dist.get('Positive', 0)} लेख सकारात्मक, "
            f"{dist.get('Negative', 0)} लेख नकारात्मक, और {dist.get('Neutral', 0)} लेख तटस्थ हैं।\n"
            "कई लेखों में विक्रय में वृद्धि और आर्थिक विकास पर जोर दिया गया है, जबकि कुछ लेखों में नियामकीय चुनौतियाँ और कानूनी मुद्दों पर चर्चा की गई है।\n"
            "संपूर्ण रूप से, यह रिपोर्ट दर्शाती है कि कंपनी का समाचार कवरेज मुख्य रूप से सकारात्मक है, "
            "जो संभावित आर्थिक विकास के संकेत देता है।"
        )

        st.subheader("Final Summary Report")
        st.markdown(final_summary)

        # Convert the final summary into Hindi speech. Derive the output file
        # name from the entered company (was hard-coded to "tesla_summary_hi.mp3").
        safe_name = company.strip().lower().replace(" ", "_") or "report"
        with st.spinner("Generating Hindi TTS audio..."):
            audio_file = convert_text_to_hindi_tts(final_summary, output_file=f"{safe_name}_summary_hi.mp3")

        st.success("Audio summary generated!")
        st.audio(audio_file)
utils.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ nltk.download('punkt') # Download the required resource
3
+
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+ import time
7
+ from nltk.sentiment import SentimentIntensityAnalyzer
8
+ from nltk.tokenize import word_tokenize
9
+ from nltk.corpus import stopwords
10
+ from collections import Counter
11
+ from gtts import gTTS
12
+ import os
13
+ import platform
14
+
15
+ # Download required NLTK data files.
16
+ nltk.download('vader_lexicon')
17
+
18
+ nltk.download('averaged_perceptron_tagger')
19
+ nltk.download('stopwords')
20
+
21
+ def get_bing_news_articles(company_name, num_articles=10):
22
+ """
23
+ Scrapes Bing News search results for a given company name.
24
+ """
25
+ query = company_name.replace(" ", "+")
26
+ url = f"https://www.bing.com/news/search?q={query}&FORM=HDRSC6"
27
+ headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
28
+ response = requests.get(url, headers=headers)
29
+ if response.status_code != 200:
30
+ return []
31
+ soup = BeautifulSoup(response.text, "html.parser")
32
+ articles = []
33
+ news_cards = soup.find_all("div", class_="news-card")
34
+ for card in news_cards:
35
+ title_tag = card.find("a", class_="title")
36
+ if not title_tag:
37
+ continue
38
+ title = title_tag.get_text(strip=True)
39
+ article_url = title_tag.get("href")
40
+ snippet_tag = card.find("div", class_="snippet")
41
+ snippet = snippet_tag.get_text(strip=True) if snippet_tag else ""
42
+ source_tag = card.find("div", class_="source")
43
+ source = source_tag.get_text(strip=True) if source_tag else ""
44
+ articles.append({
45
+ "title": title,
46
+ "summary": snippet,
47
+ "url": article_url,
48
+ "source": source
49
+ })
50
+ if len(articles) >= num_articles:
51
+ break
52
+ return articles
53
+
54
def analyze_sentiment(text):
    """Analyze the sentiment of *text* using NLTK's VADER.

    Returns:
        sentiment (str): "Positive", "Negative", or "Neutral"
        scores (dict): The full set of polarity scores.
    """
    # Lazily build ONE shared analyzer and cache it on the function object:
    # constructing SentimentIntensityAnalyzer reloads the VADER lexicon, so
    # doing it per call (as before) is wasteful when scoring many articles.
    sia = getattr(analyze_sentiment, "_sia", None)
    if sia is None:
        sia = SentimentIntensityAnalyzer()
        analyze_sentiment._sia = sia
    scores = sia.polarity_scores(text)
    compound = scores["compound"]
    # Standard VADER thresholds: |compound| < 0.05 is treated as neutral.
    if compound >= 0.05:
        sentiment = "Positive"
    elif compound <= -0.05:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    return sentiment, scores
71
+
72
def extract_topics(text):
    """Extract candidate topics from *text* via basic noun extraction.

    Lowercases and tokenizes the text, drops stopwords and non-alphabetic
    tokens, POS-tags the remainder, and returns the unique nouns.

    Returns
    -------
    list[str]
        Unique nouns in first-appearance order. (The previous
        ``list(set(...))`` produced a nondeterministic ordering across
        runs; ``dict.fromkeys`` dedupes while keeping order stable.)
    """
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words("english"))
    filtered = [word for word in tokens if word.isalpha() and word not in stop_words]
    tagged = nltk.pos_tag(filtered)
    nouns = [word for word, pos in tagged if pos in ("NN", "NNS", "NNP", "NNPS")]
    return list(dict.fromkeys(nouns))
84
+
85
def comparative_analysis(articles):
    """Perform comparative analysis across articles.

    Expects each article dict to have "title" and "summary", and optionally
    "sentiment" (defaults to "Neutral" when absent). Side effect: (re)writes
    ``article["topics"]`` on every article.

    Returns
    -------
    dict
        Keys "Sentiment Distribution", "Coverage Differences", and
        "Topic Overlap" (with "Common Topics" / "Unique Topics").
    """
    sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
    sales_keywords = {"sales", "growth", "record", "profit"}
    regulatory_keywords = {"regulation", "regulatory", "scrutiny", "lawsuit", "legal", "compliance"}
    sales_count = 0
    reg_count = 0
    all_topics = []
    for article in articles:
        sentiment = article.get("sentiment", "Neutral")
        # .get guard: tolerate an unexpected sentiment label instead of
        # raising KeyError (the previous `+= 1` crashed on unknown keys).
        sentiment_distribution[sentiment] = sentiment_distribution.get(sentiment, 0) + 1
        combined_text = f"{article['title']} {article['summary']}".lower()
        if any(keyword in combined_text for keyword in sales_keywords):
            sales_count += 1
        if any(keyword in combined_text for keyword in regulatory_keywords):
            reg_count += 1
        topics = extract_topics(combined_text)
        article["topics"] = topics
        all_topics.extend(topics)
    if sales_count > reg_count:
        coverage_insight = (f"More articles ({sales_count}) emphasize sales and financial growth compared to regulatory concerns ({reg_count}).")
    elif reg_count > sales_count:
        coverage_insight = (f"More articles ({reg_count}) focus on regulatory or legal challenges compared to sales aspects ({sales_count}).")
    else:
        coverage_insight = (f"An equal number of articles emphasize sales/growth and regulatory issues ({sales_count} each).")
    topic_counter = Counter(all_topics)
    # Topics mentioned by more than one article are "common"; topics seen
    # exactly once are attributed to the single article that produced them.
    common_topics = [topic for topic, count in topic_counter.items() if count > 1]
    unique_topics = {}
    for i, article in enumerate(articles, start=1):
        unique = [topic for topic in article.get("topics", []) if topic_counter[topic] == 1]
        unique_topics[f"Article {i}"] = unique
    analysis = {
        "Sentiment Distribution": sentiment_distribution,
        "Coverage Differences": coverage_insight,
        "Topic Overlap": {
            "Common Topics": common_topics,
            "Unique Topics": unique_topics
        }
    }
    return analysis
127
+
128
def convert_text_to_hindi_tts(text, output_file="output.mp3"):
    """Synthesize *text* as Hindi speech with gTTS.

    Writes an MP3 to *output_file* and returns that path so callers can
    feed it straight to an audio player/widget.
    """
    speech = gTTS(text=text, lang='hi', slow=False)
    speech.save(output_file)
    return output_file
135
+
136
def play_audio(file_path):
    """Play an audio file using the system's default media player.

    Windows uses ``os.startfile``; macOS uses ``open``; other platforms
    fall back to ``mpg123``. The argument-list form of ``subprocess.run``
    replaces the previous ``os.system(f"...")`` calls, which broke on
    paths containing spaces and were shell-injection-prone.
    """
    import subprocess  # local import keeps the module's top-level deps unchanged

    system = platform.system()
    if system == "Windows":
        os.startfile(file_path)
    elif system == "Darwin":
        subprocess.run(["open", file_path], check=False)
    else:
        subprocess.run(["mpg123", file_path], check=False)