# murtaza2801's picture
# Upload 2 files
# ad80a31 verified
import nltk
nltk.download('punkt') # Download the required resource
import requests
from bs4 import BeautifulSoup
import time
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
from gtts import gTTS
import os
import platform
# Download required NLTK data files.
# Recent NLTK releases (3.9+) look up "punkt_tab" and
# "averaged_perceptron_tagger_eng" instead of the older pickled resources,
# so both generations are fetched to keep tokenizing and tagging working
# regardless of the installed NLTK version.
for _resource in (
    "vader_lexicon",
    "averaged_perceptron_tagger",
    "averaged_perceptron_tagger_eng",
    "stopwords",
    "punkt_tab",
):
    nltk.download(_resource)
def get_bing_news_articles(company_name, num_articles=10):
    """
    Scrape Bing News search results for a given company name.

    Args:
        company_name (str): Search term sent to Bing News as the ``q`` parameter.
        num_articles (int): Maximum number of articles to return.

    Returns:
        list[dict]: One dict per parsed news card with the keys "title",
        "summary", "url" and "source". An empty list is returned when
        ``num_articles`` is not positive, when the request fails, or when the
        response status is not 200.
    """
    # Nothing was asked for, so skip the network round-trip entirely.
    if num_articles <= 0:
        return []

    url = "https://www.bing.com/news/search"
    # Passing the query through ``params`` lets requests percent-encode it, so
    # names containing "&", "#", "+" or non-ASCII characters reach Bing intact.
    params = {"q": company_name, "FORM": "HDRSC6"}
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    try:
        # The timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(url, params=params, headers=headers, timeout=10)
    except requests.RequestException:
        # Same contract as a non-200 response: no results.
        return []
    if response.status_code != 200:
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    articles = []
    for card in soup.find_all("div", class_="news-card"):
        title_tag = card.find("a", class_="title")
        if title_tag is None:
            # Cards without a title link carry nothing usable.
            continue
        snippet_tag = card.find("div", class_="snippet")
        source_tag = card.find("div", class_="source")
        articles.append({
            "title": title_tag.get_text(strip=True),
            "summary": snippet_tag.get_text(strip=True) if snippet_tag else "",
            "url": title_tag.get("href"),
            "source": source_tag.get_text(strip=True) if source_tag else "",
        })
        if len(articles) >= num_articles:
            break
    return articles
# Shared analyzer instance, created lazily on the first call.
_SENTIMENT_ANALYZER = None


def _get_sentiment_analyzer():
    """Return the shared VADER analyzer, creating it on first use."""
    global _SENTIMENT_ANALYZER
    if _SENTIMENT_ANALYZER is None:
        _SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()
    return _SENTIMENT_ANALYZER


def analyze_sentiment(text):
    """
    Analyze the sentiment of the given text using NLTK's VADER.

    Args:
        text (str): Text to score.

    Returns:
        tuple: ``(sentiment, scores)`` where
            sentiment (str): "Positive" when the compound score is >= 0.05,
                "Negative" when it is <= -0.05, otherwise "Neutral".
            scores (dict): The full set of polarity scores.
    """
    # Reuse one analyzer across calls instead of rebuilding it for every text.
    scores = _get_sentiment_analyzer().polarity_scores(text)
    compound = scores["compound"]
    if compound >= 0.05:
        sentiment = "Positive"
    elif compound <= -0.05:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    return sentiment, scores
def extract_topics(text):
    """
    Extract topics from the input text using basic noun extraction.

    The text is lower-cased and tokenized, stopwords and non-alphabetic
    tokens are removed, and the remaining tokens are POS-tagged.

    Args:
        text (str): Text to analyse.

    Returns:
        list[str]: Unique nouns (tags NN, NNS, NNP, NNPS) in order of first
        appearance; an empty list when ``text`` is empty or ``None``.
    """
    # No text means no topics; this also avoids touching the NLTK resources.
    if not text:
        return []

    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words("english"))
    filtered = [word for word in tokens if word.isalpha() and word not in stop_words]
    noun_tags = {"NN", "NNS", "NNP", "NNPS"}
    nouns = [word for word, pos in nltk.pos_tag(filtered) if pos in noun_tags]
    # dict.fromkeys de-duplicates while keeping first-seen order; a plain set
    # would make the result order vary from one interpreter run to the next.
    return list(dict.fromkeys(nouns))
def comparative_analysis(articles):
    """
    Perform comparative analysis across articles.

    Each article dict is read for "title", "summary" and "sentiment", and is
    updated in place with a "topics" list produced by ``extract_topics``.

    Args:
        articles (list[dict]): Article dicts to compare.

    Returns:
        dict: With the keys
            "Sentiment Distribution": count of articles per sentiment label,
            "Coverage Differences": sentence comparing sales-related and
                regulatory-related coverage,
            "Topic Overlap": {"Common Topics": topics found in more than one
                article, "Unique Topics": {"Article N": topics found only in
                that article}}.
    """
    sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
    sales_keywords = {"sales", "growth", "record", "profit"}
    regulatory_keywords = {"regulation", "regulatory", "scrutiny", "lawsuit", "legal", "compliance"}
    sales_count = 0
    reg_count = 0
    all_topics = []

    for article in articles:
        # A missing or empty label counts as Neutral; an unexpected label gets
        # its own bucket instead of raising KeyError.
        sentiment = article.get("sentiment") or "Neutral"
        sentiment_distribution[sentiment] = sentiment_distribution.get(sentiment, 0) + 1

        # Tolerate absent or None fields so the literal text "None" never
        # leaks into keyword matching or topic extraction.
        title = article.get("title") or ""
        summary = article.get("summary") or ""
        combined_text = f"{title} {summary}".lower()

        if any(keyword in combined_text for keyword in sales_keywords):
            sales_count += 1
        if any(keyword in combined_text for keyword in regulatory_keywords):
            reg_count += 1

        topics = extract_topics(combined_text)
        article["topics"] = topics
        all_topics.extend(topics)

    if sales_count > reg_count:
        coverage_insight = (f"More articles ({sales_count}) emphasize sales and financial growth compared to regulatory concerns ({reg_count}).")
    elif reg_count > sales_count:
        coverage_insight = (f"More articles ({reg_count}) focus on regulatory or legal challenges compared to sales aspects ({sales_count}).")
    else:
        coverage_insight = (f"An equal number of articles emphasize sales/growth and regulatory issues ({sales_count} each).")

    topic_counter = Counter(all_topics)
    common_topics = [topic for topic, count in topic_counter.items() if count > 1]
    unique_topics = {
        f"Article {i}": [
            topic for topic in article.get("topics", []) if topic_counter[topic] == 1
        ]
        for i, article in enumerate(articles, start=1)
    }

    return {
        "Sentiment Distribution": sentiment_distribution,
        "Coverage Differences": coverage_insight,
        "Topic Overlap": {
            "Common Topics": common_topics,
            "Unique Topics": unique_topics,
        },
    }
def convert_text_to_hindi_tts(text, output_file="output.mp3"):
    """
    Synthesize Hindi speech for ``text`` with gTTS and save it to ``output_file``.

    Returns:
        The ``output_file`` path that the audio was saved to.
    """
    gTTS(text=text, lang="hi", slow=False).save(output_file)
    return output_file
def play_audio(file_path):
    """
    Play an audio file using a platform-specific player.

    On Windows the file is opened with ``os.startfile``; on macOS it is handed
    to ``open``; on every other platform it is handed to ``mpg123``.

    Args:
        file_path (str): Path of the audio file to play.
    """
    import subprocess
    import sys

    system = platform.system()
    if system == "Windows":
        os.startfile(file_path)
        return

    player = "open" if system == "Darwin" else "mpg123"
    try:
        # An argument list (no shell) passes the path literally, so spaces or
        # shell metacharacters in it can neither split nor inject commands.
        subprocess.run([player, file_path], check=False)
    except FileNotFoundError:
        # Stay best-effort, as the shell-based version was: report and carry on.
        print(f"play_audio: '{player}' is not installed or not on PATH", file=sys.stderr)