import nltk nltk.download('punkt') # Download the required resource import requests from bs4 import BeautifulSoup import time from nltk.sentiment import SentimentIntensityAnalyzer from nltk.tokenize import word_tokenize from nltk.corpus import stopwords from collections import Counter from gtts import gTTS import os import platform # Download required NLTK data files. nltk.download('vader_lexicon') nltk.download('averaged_perceptron_tagger') nltk.download('stopwords') def get_bing_news_articles(company_name, num_articles=10): """ Scrapes Bing News search results for a given company name. """ query = company_name.replace(" ", "+") url = f"https://www.bing.com/news/search?q={query}&FORM=HDRSC6" headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"} response = requests.get(url, headers=headers) if response.status_code != 200: return [] soup = BeautifulSoup(response.text, "html.parser") articles = [] news_cards = soup.find_all("div", class_="news-card") for card in news_cards: title_tag = card.find("a", class_="title") if not title_tag: continue title = title_tag.get_text(strip=True) article_url = title_tag.get("href") snippet_tag = card.find("div", class_="snippet") snippet = snippet_tag.get_text(strip=True) if snippet_tag else "" source_tag = card.find("div", class_="source") source = source_tag.get_text(strip=True) if source_tag else "" articles.append({ "title": title, "summary": snippet, "url": article_url, "source": source }) if len(articles) >= num_articles: break return articles def analyze_sentiment(text): """ Analyzes the sentiment of the given text using NLTK's VADER. Returns: sentiment (str): "Positive", "Negative", or "Neutral" scores (dict): The full set of polarity scores. """ sia = SentimentIntensityAnalyzer() scores = sia.polarity_scores(text) compound = scores["compound"] if compound >= 0.05: sentiment = "Positive" elif compound <= -0.05: sentiment = "Negative" else: sentiment = "Neutral" return sentiment, scores def extract_topics(text): """ Extracts topics from the input text using basic noun extraction. Tokenizes the text, removes stopwords and punctuation, and returns a list of unique nouns. """ text = text.lower() tokens = word_tokenize(text) stop_words = set(stopwords.words("english")) filtered = [word for word in tokens if word.isalpha() and word not in stop_words] tagged = nltk.pos_tag(filtered) nouns = [word for word, pos in tagged if pos in ["NN", "NNS", "NNP", "NNPS"]] return list(set(nouns)) def comparative_analysis(articles): """ Performs comparative analysis across articles. """ sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0} sales_keywords = {"sales", "growth", "record", "profit"} regulatory_keywords = {"regulation", "regulatory", "scrutiny", "lawsuit", "legal", "compliance"} sales_count = 0 reg_count = 0 all_topics = [] for article in articles: sentiment = article.get("sentiment", "Neutral") sentiment_distribution[sentiment] += 1 combined_text = f"{article['title']} {article['summary']}".lower() if any(keyword in combined_text for keyword in sales_keywords): sales_count += 1 if any(keyword in combined_text for keyword in regulatory_keywords): reg_count += 1 topics = extract_topics(combined_text) article["topics"] = topics all_topics.extend(topics) if sales_count > reg_count: coverage_insight = (f"More articles ({sales_count}) emphasize sales and financial growth compared to regulatory concerns ({reg_count}).") elif reg_count > sales_count: coverage_insight = (f"More articles ({reg_count}) focus on regulatory or legal challenges compared to sales aspects ({sales_count}).") else: coverage_insight = (f"An equal number of articles emphasize sales/growth and regulatory issues ({sales_count} each).") topic_counter = Counter(all_topics) common_topics = [topic for topic, count in topic_counter.items() if count > 1] unique_topics = {} for i, article in enumerate(articles, start=1): unique = [topic for topic in article.get("topics", []) if topic_counter[topic] == 1] unique_topics[f"Article {i}"] = unique analysis = { "Sentiment Distribution": sentiment_distribution, "Coverage Differences": coverage_insight, "Topic Overlap": { "Common Topics": common_topics, "Unique Topics": unique_topics } } return analysis def convert_text_to_hindi_tts(text, output_file="output.mp3"): """ Converts the input text into Hindi speech using gTTS and saves it as an MP3 file. """ tts = gTTS(text=text, lang='hi', slow=False) tts.save(output_file) return output_file def play_audio(file_path): """ Plays an audio file using the system's default media player. """ if platform.system() == "Windows": os.startfile(file_path) elif platform.system() == "Darwin": os.system(f"open {file_path}") else: os.system(f"mpg123 {file_path}")