File size: 5,581 Bytes
ad80a31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import nltk
nltk.download('punkt')  # Tokenizer models required by nltk.tokenize.word_tokenize below.

# Standard library
import os
import platform
import subprocess
import time
from collections import Counter

# Third-party
import requests
from bs4 import BeautifulSoup
from gtts import gTTS
from nltk.corpus import stopwords
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize

# Download required NLTK data files.
nltk.download('vader_lexicon')  # Lexicon backing SentimentIntensityAnalyzer.

nltk.download('averaged_perceptron_tagger')  # POS tagger used by nltk.pos_tag in extract_topics.
nltk.download('stopwords')  # English stopword list used in extract_topics.

def get_bing_news_articles(company_name, num_articles=10):
    """Scrape Bing News search results for a given company name.

    Args:
        company_name (str): Company to search for; spaces become '+' in the query.
        num_articles (int): Maximum number of articles to collect.

    Returns:
        list[dict]: Up to ``num_articles`` dicts with keys "title", "summary",
        "url", and "source". Returns an empty list on any network failure or
        non-200 response.
    """
    query = company_name.replace(" ", "+")
    url = f"https://www.bing.com/news/search?q={query}&FORM=HDRSC6"
    # Desktop User-Agent so Bing serves the regular HTML layout these selectors target.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    try:
        # Bounded timeout: the original call could hang indefinitely on a
        # stalled connection. Failures degrade to an empty result, matching
        # the existing non-200 behavior.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []
    soup = BeautifulSoup(response.text, "html.parser")
    articles = []
    for card in soup.find_all("div", class_="news-card"):
        title_tag = card.find("a", class_="title")
        if not title_tag:
            # A card without a title link is not a usable article.
            continue
        snippet_tag = card.find("div", class_="snippet")
        source_tag = card.find("div", class_="source")
        articles.append({
            "title": title_tag.get_text(strip=True),
            "summary": snippet_tag.get_text(strip=True) if snippet_tag else "",
            "url": title_tag.get("href"),
            "source": source_tag.get_text(strip=True) if source_tag else "",
        })
        if len(articles) >= num_articles:
            break
    return articles

def analyze_sentiment(text):
    """Analyze the sentiment of the given text using NLTK's VADER.

    Args:
        text (str): Text to score.

    Returns:
        tuple: ``(sentiment, scores)`` where ``sentiment`` is "Positive",
        "Negative", or "Neutral" and ``scores`` is the full dict of VADER
        polarity scores.
    """
    # Reuse a single analyzer across calls: constructing
    # SentimentIntensityAnalyzer reloads the VADER lexicon each time, which
    # is needlessly slow when scoring many articles.
    sia = getattr(analyze_sentiment, "_sia", None)
    if sia is None:
        sia = analyze_sentiment._sia = SentimentIntensityAnalyzer()
    scores = sia.polarity_scores(text)
    compound = scores["compound"]
    # Standard VADER convention: |compound| < 0.05 counts as neutral.
    if compound >= 0.05:
        sentiment = "Positive"
    elif compound <= -0.05:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    return sentiment, scores

def extract_topics(text):
    """Extract topics from the input text using basic noun extraction.

    Lowercases and tokenizes the text, drops stopwords and non-alphabetic
    tokens, POS-tags the remainder, and keeps the nouns.

    Args:
        text (str): Free-form text to mine for topics.

    Returns:
        list[str]: Unique nouns in first-occurrence order.
    """
    text = text.lower()
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words("english"))
    filtered = [word for word in tokens if word.isalpha() and word not in stop_words]
    tagged = nltk.pos_tag(filtered)
    nouns = [word for word, pos in tagged if pos in {"NN", "NNS", "NNP", "NNPS"}]
    # dict.fromkeys dedupes while keeping first-occurrence order; list(set(...))
    # returned a different ordering on every run due to hash randomization.
    return list(dict.fromkeys(nouns))

def comparative_analysis(articles):
    """Perform comparative analysis across articles.

    Tallies sentiment labels, contrasts sales-oriented versus regulatory
    coverage, and computes topic overlap. Each article dict gains a
    "topics" key as a side effect.

    Args:
        articles (list[dict]): Articles with "title", "summary", and
            optionally "sentiment" keys.

    Returns:
        dict: Keys "Sentiment Distribution", "Coverage Differences", and
        "Topic Overlap" (with "Common Topics" / "Unique Topics" inside).
    """
    sales_terms = {"sales", "growth", "record", "profit"}
    regulatory_terms = {"regulation", "regulatory", "scrutiny", "lawsuit", "legal", "compliance"}
    distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
    sales_hits = 0
    reg_hits = 0
    collected_topics = []

    for entry in articles:
        # Articles without an explicit sentiment count as Neutral.
        distribution[entry.get("sentiment", "Neutral")] += 1
        combined = f"{entry['title']} {entry['summary']}".lower()
        sales_hits += any(term in combined for term in sales_terms)
        reg_hits += any(term in combined for term in regulatory_terms)
        entry["topics"] = extract_topics(combined)
        collected_topics.extend(entry["topics"])

    if sales_hits > reg_hits:
        coverage_insight = (f"More articles ({sales_hits}) emphasize sales and financial growth compared to regulatory concerns ({reg_hits}).")
    elif reg_hits > sales_hits:
        coverage_insight = (f"More articles ({reg_hits}) focus on regulatory or legal challenges compared to sales aspects ({sales_hits}).")
    else:
        coverage_insight = (f"An equal number of articles emphasize sales/growth and regulatory issues ({sales_hits} each).")

    frequency = Counter(collected_topics)
    shared = [topic for topic, count in frequency.items() if count > 1]
    # A topic is "unique" when it appears in exactly one article overall.
    exclusive = {
        f"Article {idx}": [t for t in entry.get("topics", []) if frequency[t] == 1]
        for idx, entry in enumerate(articles, start=1)
    }

    return {
        "Sentiment Distribution": distribution,
        "Coverage Differences": coverage_insight,
        "Topic Overlap": {
            "Common Topics": shared,
            "Unique Topics": exclusive,
        },
    }

def convert_text_to_hindi_tts(text, output_file="output.mp3"):
    """Render the input text as Hindi speech via gTTS.

    Args:
        text (str): Text to synthesize.
        output_file (str): Destination MP3 path.

    Returns:
        str: The path the MP3 was saved to.
    """
    speech = gTTS(text=text, lang='hi', slow=False)
    speech.save(output_file)
    return output_file

def play_audio(file_path):
    """Play an audio file using the platform's default mechanism.

    Uses ``os.startfile`` on Windows, ``open`` on macOS, and ``mpg123``
    elsewhere (assumes mpg123 is installed on Linux — TODO confirm).

    Args:
        file_path (str): Path to the audio file to play.
    """
    system = platform.system()
    if system == "Windows":
        os.startfile(file_path)
    elif system == "Darwin":
        # Argument-list form (shell=False) instead of os.system with an
        # f-string: the old code broke on paths containing spaces and was
        # open to shell injection via the file name.
        subprocess.run(["open", file_path], check=False)
    else:
        subprocess.run(["mpg123", file_path], check=False)