import nltk
import requests
from bs4 import BeautifulSoup
import time
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
from gtts import gTTS
import os
import platform
# Download required NLTK data files.
nltk.download('punkt')  # tokenizer models used by word_tokenize
nltk.download('vader_lexicon')  # lexicon for the VADER sentiment analyzer
nltk.download('averaged_perceptron_tagger')  # POS tagger used for noun extraction
nltk.download('stopwords')  # English stopword list
def get_bing_news_articles(company_name, num_articles=10):
"""
Scrapes Bing News search results for a given company name.
"""
query = company_name.replace(" ", "+")
url = f"https://www.bing.com/news/search?q={query}&FORM=HDRSC6"
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    response = requests.get(url, headers=headers, timeout=10)
if response.status_code != 200:
return []
soup = BeautifulSoup(response.text, "html.parser")
articles = []
news_cards = soup.find_all("div", class_="news-card")
for card in news_cards:
title_tag = card.find("a", class_="title")
if not title_tag:
continue
title = title_tag.get_text(strip=True)
article_url = title_tag.get("href")
snippet_tag = card.find("div", class_="snippet")
snippet = snippet_tag.get_text(strip=True) if snippet_tag else ""
source_tag = card.find("div", class_="source")
source = source_tag.get_text(strip=True) if source_tag else ""
articles.append({
"title": title,
"summary": snippet,
"url": article_url,
"source": source
})
if len(articles) >= num_articles:
break
return articles
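# Illustrative call (network-dependent; results vary with Bing's live index):
#   articles = get_bing_news_articles("Tesla", num_articles=5)
#   -> [{"title": "...", "summary": "...", "url": "...", "source": "..."}, ...]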
def analyze_sentiment(text):
"""
Analyzes the sentiment of the given text using NLTK's VADER.
Returns:
sentiment (str): "Positive", "Negative", or "Neutral"
scores (dict): The full set of polarity scores.
"""
sia = SentimentIntensityAnalyzer()
scores = sia.polarity_scores(text)
compound = scores["compound"]
if compound >= 0.05:
sentiment = "Positive"
elif compound <= -0.05:
sentiment = "Negative"
else:
sentiment = "Neutral"
return sentiment, scores
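# Illustrative call (VADER's compound score drives the label; exact values vary):
#   analyze_sentiment("Record profits lift shares")
#   -> ("Positive", {"neg": ..., "neu": ..., "pos": ..., "compound": ...})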
def extract_topics(text):
"""
Extracts topics from the input text using basic noun extraction.
Tokenizes the text, removes stopwords and punctuation, and returns a list of unique nouns.
"""
text = text.lower()
tokens = word_tokenize(text)
stop_words = set(stopwords.words("english"))
filtered = [word for word in tokens if word.isalpha() and word not in stop_words]
tagged = nltk.pos_tag(filtered)
nouns = [word for word, pos in tagged if pos in ["NN", "NNS", "NNP", "NNPS"]]
return list(set(nouns))
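# Illustrative call (actual output depends on the POS tagger; order is not
# guaranteed because a set is used for de-duplication):
#   extract_topics("Tesla faces regulatory scrutiny over sales practices")
#   -> a list like ["tesla", "scrutiny", "sales", "practices"]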
def comparative_analysis(articles):
"""
Performs comparative analysis across articles.
"""
sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
sales_keywords = {"sales", "growth", "record", "profit"}
regulatory_keywords = {"regulation", "regulatory", "scrutiny", "lawsuit", "legal", "compliance"}
sales_count = 0
reg_count = 0
all_topics = []
for article in articles:
sentiment = article.get("sentiment", "Neutral")
sentiment_distribution[sentiment] += 1
combined_text = f"{article['title']} {article['summary']}".lower()
if any(keyword in combined_text for keyword in sales_keywords):
sales_count += 1
if any(keyword in combined_text for keyword in regulatory_keywords):
reg_count += 1
topics = extract_topics(combined_text)
article["topics"] = topics
all_topics.extend(topics)
if sales_count > reg_count:
coverage_insight = (f"More articles ({sales_count}) emphasize sales and financial growth compared to regulatory concerns ({reg_count}).")
elif reg_count > sales_count:
coverage_insight = (f"More articles ({reg_count}) focus on regulatory or legal challenges compared to sales aspects ({sales_count}).")
else:
coverage_insight = (f"An equal number of articles emphasize sales/growth and regulatory issues ({sales_count} each).")
topic_counter = Counter(all_topics)
common_topics = [topic for topic, count in topic_counter.items() if count > 1]
unique_topics = {}
for i, article in enumerate(articles, start=1):
unique = [topic for topic in article.get("topics", []) if topic_counter[topic] == 1]
unique_topics[f"Article {i}"] = unique
analysis = {
"Sentiment Distribution": sentiment_distribution,
"Coverage Differences": coverage_insight,
"Topic Overlap": {
"Common Topics": common_topics,
"Unique Topics": unique_topics
}
}
return analysis
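# Illustrative shape of the returned analysis (values depend on the inputs):
#   {"Sentiment Distribution": {"Positive": 3, "Negative": 1, "Neutral": 1},
#    "Coverage Differences": "More articles (3) emphasize sales ...",
#    "Topic Overlap": {"Common Topics": [...], "Unique Topics": {"Article 1": [...], ...}}}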
def convert_text_to_hindi_tts(text, output_file="output.mp3"):
"""
Converts the input text into Hindi speech using gTTS and saves it as an MP3 file.
"""
tts = gTTS(text=text, lang='hi', slow=False)
tts.save(output_file)
return output_file
def play_audio(file_path):
"""
Plays an audio file using the system's default media player.
"""
    if platform.system() == "Windows":
        os.startfile(file_path)
    elif platform.system() == "Darwin":
        os.system(f'open "{file_path}"')
    else:
        # Assumes the mpg123 CLI player is installed; substitute any available player.
        os.system(f'mpg123 "{file_path}"')