Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
app.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nltk
# Fetch NLTK data needed at runtime: 'punkt_tab' backs tokenization and
# 'averaged_perceptron_tagger_eng' backs POS tagging — both are used
# indirectly via the helpers imported from utils below.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
import streamlit as st
from streamlit_lottie import st_lottie
import requests
import time
from utils import (
    get_bing_news_articles,
    analyze_sentiment,
    extract_topics,
    comparative_analysis,
    convert_text_to_hindi_tts,
)
# NOTE(review): Counter appears unused in this module — confirm before removing.
from collections import Counter
|
16 |
+
# Load Lottie Animation
def load_lottie_url(url, timeout=10):
    """Fetch a Lottie animation JSON from *url*.

    Parameters
    ----------
    url : str
        Address of the Lottie JSON resource.
    timeout : float, optional
        Seconds to wait for the HTTP response (default 10). BUG FIX: the
        original call had no timeout, so a dead host could hang the app
        at startup indefinitely.

    Returns
    -------
    dict | None
        Parsed JSON payload, or None on any network, HTTP, or decode error.
    """
    try:
        r = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Network failure: treat like a bad status so the caller can
        # render the page without the animation.
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # Response body was not valid JSON.
        return None
|
22 |
+
|
23 |
+
lottie_animation = load_lottie_url("https://lottie.host/d02e4bd8-cd9c-401e-b143-17fc0ad924a8/o2dLZzU9oO.json")

# UI Layout
# BUG FIX: st_lottie raises on a None payload, so only render the
# animation when the download actually succeeded.
if lottie_animation is not None:
    st_lottie(lottie_animation, height=200)
st.markdown("<h1 style='text-align: center; color: #4CAF50;'>Sentiment Analysis Dashboard</h1>", unsafe_allow_html=True)


st.title("News Summarization & Sentiment Analysis with Hindi TTS")
st.write("Enter a company name to fetch news articles, analyze sentiment, and generate a Hindi summary.")

company = st.text_input("Company Name", "Enter Any Company Name")

if st.button("Generate Report"):
    with st.spinner("Fetching news articles..."):
        articles = get_bing_news_articles(company, num_articles=10)

    if not articles:
        st.error("No articles found or there was an error fetching the articles.")
    else:
        # Process each article: perform sentiment analysis.
        for article in articles:
            combined_text = article["title"]
            if article["summary"]:
                combined_text += ". " + article["summary"]
            sentiment, scores = analyze_sentiment(combined_text)
            article["sentiment"] = sentiment
            article["sentiment_scores"] = scores
            # Topics are still extracted but not used in the final summary.
            article["topics"] = extract_topics(combined_text)
            # Small pause keeps the spinner readable; not required for
            # correctness.
            time.sleep(0.5)

        # Display individual article details.
        st.subheader("Extracted Articles")
        for idx, article in enumerate(articles, start=1):
            st.markdown(f"**Article {idx}:**")
            st.write("Title:", article["title"])
            st.write("Summary:", article["summary"])
            st.write("Source:", article["source"])
            st.write("URL:", article["url"])
            st.write("Sentiment:", article["sentiment"])
            st.markdown("---")

        # Perform comparative analysis for internal metrics (sentiment distribution, coverage insights)
        analysis = comparative_analysis(articles)
        st.subheader("Comparative Analysis")
        st.write("**Sentiment Distribution:**", analysis["Sentiment Distribution"])
        st.write("**Coverage Differences:**", analysis["Coverage Differences"])

        # Create a final Hindi summary report that aggregates all the articles.
        total_articles = len(articles)
        dist = analysis["Sentiment Distribution"]
        final_summary = (
            f"कुल {total_articles} लेखों में से, {dist.get('Positive', 0)} लेख सकारात्मक, "
            f"{dist.get('Negative', 0)} लेख नकारात्मक, और {dist.get('Neutral', 0)} लेख तटस्थ हैं।\n"
            "कई लेखों में विक्रय में वृद्धि और आर्थिक विकास पर जोर दिया गया है, जबकि कुछ लेखों में नियामकीय चुनौतियाँ और कानूनी मुद्दों पर चर्चा की गई है।\n"
            "संपूर्ण रूप से, यह रिपोर्ट दर्शाती है कि कंपनी का समाचार कवरेज मुख्य रूप से सकारात्मक है, "
            "जो संभावित आर्थिक विकास के संकेत देता है।"
        )

        st.subheader("Final Summary Report")
        st.markdown(final_summary)

        # Convert the final summary into Hindi speech.
        # BUG FIX: the output filename was hard-coded to
        # "tesla_summary_hi.mp3" regardless of which company was searched;
        # derive a filesystem-safe name from the user's input instead.
        safe_name = "".join(c if c.isalnum() else "_" for c in company.strip()) or "company"
        with st.spinner("Generating Hindi TTS audio..."):
            audio_file = convert_text_to_hindi_tts(final_summary, output_file=f"{safe_name}_summary_hi.mp3")

        st.success("Audio summary generated!")
        st.audio(audio_file)
|
utils.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import nltk
nltk.download('punkt')  # Download the required resource (tokenizer models for word_tokenize)

import requests
from bs4 import BeautifulSoup
import time
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from collections import Counter
from gtts import gTTS
import os
import platform

# Download required NLTK data files.
nltk.download('vader_lexicon')  # lexicon backing SentimentIntensityAnalyzer

nltk.download('averaged_perceptron_tagger')  # POS tagger used by extract_topics
nltk.download('stopwords')  # English stopword list used by extract_topics
20 |
+
|
21 |
+
def get_bing_news_articles(company_name, num_articles=10):
    """
    Scrapes Bing News search results for a given company name.

    Parameters
    ----------
    company_name : str
        Search query; spaces are converted to '+' for the URL.
    num_articles : int, optional
        Maximum number of articles to return (default 10).

    Returns
    -------
    list[dict]
        One dict per article with keys "title", "summary", "url",
        "source". Returns an empty list on any network error or
        non-200 response.

    Notes
    -----
    Relies on Bing's current markup (div.news-card / a.title /
    div.snippet / div.source); if Bing changes its HTML this silently
    returns [].
    """
    query = company_name.replace(" ", "+")
    url = f"https://www.bing.com/news/search?q={query}&FORM=HDRSC6"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    try:
        # BUG FIX: the original call had no timeout (could hang the UI
        # forever) and let RequestException propagate to the caller,
        # which contradicted the "return []" error contract.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []
    soup = BeautifulSoup(response.text, "html.parser")
    articles = []
    for card in soup.find_all("div", class_="news-card"):
        title_tag = card.find("a", class_="title")
        if not title_tag:
            # Card without a title link is not a real article entry.
            continue
        snippet_tag = card.find("div", class_="snippet")
        source_tag = card.find("div", class_="source")
        articles.append({
            "title": title_tag.get_text(strip=True),
            "summary": snippet_tag.get_text(strip=True) if snippet_tag else "",
            "url": title_tag.get("href"),
            "source": source_tag.get_text(strip=True) if source_tag else "",
        })
        if len(articles) >= num_articles:
            break
    return articles
|
53 |
+
|
54 |
+
# Lazily-created shared analyzer. BUG FIX: the original built a fresh
# SentimentIntensityAnalyzer (which reloads the VADER lexicon) on every
# call; with ten articles per report that is ten redundant lexicon loads.
_SIA = None

def analyze_sentiment(text):
    """
    Analyzes the sentiment of the given text using NLTK's VADER.

    Parameters
    ----------
    text : str
        Text to score.

    Returns:
        sentiment (str): "Positive", "Negative", or "Neutral"
        scores (dict): The full set of polarity scores.
    """
    global _SIA
    if _SIA is None:
        _SIA = SentimentIntensityAnalyzer()
    scores = _SIA.polarity_scores(text)
    compound = scores["compound"]
    # Standard VADER convention: |compound| < 0.05 counts as neutral.
    if compound >= 0.05:
        sentiment = "Positive"
    elif compound <= -0.05:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    return sentiment, scores
|
71 |
+
|
72 |
+
def extract_topics(text):
    """
    Extracts topics from the input text using basic noun extraction.

    Tokenizes the lower-cased text, removes stopwords and
    non-alphabetic tokens, POS-tags the remainder, and keeps the nouns.

    Parameters
    ----------
    text : str
        Input text (typically an article title plus summary).

    Returns
    -------
    list[str]
        Unique nouns in first-occurrence order. BUG FIX: the original
        `list(set(nouns))` returned a different ordering on every run
        (set iteration order is not stable across processes), which made
        the generated reports non-deterministic.
    """
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words("english"))
    filtered = [word for word in tokens if word.isalpha() and word not in stop_words]
    tagged = nltk.pos_tag(filtered)
    nouns = [word for word, pos in tagged if pos in ("NN", "NNS", "NNP", "NNPS")]
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(nouns))
|
84 |
+
|
85 |
+
def comparative_analysis(articles):
    """
    Performs comparative analysis across articles.

    Parameters
    ----------
    articles : list[dict]
        Article dicts; reads "title", "summary" and the optional
        "sentiment" key, and (side effect) writes/overwrites a "topics"
        key on each article.

    Returns
    -------
    dict
        {"Sentiment Distribution": {...}, "Coverage Differences": str,
         "Topic Overlap": {"Common Topics": [...], "Unique Topics": {...}}}
    """
    sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
    sales_keywords = {"sales", "growth", "record", "profit"}
    regulatory_keywords = {"regulation", "regulatory", "scrutiny", "lawsuit", "legal", "compliance"}
    sales_count = 0
    reg_count = 0
    all_topics = []
    for article in articles:
        sentiment = article.get("sentiment", "Neutral")
        # BUG FIX: an unexpected sentiment label used to raise KeyError;
        # unknown labels are now folded into Neutral.
        if sentiment not in sentiment_distribution:
            sentiment = "Neutral"
        sentiment_distribution[sentiment] += 1
        combined_text = f"{article['title']} {article['summary']}".lower()
        # NOTE(review): plain substring matching also hits words that
        # merely contain a keyword (e.g. "legal" inside "illegal") —
        # kept as-is to preserve existing report numbers; confirm intent.
        if any(keyword in combined_text for keyword in sales_keywords):
            sales_count += 1
        if any(keyword in combined_text for keyword in regulatory_keywords):
            reg_count += 1
        topics = extract_topics(combined_text)
        article["topics"] = topics
        all_topics.extend(topics)
    if sales_count > reg_count:
        coverage_insight = (f"More articles ({sales_count}) emphasize sales and financial growth compared to regulatory concerns ({reg_count}).")
    elif reg_count > sales_count:
        coverage_insight = (f"More articles ({reg_count}) focus on regulatory or legal challenges compared to sales aspects ({sales_count}).")
    else:
        coverage_insight = (f"An equal number of articles emphasize sales/growth and regulatory issues ({sales_count} each).")
    topic_counter = Counter(all_topics)
    # Topics appearing in more than one article are "common"; topics
    # appearing exactly once are unique to their article.
    common_topics = [topic for topic, count in topic_counter.items() if count > 1]
    unique_topics = {}
    for i, article in enumerate(articles, start=1):
        unique = [topic for topic in article.get("topics", []) if topic_counter[topic] == 1]
        unique_topics[f"Article {i}"] = unique
    analysis = {
        "Sentiment Distribution": sentiment_distribution,
        "Coverage Differences": coverage_insight,
        "Topic Overlap": {
            "Common Topics": common_topics,
            "Unique Topics": unique_topics
        }
    }
    return analysis
|
127 |
+
|
128 |
+
def convert_text_to_hindi_tts(text, output_file="output.mp3"):
    """
    Converts the input text into Hindi speech using gTTS and saves it as an MP3 file.

    Parameters
    ----------
    text : str
        Text to synthesize (expected to be Hindi).
    output_file : str, optional
        Destination MP3 path (default "output.mp3").

    Returns
    -------
    str
        The path the audio file was written to.
    """
    speech = gTTS(text=text, lang='hi', slow=False)
    speech.save(output_file)
    return output_file
|
135 |
+
|
136 |
+
def play_audio(file_path):
    """
    Plays an audio file using the system's default media player.

    Parameters
    ----------
    file_path : str
        Path of the audio file to open.

    Notes
    -----
    Fire-and-forget: does not wait for playback to finish and does not
    report whether a suitable player was available.
    """
    import subprocess  # stdlib; imported locally to keep this fix self-contained

    system = platform.system()
    if system == "Windows":
        os.startfile(file_path)
    elif system == "Darwin":
        # BUG FIX: os.system(f"open {file_path}") broke on paths with
        # spaces and allowed shell injection through the file name;
        # subprocess.run with an argument list avoids both.
        subprocess.run(["open", file_path], check=False)
    else:
        subprocess.run(["mpg123", file_path], check=False)
|