murtaza2801 commited on
Commit
5811516
·
verified ·
1 Parent(s): bc33717

Upload 3 files

Browse files
Files changed (3) hide show
  1. api.py +35 -0
  2. app.py +79 -0
  3. utils.py +148 -0
api.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.responses import FileResponse
3
+ import time
4
+ from utils import (
5
+ get_bing_news_articles,
6
+ analyze_sentiment,
7
+ extract_topics,
8
+ comparative_analysis,
9
+ convert_text_to_hindi_tts,
10
+ )
11
+
12
# FastAPI application object; serves the /news and /tts endpoints below.
app = FastAPI(title="News Summarization & TTS API")
13
+
14
@app.get("/news")
def get_news(company: str, num_articles: int = 10):
    """Fetch news articles for *company*, annotate each with sentiment and
    topics, and return them alongside a comparative analysis.

    Raises:
        HTTPException: 404 when the scraper returns no articles.
    """
    articles = get_bing_news_articles(company, num_articles=num_articles)
    if not articles:
        raise HTTPException(status_code=404, detail="No articles found.")
    for item in articles:
        # Sentiment/topic extraction works on title + summary when a summary exists.
        text = item["title"]
        summary = item["summary"]
        if summary:
            text = f"{text}. {summary}"
        label, polarity = analyze_sentiment(text)
        item.update(
            sentiment=label,
            sentiment_scores=polarity,
            topics=extract_topics(text),
        )
        time.sleep(0.5)  # brief pause between articles, as in the original flow
    return {"articles": articles, "analysis": comparative_analysis(articles)}
30
+
31
@app.get("/tts")
def get_tts(text: str):
    """Convert *text* to Hindi speech and return the MP3 as a file response.

    NOTE(review): the output path is fixed, so concurrent requests overwrite
    each other's audio — behavior preserved here as-is.
    """
    audio_path = "output.mp3"
    convert_text_to_hindi_tts(text, output_file=audio_path)
    return FileResponse(audio_path, media_type="audio/mpeg", filename=audio_path)
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import time
3
+ from utils import (
4
+ get_bing_news_articles,
5
+ analyze_sentiment,
6
+ extract_topics,
7
+ comparative_analysis,
8
+ convert_text_to_hindi_tts,
9
+ )
10
+ from collections import Counter
11
+ from googletrans import Translator
12
+
13
# Initialize the translator (googletrans; performs network calls at translate time).
translator = Translator()

st.title("News Summarization & Sentiment Analysis with Automatic Hindi Translation & TTS")
st.write("Enter a company name to fetch news articles, analyze sentiment, and generate a final summary automatically converted to Hindi.")

# Company to search news for; defaults to "Tesla".
company = st.text_input("Company Name", "Tesla")

if st.button("Generate Report"):
    with st.spinner("Fetching news articles..."):
        articles = get_bing_news_articles(company, num_articles=10)

    if not articles:
        st.error("No articles found or there was an error fetching the articles.")
    else:
        # Process each article: perform sentiment analysis on title + summary.
        for article in articles:
            combined_text = article["title"]
            if article["summary"]:
                combined_text += ". " + article["summary"]
            sentiment, scores = analyze_sentiment(combined_text)
            article["sentiment"] = sentiment
            article["sentiment_scores"] = scores
            # Topics are extracted for internal analysis but not used in the final summary.
            article["topics"] = extract_topics(combined_text)
            # Small delay between articles; presumably throttling — TODO confirm it is needed.
            time.sleep(0.5)

        # Display extracted articles.
        st.subheader("Extracted Articles")
        for idx, article in enumerate(articles, start=1):
            st.markdown(f"**Article {idx}:**")
            st.write("Title:", article["title"])
            st.write("Summary:", article["summary"])
            st.write("Source:", article["source"])
            st.write("URL:", article["url"])
            st.write("Sentiment:", article["sentiment"])
            st.markdown("---")

        # Perform comparative analysis for internal metrics.
        analysis = comparative_analysis(articles)
        st.subheader("Comparative Analysis")
        st.write("**Sentiment Distribution:**", analysis["Sentiment Distribution"])
        st.write("**Coverage Differences:**", analysis["Coverage Differences"])

        # Create a final summary report in English.
        # NOTE(review): only the sentence with counts is data-driven; the
        # "sales growth ... predominantly positive" sentences are hard-coded
        # boilerplate and may contradict the actual sentiment distribution.
        total_articles = len(articles)
        dist = analysis["Sentiment Distribution"]
        final_summary_en = (
            f"Out of a total of {total_articles} articles, {dist.get('Positive', 0)} articles are positive, "
            f"{dist.get('Negative', 0)} are negative, and {dist.get('Neutral', 0)} are neutral. "
            "Many articles emphasize sales growth and financial development, while some discuss regulatory challenges and legal issues. "
            "Overall, the news coverage of the company is predominantly positive, suggesting potential market growth."
        )

        # Automatically translate the final summary to Hindi (network call; unhandled
        # failures here will surface as a Streamlit exception).
        translation = translator.translate(final_summary_en, dest='hi')
        final_summary_hi = translation.text

        st.subheader("Final Summary Report (Hindi)")
        st.markdown(final_summary_hi)

        # Convert the Hindi summary into speech (gTTS; writes summary_hi.mp3 to cwd).
        with st.spinner("Generating Hindi TTS audio..."):
            audio_file = convert_text_to_hindi_tts(final_summary_hi, output_file="summary_hi.mp3")

        st.success("Audio summary generated!")
        st.audio(audio_file)
utils.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import time
4
+ import nltk
5
+ from nltk.sentiment import SentimentIntensityAnalyzer
6
+ from nltk.tokenize import word_tokenize
7
+ from nltk.corpus import stopwords
8
+ from collections import Counter
9
+ from gtts import gTTS
10
+ import os
11
+ import platform
12
+
13
+ # Download required NLTK data files (if not already available).
14
+ nltk.download('vader_lexicon')
15
+ nltk.download('punkt')
16
+ nltk.download('averaged_perceptron_tagger')
17
+ nltk.download('stopwords')
18
+
19
def get_bing_news_articles(company_name, num_articles=10):
    """
    Scrape Bing News search results for a given company name.

    Parameters:
        company_name (str): Company to search for; spaces become '+' in the query.
        num_articles (int): Maximum number of articles to return.

    Returns:
        list[dict]: Articles with keys "title", "summary", "url", "source".
                    Empty list on network errors or a non-200 response.
    """
    query = company_name.replace(" ", "+")
    url = f"https://www.bing.com/news/search?q={query}&FORM=HDRSC6"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}
    try:
        # Timeout prevents the caller from hanging indefinitely on a stalled
        # connection; RequestException covers DNS/connection/timeout failures,
        # which previously propagated uncaught.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []
    soup = BeautifulSoup(response.text, "html.parser")
    articles = []
    # Each Bing result is rendered as a <div class="news-card">.
    for card in soup.find_all("div", class_="news-card"):
        title_tag = card.find("a", class_="title")
        if not title_tag:
            continue  # skip cards without a headline link
        snippet_tag = card.find("div", class_="snippet")
        source_tag = card.find("div", class_="source")
        articles.append({
            "title": title_tag.get_text(strip=True),
            "summary": snippet_tag.get_text(strip=True) if snippet_tag else "",
            "url": title_tag.get("href"),
            "source": source_tag.get_text(strip=True) if source_tag else "",
        })
        if len(articles) >= num_articles:
            break
    return articles
52
+
53
def analyze_sentiment(text):
    """
    Analyzes the sentiment of the given text using NLTK's VADER.

    Returns:
        sentiment (str): "Positive", "Negative", or "Neutral"
        scores (dict): The full set of polarity scores (neg/neu/pos/compound).
    """
    # Reuse one analyzer instance across calls: constructing it reloads the
    # VADER lexicon each time, which is wasteful when called per article.
    sia = getattr(analyze_sentiment, "_sia", None)
    if sia is None:
        sia = analyze_sentiment._sia = SentimentIntensityAnalyzer()
    scores = sia.polarity_scores(text)
    compound = scores["compound"]
    # Standard VADER convention: |compound| < 0.05 is considered neutral.
    if compound >= 0.05:
        sentiment = "Positive"
    elif compound <= -0.05:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"
    return sentiment, scores
70
+
71
def extract_topics(text):
    """
    Extracts topics from the input text using basic noun extraction.

    Lower-cases and tokenizes the text, removes stopwords and punctuation,
    POS-tags the remaining words, and returns the unique nouns.

    Returns:
        list[str]: Unique nouns in first-seen order. (The previous
        list(set(...)) produced a hash-dependent, nondeterministic order.)
    """
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words("english"))
    filtered = [word for word in tokens if word.isalpha() and word not in stop_words]
    tagged = nltk.pos_tag(filtered)
    nouns = [word for word, pos in tagged if pos in ("NN", "NNS", "NNP", "NNPS")]
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(nouns))
83
+
84
def comparative_analysis(articles):
    """
    Performs comparative analysis across articles.

    Returns a dictionary with:
        - "Sentiment Distribution": count of articles per sentiment label.
        - "Coverage Differences": insight string based on keyword presence.
        - "Topic Overlap": common topics and unique topics per article.

    Side effect: each article's "topics" entry is overwritten with topics
    recomputed from its lower-cased title + summary.
    """
    sentiment_distribution = {"Positive": 0, "Negative": 0, "Neutral": 0}
    sales_keywords = {"sales", "growth", "record", "profit"}
    regulatory_keywords = {"regulation", "regulatory", "scrutiny", "lawsuit", "legal", "compliance"}
    sales_count = 0
    reg_count = 0
    all_topics = []
    for article in articles:
        sentiment = article.get("sentiment", "Neutral")
        # .get-based increment tolerates unexpected sentiment labels instead of
        # raising KeyError (the original indexed only the three fixed keys).
        sentiment_distribution[sentiment] = sentiment_distribution.get(sentiment, 0) + 1
        # .get on "summary" avoids a KeyError for articles missing that field.
        combined_text = f"{article['title']} {article.get('summary', '')}".lower()
        if any(keyword in combined_text for keyword in sales_keywords):
            sales_count += 1
        if any(keyword in combined_text for keyword in regulatory_keywords):
            reg_count += 1
        topics = extract_topics(combined_text)
        article["topics"] = topics
        all_topics.extend(topics)
    if sales_count > reg_count:
        coverage_insight = (f"More articles ({sales_count}) emphasize sales and financial growth compared to regulatory concerns ({reg_count}).")
    elif reg_count > sales_count:
        coverage_insight = (f"More articles ({reg_count}) focus on regulatory or legal challenges compared to sales aspects ({sales_count}).")
    else:
        coverage_insight = (f"An equal number of articles emphasize sales/growth and regulatory issues ({sales_count} each).")
    topic_counter = Counter(all_topics)
    # A topic appearing in more than one article counts as "common".
    common_topics = [topic for topic, count in topic_counter.items() if count > 1]
    unique_topics = {}
    for i, article in enumerate(articles, start=1):
        unique = [topic for topic in article.get("topics", []) if topic_counter[topic] == 1]
        unique_topics[f"Article {i}"] = unique
    analysis = {
        "Sentiment Distribution": sentiment_distribution,
        "Coverage Differences": coverage_insight,
        "Topic Overlap": {
            "Common Topics": common_topics,
            "Unique Topics": unique_topics
        }
    }
    return analysis
130
+
131
def convert_text_to_hindi_tts(text, output_file="output.mp3"):
    """Render *text* as Hindi speech with gTTS and save it as an MP3.

    Returns:
        str: The path of the saved audio file (same as *output_file*).
    """
    gTTS(text=text, lang='hi', slow=False).save(output_file)
    return output_file
138
+
139
def play_audio(file_path):
    """
    Plays an audio file using the system's default media player.

    On Windows uses os.startfile; on macOS invokes `open`; elsewhere falls
    back to `mpg123`. Uses subprocess argument lists instead of the previous
    os.system(f"... {file_path}") shell strings, which broke on paths
    containing spaces and allowed shell injection via the file name.
    """
    import subprocess  # local import: only needed by this utility

    system = platform.system()
    if system == "Windows":
        os.startfile(file_path)
    elif system == "Darwin":
        subprocess.run(["open", file_path], check=False)
    else:
        subprocess.run(["mpg123", file_path], check=False)