import os
from datetime import datetime, timezone

import matplotlib.pyplot as plt  # used by the (commented-out) pie chart in plot_sentiment_graph
import pandas as pd
import praw
from dotenv import load_dotenv
from GoogleNews import GoogleNews
from langchain_openai import ChatOpenAI
from transformers import pipeline

# Load API keys and fetch settings from a local .env file
load_dotenv()
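
# A minimal sketch of the .env file this script assumes; the variable names
# come from the os.environ lookups below, and the values are illustrative
# placeholders only:
#
#   DAYS_TO_FETCH_NEWS=7d            # period string passed to GoogleNews.set_period()
#   NO_OF_NEWS_ARTICLES_TO_FETCH=10
#   REDDIT_USER_AGENT=my-app/0.1
#   REDDIT_CLIENT_ID=...
#   REDDIT_CLIENT_SECRET=...
#   OPENAI_API_KEY=...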
def fetch_news(topic):
    """Fetches recent news articles about a topic from Google News.

    Args:
        topic (str): Topic of interest.

    Returns:
        list: A list of dicts, each with 'News_Article' and 'URL' keys.
    """
    days_to_fetch_news = os.environ["DAYS_TO_FETCH_NEWS"]
    googlenews = GoogleNews()
    googlenews.set_period(days_to_fetch_news)
    googlenews.get_news(topic)
    news_json = googlenews.get_texts()
    urls = googlenews.get_links()

    # Keep only the first N articles, as configured in the environment
    no_of_news_articles_to_fetch = int(os.environ["NO_OF_NEWS_ARTICLES_TO_FETCH"])
    news_article_list = []
    for counter, article in enumerate(news_json):
        if counter >= no_of_news_articles_to_fetch:
            break
        news_article_list.append({
            'News_Article': article,
            'URL': urls[counter],
        })
    return news_article_list
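
# Illustrative example of the return shape (not real data):
#   fetch_news('AAPL')
#   -> [{'News_Article': 'Apple stock climbs after ...',
#        'URL': 'https://news.google.com/...'}, ...]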
def fetch_reddit_news(topic):
    """Fetches Reddit post headlines about a topic from the r/nova subreddit.

    Widens the search window ('week' -> 'year' -> all time) until at least
    10 unique headlines are collected.

    Args:
        topic (str): Topic of interest.

    Returns:
        set: Headline strings with post date and URL appended.
    """
    # https://medium.com/geekculture/a-complete-guide-to-web-scraping-reddit-with-python-16e292317a52
    reddit = praw.Reddit(
        client_id=os.environ["REDDIT_CLIENT_ID"],
        client_secret=os.environ["REDDIT_CLIENT_SECRET"],
        user_agent=os.environ["REDDIT_USER_AGENT"],
    )
    headlines = set()
    for time_filter in ('week', 'year', 'all'):
        for submission in reddit.subreddit('nova').search(topic, time_filter=time_filter):
            post_date = datetime.fromtimestamp(int(submission.created_utc), tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
            headlines.add(f'{submission.title}, Date: {post_date}, URL: {submission.url}')
        if len(headlines) >= 10:
            break
    return headlines
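
# Illustrative example of one returned headline string (not real data):
#   'Street closures downtown this weekend, Date: 2024-03-01 14:05:00, URL: https://www.reddit.com/r/nova/...'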
def analyze_sentiment(article):
    """Analyzes the sentiment of a single news article.

    Args:
        article (str): The article text or headline to classify.

    Returns:
        dict: The article plus the classifier's sentiment prediction.
    """
    # Default model: classifier = pipeline('sentiment-analysis')
    # Alternative model: 'mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis'
    # Note: the pipeline is re-created on every call; hoist it out for speed.
    classifier = pipeline(model='tabularisai/robust-sentiment-analysis')
    sentiment_result = classifier(str(article))
    return {
        'News_Article': article,
        'Sentiment': sentiment_result,
    }
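
# The transformers pipeline returns one result dict per input, so 'Sentiment'
# looks like [{'label': 'Positive', 'score': 0.98}]; the exact label names
# depend on the model, and the score here is illustrative.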
def generate_summary_of_sentiment(sentiment_analysis_results):
    """Uses an LLM to summarize the articles and rationalize the dominant sentiment."""
    news_article_sentiment = str(sentiment_analysis_results)
    print("News article sentiment: " + news_article_sentiment)

    # ChatOpenAI reads the OPENAI_API_KEY environment variable automatically
    model = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        # api_key="...",  # if you prefer to pass the API key directly instead of using env vars
        # base_url="...",
        # organization="...",
        # other params...
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant that looks at all news articles, their sentiment, along with the dominant sentiment, and generates a summary rationalizing the dominant sentiment. At the end of the summary, add URL links with dates for all the articles in markdown format for streamlit. Example of adding the URLs: Check out the links: [link](%s) % url, 2024-03-01"},
        {"role": "user", "content": f"News articles and their sentiments: {news_article_sentiment}"},
    ]
    response = model.invoke(messages)
    summary = response.content
    print("+++++++++++++++++++++++++++++++++++++++++++++++")
    print(summary)
    print("+++++++++++++++++++++++++++++++++++++++++++++++")
    return summary
def plot_sentiment_graph(sentiment_analysis_results):
    """Counts sentiment labels for plotting.

    Args:
        sentiment_analysis_results (list): Dicts with 'News_Article' and 'Sentiment' keys.

    Returns:
        pandas.Series: Count of articles per sentiment label.
    """
    df = pd.DataFrame(sentiment_analysis_results)
    print(df)
    # Count how many articles fall under each sentiment label
    sentiment_counts = df['Sentiment'].value_counts()

    # Plotting pie chart
    # fig = plt.figure(figsize=(5, 3))
    # plt.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=140)
    # plt.axis('equal')  # Equal aspect ratio ensures the pie is drawn as a circle.
    # Uncomment the line below to display the chart when running locally:
    # plt.show()
    return sentiment_counts
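
# Illustrative example: for articles labeled Positive, Positive, Negative,
# the returned Series counts each label (exact repr varies by pandas version):
#   Positive    2
#   Negative    1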
def get_dominant_sentiment(sentiment_analysis_results):
    """Returns the most frequent sentiment label across all analyzed articles.

    Args:
        sentiment_analysis_results (list): Dicts with 'News_Article' and 'Sentiment' keys.

    Returns:
        str: The sentiment label with the highest count.
    """
    df = pd.DataFrame(sentiment_analysis_results)
    print(df)
    print(df['Sentiment'])
    # Count the occurrences of each sentiment label
    sentiment_counts = df['Sentiment'].value_counts().reset_index()
    sentiment_counts.columns = ['sentiment', 'count']
    print(sentiment_counts)
    # Pick the sentiment with the highest count
    dominant_sentiment = sentiment_counts.loc[sentiment_counts['count'].idxmax()]
    return dominant_sentiment['sentiment']
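
# Illustrative example: if the counted labels are Positive=5 and Negative=2,
# get_dominant_sentiment returns 'Positive'.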
# Starting point of the program
if __name__ == '__main__':
    # Fetch news
    news_articles = fetch_news('AAPL')

    # Perform sentiment analysis on each news article
    analysis_results = []
    for article in news_articles:
        sentiment_analysis_result = analyze_sentiment(article['News_Article'])
        # Display sentiment analysis results
        print(f'News Article: {sentiment_analysis_result["News_Article"]} : Sentiment: {sentiment_analysis_result["Sentiment"]}', '\n')
        analysis_results.append({
            'News_Article': sentiment_analysis_result["News_Article"],
            'Sentiment': sentiment_analysis_result["Sentiment"][0]['label'],
        })

    # Determine the dominant sentiment across all articles
    dominant_sentiment = get_dominant_sentiment(analysis_results)
    print(dominant_sentiment)

    # Count sentiments for plotting
    plot_sentiment_graph(analysis_results)
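
    # fetch_reddit_news and generate_summary_of_sentiment are defined above but
    # not called here; a sketch of wiring them in (needs Reddit/OpenAI creds in .env):
    # reddit_headlines = fetch_reddit_news('AAPL')
    # summary = generate_summary_of_sentiment(analysis_results)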