cutechicken committed on
Commit 4a2e1bf · verified
1 Parent(s): e11aa8c

Create app.py

Files changed (1)
  1. app.py +312 -0
app.py ADDED
@@ -0,0 +1,312 @@
+ import logging
+ import gradio as gr
+ import pandas as pd
+ import torch
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from GoogleNews import GoogleNews
+ from transformers import pipeline
+ from datetime import datetime, timedelta
+ import matplotlib
+ matplotlib.use('Agg')  # non-interactive backend for headless/server-side rendering
+
+ # Set up logging
+ logging.basicConfig(
+     level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+ )
+
+ SENTIMENT_ANALYSIS_MODEL = (
+     "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
+ )
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ logging.info(f"Using device: {DEVICE}")
+ logging.info("Initializing sentiment analysis model...")
+ sentiment_analyzer = pipeline(
+     "sentiment-analysis", model=SENTIMENT_ANALYSIS_MODEL, device=DEVICE
+ )
+ logging.info("Model initialized successfully")
+
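+ # Note: the pipeline returns a list of dicts such as
+ # [{"label": "positive", "score": 0.98}]; this model's labels are
+ # "positive" / "neutral" / "negative", which calculate_sentiment_score
+ # below maps to +3 / 0 / -3.
+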
+ def fetch_articles(query, max_articles=30):
+     try:
+         logging.info(f"Fetching up to {max_articles} articles for query: '{query}'")
+         googlenews = GoogleNews(lang="en")
+         googlenews.search(query)
+
+         # Fetch the first page of results
+         articles = googlenews.result()
+
+         # Keep fetching additional pages until the target article count is reached
+         page = 2
+         while len(articles) < max_articles and page <= 10:  # try at most 10 pages
+             logging.info(f"Fetched {len(articles)} articles so far. Getting page {page}...")
+             googlenews.get_page(page)
+             page_results = googlenews.result()
+
+             # Stop if there are no new results
+             if not page_results:
+                 logging.info(f"No more results found after page {page-1}")
+                 break
+
+             articles.extend(page_results)
+             page += 1
+
+         # Cap at the maximum article count
+         articles = articles[:max_articles]
+
+         logging.info(f"Successfully fetched {len(articles)} articles")
+         return articles
+     except Exception as e:
+         logging.error(
+             f"Error while searching articles for query: '{query}'. Error: {e}"
+         )
+         raise gr.Error(
+             f"Unable to search articles for query: '{query}'. Try again later...",
+             duration=5,
+         )
+
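+ # Each GoogleNews result is a dict with keys such as 'title', 'desc',
+ # 'date', 'datetime', 'link', and 'media'; the code below relies on
+ # 'title', 'desc', 'date', and 'link'.
+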
+ def analyze_article_sentiment(article):
+     logging.info(f"Analyzing sentiment for article: {article['title']}")
+     sentiment = sentiment_analyzer(article["desc"])[0]
+     article["sentiment"] = sentiment
+     return article
+
+ def calculate_time_weight(article_date_str):
+     """
+     Calculate a weight from the article's age.
+     - Decays 1% per hour, considering at most 24 hours
+     - Article less than 1 hour old: 24% weight
+     - Article 10 hours old: 14% weight
+     - Article 24+ hours old: 1% weight
+     """
+     try:
+         # Parse the article date string (handle a variety of formats)
+         date_formats = [
+             '%a, %d %b %Y %H:%M:%S %z',  # default GoogleNews format
+             '%Y-%m-%d %H:%M:%S',
+             '%a, %d %b %Y %H:%M:%S',
+             '%Y-%m-%dT%H:%M:%S%z',
+             '%a %b %d, %Y',
+             '%d %b %Y'
+         ]
+
+         parsed_date = None
+         for format_str in date_formats:
+             try:
+                 parsed_date = datetime.strptime(article_date_str, format_str)
+                 break
+             except ValueError:
+                 continue
+
+         # If no format matches, fall back to the minimum weight
+         if parsed_date is None:
+             logging.warning(f"Could not parse date: {article_date_str}, using default 24h ago")
+             return 0.01  # minimum weight of 1%
+
+         # Compute the difference from the current time, in hours
+         if parsed_date.tzinfo is not None:
+             # Compare tz-aware datetimes in the article's timezone
+             now = datetime.now(parsed_date.tzinfo)
+         else:
+             now = datetime.now()
+
+         hours_diff = (now - parsed_date).total_seconds() / 3600
+
+         # Only articles within the last 24 hours get a graded weight
+         if hours_diff <= 24:
+             weight = 0.24 - (0.01 * int(hours_diff))  # decays 1% per hour
+             return max(0.01, weight)  # guarantee at least 1%
+         else:
+             return 0.01  # articles older than 24 hours get a 1% weight
+     except Exception as e:
+         logging.error(f"Error calculating time weight: {e}")
+         return 0.01  # apply the minimum weight on error
+
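+ # Worked example: an article 3.5 hours old gets
+ # weight = 0.24 - 0.01 * int(3.5) = 0.24 - 0.03 = 0.21, i.e. 21%.
+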
+ def calculate_sentiment_score(sentiment_label, time_weight):
+     """
+     Compute the base score for a sentiment label and apply the time weight.
+     - positive: +3 points
+     - neutral: 0 points
+     - negative: -3 points
+     """
+     base_score = {
+         'positive': 3,
+         'neutral': 0,
+         'negative': -3
+     }.get(sentiment_label, 0)
+
+     # Extra score contributed by the time weight
+     weighted_addition = base_score * time_weight
+
+     return base_score, weighted_addition
+
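+ # Example: a positive article with time_weight 0.21 yields base_score 3,
+ # weighted_addition 3 * 0.21 = 0.63, and a total_score of 3.63.
+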
+ def analyze_asset_sentiment(asset_name):
+     logging.info(f"Starting sentiment analysis for asset: {asset_name}")
+     logging.info("Fetching up to 30 articles")
+     articles = fetch_articles(asset_name, max_articles=30)
+     logging.info("Analyzing sentiment of each article")
+     analyzed_articles = [analyze_article_sentiment(article) for article in articles]
+
+     # Compute the time weight and sentiment score for each article
+     for article in analyzed_articles:
+         time_weight = calculate_time_weight(article["date"])
+         article["time_weight"] = time_weight
+
+         sentiment_label = article["sentiment"]["label"]
+         base_score, weighted_addition = calculate_sentiment_score(sentiment_label, time_weight)
+
+         article["base_score"] = base_score
+         article["weighted_addition"] = weighted_addition
+         article["total_score"] = base_score + weighted_addition
+
+     logging.info("Sentiment analysis completed")
+
+     # Compute the aggregate score and generate the summary chart
+     sentiment_summary = create_sentiment_summary(analyzed_articles, asset_name)
+
+     return convert_to_dataframe(analyzed_articles), sentiment_summary
+
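+ # analyze_asset_sentiment returns a (DataFrame, image-path) pair, matching
+ # the two Gradio outputs wired up below (articles_output, sentiment_summary).
+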
+ def create_sentiment_summary(analyzed_articles, asset_name):
+     """
+     Summarize the sentiment analysis results and visualize them as charts.
+     """
+     total_articles = len(analyzed_articles)
+     positive_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "positive")
+     neutral_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "neutral")
+     negative_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "negative")
+
+     # Sum of base scores
+     base_score_sum = sum(a["base_score"] for a in analyzed_articles)
+
+     # Sum of weighted scores
+     weighted_score_sum = sum(a["total_score"] for a in analyzed_articles)
+
+     # Create the figure
+     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
+
+     # 1. Sentiment distribution pie chart
+     labels = ['Positive', 'Neutral', 'Negative']
+     sizes = [positive_count, neutral_count, negative_count]
+     colors = ['green', 'gray', 'red']
+
+     ax1.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
+     ax1.axis('equal')
+     ax1.set_title(f'Sentiment Distribution for {asset_name}')
+
+     # 2. Weighted scores per article, sorted by date (most recent first)
+     sorted_articles = sorted(analyzed_articles, key=lambda x: x.get("date", ""), reverse=True)
+
+     # Cap how many articles are displayed (for readability)
+     max_display = min(15, len(sorted_articles))
+     display_articles = sorted_articles[:max_display]
+
+     dates = [a.get("date", "")[:10] for a in display_articles]  # show only the date portion
+     scores = [a.get("total_score", 0) for a in display_articles]
+
+     # Color the bars by score
+     bar_colors = ['green' if s > 0 else 'red' if s < 0 else 'gray' for s in scores]
+
+     bars = ax2.bar(range(len(dates)), scores, color=bar_colors)
+     ax2.set_xticks(range(len(dates)))
+     ax2.set_xticklabels(dates, rotation=45, ha='right')
+     ax2.set_ylabel('Weighted Sentiment Score')
+     ax2.set_title(f'Recent Article Scores for {asset_name}')
+     ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
+
+     # Add the summary text
+     summary_text = f"""
+     Analysis Summary for {asset_name}:
+     Total Articles: {total_articles}
+     Positive: {positive_count} ({positive_count/total_articles*100:.1f}%)
+     Neutral: {neutral_count} ({neutral_count/total_articles*100:.1f}%)
+     Negative: {negative_count} ({negative_count/total_articles*100:.1f}%)
+
+     Base Score Sum: {base_score_sum:.2f}
+     Weighted Score Sum: {weighted_score_sum:.2f}
+     """
+
+     plt.figtext(0.5, 0.01, summary_text, ha='center', fontsize=10, bbox={"facecolor":"orange", "alpha":0.2, "pad":5})
+
+     plt.tight_layout(rect=[0, 0.1, 1, 0.95])
+
+     # Save the image
+     fig_path = f"sentiment_summary_{asset_name.replace(' ', '_')}.png"
+     plt.savefig(fig_path)
+     plt.close()
+
+     return fig_path
+
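+ # The summary chart is written as a PNG in the working directory; its path
+ # feeds the gr.Image(type="filepath") component below.
+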
+ def convert_to_dataframe(analyzed_articles):
+     df = pd.DataFrame(analyzed_articles)
+     df["Title"] = df.apply(
+         lambda row: f'<a href="{row["link"]}" target="_blank">{row["title"]}</a>',
+         axis=1,
+     )
+     df["Description"] = df["desc"]
+     df["Date"] = df["date"]
+
+     def sentiment_badge(sentiment):
+         colors = {
+             "negative": "red",
+             "neutral": "gray",
+             "positive": "green",
+         }
+         color = colors.get(sentiment, "grey")
+         return f'<span style="background-color: {color}; color: white; padding: 2px 6px; border-radius: 4px;">{sentiment}</span>'
+
+     df["Sentiment"] = df["sentiment"].apply(lambda x: sentiment_badge(x["label"]))
+
+     # Add the score columns
+     df["Base Score"] = df["base_score"]
+     df["Weight"] = df["time_weight"].apply(lambda x: f"{x*100:.0f}%")
+     df["Total Score"] = df["total_score"].apply(lambda x: f"{x:.2f}")
+
+     return df[["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"]]
+
+ with gr.Blocks() as iface:
+     gr.Markdown("# Trading Asset Sentiment Analysis")
+     gr.Markdown(
+         "Enter the name of a trading asset, and I'll fetch recent articles and analyze their sentiment!"
+     )
+
+     with gr.Row():
+         input_asset = gr.Textbox(
+             label="Asset Name",
+             lines=1,
+             placeholder="Enter the name of the trading asset...",
+         )
+
+     with gr.Row():
+         analyze_button = gr.Button("Analyze Sentiment", size="sm")
+
+     gr.Examples(
+         examples=[
+             "Bitcoin",
+             "Tesla",
+             "Apple",
+             "Amazon",
+         ],
+         inputs=input_asset,
+     )
+
+     with gr.Row():
+         with gr.Column():
+             with gr.Blocks():
+                 gr.Markdown("## Sentiment Summary")
+                 sentiment_summary = gr.Image(type="filepath", label="Sentiment Analysis Summary")
+
+     with gr.Row():
+         with gr.Column():
+             with gr.Blocks():
+                 gr.Markdown("## Articles and Sentiment Analysis")
+                 articles_output = gr.Dataframe(
+                     headers=["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"],
+                     datatype=["markdown", "html", "markdown", "markdown", "number", "markdown", "markdown"],
+                     wrap=False,
+                 )
+
+     analyze_button.click(
+         analyze_asset_sentiment,
+         inputs=[input_asset],
+         outputs=[articles_output, sentiment_summary],
+     )
+
+ logging.info("Launching Gradio interface")
+ iface.queue().launch()
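+
+ # Run locally with `python app.py`; by default Gradio serves the UI on
+ # http://127.0.0.1:7860 (pass share=True to launch() for a public link).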