cutechicken committed
Commit 2d0696f · verified · 1 Parent(s): 996aec5

Update app.py

Files changed (1)
  1. app.py +93 -22
app.py CHANGED
@@ -26,7 +26,7 @@ sentiment_analyzer = pipeline(
 )
 logging.info("Model initialized successfully")
 
-def fetch_articles(query, max_articles=100):
+def fetch_articles(query, max_articles=30):
     try:
         logging.info(f"Fetching up to {max_articles} articles for query: '{query}'")
         googlenews = GoogleNews(lang="en")
@@ -37,7 +37,7 @@ def fetch_articles(query, max_articles=100):
 
         # Fetch additional pages until the target article count is reached
         page = 2
-        while len(articles) < max_articles and page <= 20:  # try up to 20 pages
+        while len(articles) < max_articles and page <= 10:  # try at most 10 pages
            logging.info(f"Fetched {len(articles)} articles so far. Getting page {page}...")
             googlenews.get_page(page)
             page_results = googlenews.result()
@@ -70,12 +70,68 @@ def analyze_article_sentiment(article):
     article["sentiment"] = sentiment
     return article
 
-def calculate_sentiment_score(sentiment_label):
+def calculate_time_weight(article_date_str):
     """
-    Compute a base score from the sentiment label
+    Compute a weight based on the article's timestamp
+    - articles within the last hour get a 24% weight
+    - the weight decreases by 1% for each hour that passes (minimum 1%)
+    - e.g. within 1 hour = 24%, 10 hours old = 15%, 24 hours old = 1%
+    - fixed at 1% beyond 24 hours
+    """
+    try:
+        # Parse the article date string (handle multiple formats)
+        date_formats = [
+            '%a, %d %b %Y %H:%M:%S %z',  # default GoogleNews format
+            '%Y-%m-%d %H:%M:%S',
+            '%a, %d %b %Y %H:%M:%S',
+            '%Y-%m-%dT%H:%M:%S%z',
+            '%a %b %d, %Y',
+            '%d %b %Y'
+        ]
+
+        parsed_date = None
+        for format_str in date_formats:
+            try:
+                parsed_date = datetime.strptime(article_date_str, format_str)
+                break
+            except ValueError:
+                continue
+
+        # If no format matches, treat the article as 24 hours old
+        if parsed_date is None:
+            logging.warning(f"Could not parse date: {article_date_str}, using default 24h ago")
+            return 0.01  # minimum weight of 1%
+
+        # Time difference from now, in hours
+        now = datetime.now()
+        if parsed_date.tzinfo is not None:
+            now = now.replace(tzinfo=parsed_date.tzinfo)
+
+        hours_diff = (now - parsed_date).total_seconds() / 3600
+
+        # Only the last 24 hours are considered
+        if hours_diff < 1:  # within 1 hour
+            return 0.24  # 24% weight
+        elif hours_diff < 24:  # 1 to 23 hours
+            # decrease by 1% per hour (1 hour = 24%, 2 hours = 23%, ...)
+            return max(0.01, 0.24 - ((hours_diff - 1) * 0.01))
+        else:
+            return 0.01  # articles older than 24 hours get a 1% weight
+    except Exception as e:
+        logging.error(f"Error calculating time weight: {e}")
+        return 0.01  # apply the minimum weight on error
+
+def calculate_sentiment_score(sentiment_label, time_weight):
+    """
+    Compute a base score from the sentiment label and apply the time weight
     - positive: +3 points
     - neutral: 0 points
     - negative: -3 points
+
+    The time weight is applied as a percentage (added on top of the base score)
+    Examples:
+    - positive article within 1 hour: 3 + (3 * 24%) = 3 + 0.72 = 3.72 points
+    - negative article from 10 hours ago: -3 + (-3 * 15%) = -3 - 0.45 = -3.45 points
     """
     base_score = {
         'positive': 3,
@@ -83,19 +139,29 @@ def calculate_sentiment_score(sentiment_label):
         'negative': -3
     }.get(sentiment_label, 0)
 
-    return base_score
+    # Additional score from applying the time weight
+    weighted_addition = base_score * time_weight
+
+    return base_score, weighted_addition
 
 def analyze_asset_sentiment(asset_name):
     logging.info(f"Starting sentiment analysis for asset: {asset_name}")
-    logging.info("Fetching up to 100 articles")
-    articles = fetch_articles(asset_name, max_articles=100)
+    logging.info("Fetching up to 30 articles")
+    articles = fetch_articles(asset_name, max_articles=30)
     logging.info("Analyzing sentiment of each article")
     analyzed_articles = [analyze_article_sentiment(article) for article in articles]
 
-    # Compute a sentiment score for each article (no weighting)
+    # Compute a time weight and sentiment score for each article
     for article in analyzed_articles:
+        time_weight = calculate_time_weight(article["date"])
+        article["time_weight"] = time_weight
+
         sentiment_label = article["sentiment"]["label"]
-        article["score"] = calculate_sentiment_score(sentiment_label)
+        base_score, weighted_addition = calculate_sentiment_score(sentiment_label, time_weight)
+
+        article["base_score"] = base_score
+        article["weighted_addition"] = weighted_addition
+        article["total_score"] = base_score + weighted_addition
 
     logging.info("Sentiment analysis completed")
 
@@ -113,8 +179,11 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     neutral_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "neutral")
     negative_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "negative")
 
-    # Score sum
-    score_sum = sum(a["score"] for a in analyzed_articles)
+    # Sum of base scores
+    base_score_sum = sum(a["base_score"] for a in analyzed_articles)
+
+    # Sum of weighted scores
+    weighted_score_sum = sum(a["total_score"] for a in analyzed_articles)
 
     # Create the charts
     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
@@ -128,15 +197,15 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     ax1.axis('equal')
     ax1.set_title(f'Sentiment Distribution for {asset_name}')
 
-    # 2. Sentiment scores by date (sorted)
+    # 2. Time-weighted scores by date (sorted)
     sorted_articles = sorted(analyzed_articles, key=lambda x: x.get("date", ""), reverse=True)
 
     # Maximum number of articles to display (for readability)
-    max_display = min(20, len(sorted_articles))
+    max_display = min(15, len(sorted_articles))
     display_articles = sorted_articles[:max_display]
 
     dates = [a.get("date", "")[:10] for a in display_articles]  # show only the date part
-    scores = [a.get("score", 0) for a in display_articles]
+    scores = [a.get("total_score", 0) for a in display_articles]
 
     # Bar colors based on score
     bar_colors = ['green' if s > 0 else 'red' if s < 0 else 'gray' for s in scores]
@@ -144,7 +213,7 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     bars = ax2.bar(range(len(dates)), scores, color=bar_colors)
     ax2.set_xticks(range(len(dates)))
     ax2.set_xticklabels(dates, rotation=45, ha='right')
-    ax2.set_ylabel('Sentiment Score')
+    ax2.set_ylabel('Weighted Sentiment Score')
     ax2.set_title(f'Recent Article Scores for {asset_name}')
     ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
 
@@ -156,8 +225,8 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     Neutral: {neutral_count} ({neutral_count/total_articles*100:.1f}%)
     Negative: {negative_count} ({negative_count/total_articles*100:.1f}%)
 
-    Total Score Sum: {score_sum:.2f}
-    Average Score: {score_sum/total_articles:.2f}
+    Base Score Sum: {base_score_sum:.2f}
+    Weighted Score Sum: {weighted_score_sum:.2f}
     """
 
     plt.figtext(0.5, 0.01, summary_text, ha='center', fontsize=10, bbox={"facecolor":"orange", "alpha":0.2, "pad":5})
@@ -192,14 +261,16 @@ def convert_to_dataframe(analyzed_articles):
     df["Sentiment"] = df["sentiment"].apply(lambda x: sentiment_badge(x["label"]))
 
     # Add score columns
-    df["Score"] = df["score"]
+    df["Base Score"] = df["base_score"]
+    df["Weight"] = df["time_weight"].apply(lambda x: f"{x*100:.0f}%")
+    df["Total Score"] = df["total_score"].apply(lambda x: f"{x:.2f}")
 
-    return df[["Sentiment", "Title", "Description", "Date", "Score"]]
+    return df[["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"]]
 
 with gr.Blocks() as iface:
     gr.Markdown("# Trading Asset Sentiment Analysis")
     gr.Markdown(
-        "Enter the name of a trading asset, and I'll fetch up to 100 recent articles and analyze their sentiment!"
+        "Enter the name of a trading asset, and I'll fetch recent articles and analyze their sentiment!"
     )
 
     with gr.Row():
@@ -233,8 +304,8 @@ with gr.Blocks() as iface:
     with gr.Blocks():
         gr.Markdown("## Articles and Sentiment Analysis")
         articles_output = gr.Dataframe(
-            headers=["Sentiment", "Title", "Description", "Date", "Score"],
-            datatype=["markdown", "html", "markdown", "markdown", "number"],
+            headers=["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"],
+            datatype=["markdown", "html", "markdown", "markdown", "number", "markdown", "markdown"],
             wrap=False,
         )
 
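For reference, a minimal standalone sketch of how the new time-weighted scoring behaves. It re-derives the same arithmetic as the calculate_time_weight and calculate_sentiment_score functions in the diff above; the helper name time_weight and the sample article ages are illustrative only, not part of app.py.

# Illustrative sketch only: mirrors the weighting rules from the diff above.
def time_weight(hours_old: float) -> float:
    # 24% inside the first hour, then -1% per additional hour, floored at 1%.
    if hours_old < 1:
        return 0.24
    if hours_old < 24:
        return max(0.01, 0.24 - (hours_old - 1) * 0.01)
    return 0.01

base_scores = {"positive": 3, "neutral": 0, "negative": -3}

for label, hours in [("positive", 0.5), ("negative", 10), ("neutral", 30)]:
    w = time_weight(hours)
    base = base_scores[label]
    total = base + base * w  # base score plus the weighted addition
    print(f"{label:>8} article, {hours:>4}h old: weight={w:.0%}, total score={total:+.2f}")

Running this reproduces the docstring examples: a positive article under an hour old scores +3.72, and a negative article from 10 hours ago scores -3.45.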