cutechicken committed
Commit 996aec5 · verified · 1 Parent(s): 71c3ed0

Update app.py

Files changed (1):
  1. app.py +22 -93

app.py CHANGED
@@ -26,7 +26,7 @@ sentiment_analyzer = pipeline(
 )
 logging.info("Model initialized successfully")
 
-def fetch_articles(query, max_articles=30):
+def fetch_articles(query, max_articles=100):
     try:
         logging.info(f"Fetching up to {max_articles} articles for query: '{query}'")
         googlenews = GoogleNews(lang="en")
@@ -37,7 +37,7 @@ def fetch_articles(query, max_articles=30):
 
         # Keep fetching additional pages until the target number of articles is reached
         page = 2
-        while len(articles) < max_articles and page <= 10:  # try at most 10 pages
+        while len(articles) < max_articles and page <= 20:  # try up to 20 pages
             logging.info(f"Fetched {len(articles)} articles so far. Getting page {page}...")
             googlenews.get_page(page)
             page_results = googlenews.result()
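
The loop above only changes its bounds (up to 30 articles over 10 pages becomes up to 100 articles over 20 pages). Below is a minimal sketch of the same fetch-until-full pattern, assuming the `GoogleNews` package's `search()`, `get_page()`, and `result()` calls and a hypothetical `fetch_articles_sketch` name; the app's own merging of `page_results` into `articles` happens on lines outside this hunk.

# Sketch only: the real fetch_articles also merges/deduplicates page results
# on lines not shown in this hunk.
from GoogleNews import GoogleNews

def fetch_articles_sketch(query, max_articles=100, max_pages=20):
    googlenews = GoogleNews(lang="en")
    googlenews.search(query)                  # assumed first-page search call
    articles = list(googlenews.result())      # results collected so far

    page = 2
    while len(articles) < max_articles and page <= max_pages:
        googlenews.get_page(page)             # request the next results page
        articles = list(googlenews.result())  # assuming result() reflects everything fetched so far
        page += 1

    return articles[:max_articles]            # cap at the requested count
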
@@ -70,68 +70,12 @@ def analyze_article_sentiment(article):
     article["sentiment"] = sentiment
     return article
 
-def calculate_time_weight(article_date_str):
+def calculate_sentiment_score(sentiment_label):
     """
-    Compute a weight based on the article's age
-    - articles within 1 hour get a 24% weight
-    - the weight drops by 1% per hour (minimum 1%)
-    - e.g. within 1 hour = 24%, 10 hours old = 15%, 24 hours old = 1%
-    - fixed at 1% after 24 hours
-    """
-    try:
-        # Parse the article date string (handle several formats)
-        date_formats = [
-            '%a, %d %b %Y %H:%M:%S %z',  # default GoogleNews format
-            '%Y-%m-%d %H:%M:%S',
-            '%a, %d %b %Y %H:%M:%S',
-            '%Y-%m-%dT%H:%M:%S%z',
-            '%a %b %d, %Y',
-            '%d %b %Y'
-        ]
-
-        parsed_date = None
-        for format_str in date_formats:
-            try:
-                parsed_date = datetime.strptime(article_date_str, format_str)
-                break
-            except ValueError:
-                continue
-
-        # If no format matches, assume the article is 24 hours old
-        if parsed_date is None:
-            logging.warning(f"Could not parse date: {article_date_str}, using default 24h ago")
-            return 0.01  # minimum weight of 1%
-
-        # Difference from the current time, in hours
-        now = datetime.now()
-        if parsed_date.tzinfo is not None:
-            now = now.replace(tzinfo=parsed_date.tzinfo)
-
-        hours_diff = (now - parsed_date).total_seconds() / 3600
-
-        # Only the most recent 24 hours matter
-        if hours_diff < 1:  # within 1 hour
-            return 0.24  # 24% weight
-        elif hours_diff < 24:  # 1-23 hours
-            # drop 1% per hour (1 hour = 24%, 2 hours = 23%, ...)
-            return max(0.01, 0.24 - ((hours_diff - 1) * 0.01))
-        else:
-            return 0.01  # articles older than 24 hours get a 1% weight
-    except Exception as e:
-        logging.error(f"Error calculating time weight: {e}")
-        return 0.01  # apply the minimum weight on error
-
-def calculate_sentiment_score(sentiment_label, time_weight):
-    """
-    Compute the base score for a sentiment label and apply the time weight
+    Compute the base score for a sentiment label
     - positive: +3 points
     - neutral: 0 points
     - negative: -3 points
-
-    The time weight is applied as a percentage (added on top of the base score)
-    Examples:
-    - positive article within 1 hour: 3 + (3 * 24%) = 3 + 0.72 = 3.72 points
-    - negative article from 10 hours ago: -3 + (-3 * 15%) = -3 - 0.45 = -3.45 points
     """
     base_score = {
         'positive': 3,
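
For reference, the weighting removed above condenses to a few lines, and the worked numbers come straight from the deleted docstring (a positive article under an hour old scored 3 + 3 × 0.24 = 3.72, a negative one ten hours old scored -3 - 3 × 0.15 = -3.45), while the new `calculate_sentiment_score` returns a flat ±3 with no age adjustment. This is a sketch of the old behaviour only, not code that remains in app.py.

# Condensed restatement of the removed time weighting (old behaviour only).
def old_time_weight(hours_old):
    if hours_old < 1:
        return 0.24                                       # 24% inside the first hour
    if hours_old < 24:
        return max(0.01, 0.24 - (hours_old - 1) * 0.01)   # minus 1% per hour
    return 0.01                                           # 1% floor after 24 hours

def old_total_score(base_score, hours_old):
    return base_score + base_score * old_time_weight(hours_old)

print(round(old_total_score(3, 0.5), 2))   # 3.72  (positive, under an hour old)
print(round(old_total_score(-3, 10), 2))   # -3.45 (negative, ten hours old)
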
@@ -139,29 +83,19 @@ def calculate_sentiment_score(sentiment_label, time_weight):
         'negative': -3
     }.get(sentiment_label, 0)
 
-    # Compute the additional score from the time weight
-    weighted_addition = base_score * time_weight
-
-    return base_score, weighted_addition
+    return base_score
 
 def analyze_asset_sentiment(asset_name):
     logging.info(f"Starting sentiment analysis for asset: {asset_name}")
-    logging.info("Fetching up to 30 articles")
-    articles = fetch_articles(asset_name, max_articles=30)
+    logging.info("Fetching up to 100 articles")
+    articles = fetch_articles(asset_name, max_articles=100)
     logging.info("Analyzing sentiment of each article")
     analyzed_articles = [analyze_article_sentiment(article) for article in articles]
 
-    # Compute the time weight and sentiment score for each article
+    # Compute the sentiment score for each article (no weighting)
     for article in analyzed_articles:
-        time_weight = calculate_time_weight(article["date"])
-        article["time_weight"] = time_weight
-
         sentiment_label = article["sentiment"]["label"]
-        base_score, weighted_addition = calculate_sentiment_score(sentiment_label, time_weight)
-
-        article["base_score"] = base_score
-        article["weighted_addition"] = weighted_addition
-        article["total_score"] = base_score + weighted_addition
+        article["score"] = calculate_sentiment_score(sentiment_label)
 
     logging.info("Sentiment analysis completed")
 
@@ -179,11 +113,8 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     neutral_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "neutral")
     negative_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "negative")
 
-    # Sum of base scores
-    base_score_sum = sum(a["base_score"] for a in analyzed_articles)
-
-    # Sum of weighted scores
-    weighted_score_sum = sum(a["total_score"] for a in analyzed_articles)
+    # Sum of scores
+    score_sum = sum(a["score"] for a in analyzed_articles)
 
     # Create the charts
     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
@@ -197,15 +128,15 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     ax1.axis('equal')
     ax1.set_title(f'Sentiment Distribution for {asset_name}')
 
-    # 2. Time-weighted scores over time (sorted)
+    # 2. Sentiment scores by date (sorted)
     sorted_articles = sorted(analyzed_articles, key=lambda x: x.get("date", ""), reverse=True)
 
     # Maximum number of articles to display (for readability)
-    max_display = min(15, len(sorted_articles))
+    max_display = min(20, len(sorted_articles))
     display_articles = sorted_articles[:max_display]
 
     dates = [a.get("date", "")[:10] for a in display_articles]  # show only the date part
-    scores = [a.get("total_score", 0) for a in display_articles]
+    scores = [a.get("score", 0) for a in display_articles]
 
     # Set bar colors based on the score
     bar_colors = ['green' if s > 0 else 'red' if s < 0 else 'gray' for s in scores]
@@ -213,7 +144,7 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     bars = ax2.bar(range(len(dates)), scores, color=bar_colors)
     ax2.set_xticks(range(len(dates)))
     ax2.set_xticklabels(dates, rotation=45, ha='right')
-    ax2.set_ylabel('Weighted Sentiment Score')
+    ax2.set_ylabel('Sentiment Score')
     ax2.set_title(f'Recent Article Scores for {asset_name}')
     ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
 
@@ -225,8 +156,8 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     Neutral: {neutral_count} ({neutral_count/total_articles*100:.1f}%)
     Negative: {negative_count} ({negative_count/total_articles*100:.1f}%)
 
-    Base Score Sum: {base_score_sum:.2f}
-    Weighted Score Sum: {weighted_score_sum:.2f}
+    Total Score Sum: {score_sum:.2f}
+    Average Score: {score_sum/total_articles:.2f}
     """
 
     plt.figtext(0.5, 0.01, summary_text, ha='center', fontsize=10, bbox={"facecolor":"orange", "alpha":0.2, "pad":5})
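
With the weighting gone, the summary box reports a plain sum and a mean. As a quick worked example with hypothetical counts: 6 positive, 2 neutral, and 2 negative articles give 6·3 + 2·0 + 2·(-3) = 12 in total and 12 / 10 = 1.2 on average.

# Worked example of the new summary figures (hypothetical article counts).
scores = [3] * 6 + [0] * 2 + [-3] * 2                    # 6 positive, 2 neutral, 2 negative
score_sum = sum(scores)                                  # 12
print(f"Total Score Sum: {score_sum:.2f}")               # Total Score Sum: 12.00
print(f"Average Score: {score_sum / len(scores):.2f}")   # Average Score: 1.20
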
@@ -261,16 +192,14 @@ def convert_to_dataframe(analyzed_articles):
     df["Sentiment"] = df["sentiment"].apply(lambda x: sentiment_badge(x["label"]))
 
     # Add score column
-    df["Base Score"] = df["base_score"]
-    df["Weight"] = df["time_weight"].apply(lambda x: f"{x*100:.0f}%")
-    df["Total Score"] = df["total_score"].apply(lambda x: f"{x:.2f}")
+    df["Score"] = df["score"]
 
-    return df[["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"]]
+    return df[["Sentiment", "Title", "Description", "Date", "Score"]]
 
 with gr.Blocks() as iface:
     gr.Markdown("# Trading Asset Sentiment Analysis")
     gr.Markdown(
-        "Enter the name of a trading asset, and I'll fetch recent articles and analyze their sentiment!"
+        "Enter the name of a trading asset, and I'll fetch up to 100 recent articles and analyze their sentiment!"
     )
 
     with gr.Row():
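
The column list returned above has to stay aligned, position by position, with the `headers` and `datatype` lists passed to `gr.Dataframe` in the next hunk. A minimal sketch of that check follows, using a hypothetical one-row frame rather than the app's real data.

import pandas as pd

# Hypothetical single row in the shape convert_to_dataframe now returns.
df = pd.DataFrame([{
    "Sentiment": "positive",
    "Title": "<a href='https://example.com'>Example headline</a>",
    "Description": "Example description",
    "Date": "2025-01-01",
    "Score": 3,
}])

headers = ["Sentiment", "Title", "Description", "Date", "Score"]
datatypes = ["markdown", "html", "markdown", "markdown", "number"]
assert list(df.columns) == headers
assert len(headers) == len(datatypes)   # one datatype per displayed column
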
@@ -304,8 +233,8 @@ with gr.Blocks() as iface:
     with gr.Blocks():
         gr.Markdown("## Articles and Sentiment Analysis")
         articles_output = gr.Dataframe(
-            headers=["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"],
-            datatype=["markdown", "html", "markdown", "markdown", "number", "markdown", "markdown"],
+            headers=["Sentiment", "Title", "Description", "Date", "Score"],
+            datatype=["markdown", "html", "markdown", "markdown", "number"],
             wrap=False,
         )
 