Stock-Trading-Analysis

Running

App Files Files Community

cutechicken committed on Mar 22

Commit

2d0696f

verified ·

1 Parent(s): 996aec5

Update app.py

Browse files

Files changed (1) hide show

app.py +93 -22

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ sentiment_analyzer = pipeline(
 )
 logging.info("Model initialized successfully")
-def fetch_articles(query, max_articles=100):
     try:
         logging.info(f"Fetching up to {max_articles} articles for query: '{query}'")
         googlenews = GoogleNews(lang="en")
@@ -37,7 +37,7 @@ def fetch_articles(query, max_articles=100):
         # 목표 기사 수에 도달할 때까지 추가 페이지 가져오기
         page = 2
-        while len(articles) < max_articles and page <= 20:  # 최대 20페이지까지 시도
             logging.info(f"Fetched {len(articles)} articles so far. Getting page {page}...")
             googlenews.get_page(page)
             page_results = googlenews.result()
@@ -70,12 +70,68 @@ def analyze_article_sentiment(article):
     article["sentiment"] = sentiment
     return article
-def calculate_sentiment_score(sentiment_label):
     """
-    감성 레이블에 따른 기본 점수 계산
     - positive: +3점
     - neutral: 0점
     - negative: -3점
     """
     base_score = {
         'positive': 3,
@@ -83,19 +139,29 @@ def calculate_sentiment_score(sentiment_label):
         'negative': -3
     }.get(sentiment_label, 0)
-    return base_score
 def analyze_asset_sentiment(asset_name):
     logging.info(f"Starting sentiment analysis for asset: {asset_name}")
-    logging.info("Fetching up to 100 articles")
-    articles = fetch_articles(asset_name, max_articles=100)
     logging.info("Analyzing sentiment of each article")
     analyzed_articles = [analyze_article_sentiment(article) for article in articles]
-    # 각 기사에 대한 감성 점수 계산 (가중치 없음)
     for article in analyzed_articles:
         sentiment_label = article["sentiment"]["label"]
-        article["score"] = calculate_sentiment_score(sentiment_label)
     logging.info("Sentiment analysis completed")
@@ -113,8 +179,11 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     neutral_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "neutral")
     negative_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "negative")
-    # 점수 합계
-    score_sum = sum(a["score"] for a in analyzed_articles)
     # 그래프 생성
     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
@@ -128,15 +197,15 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     ax1.axis('equal')
     ax1.set_title(f'Sentiment Distribution for {asset_name}')
-    # 2. 날짜별 감성 점수 (정렬)
     sorted_articles = sorted(analyzed_articles, key=lambda x: x.get("date", ""), reverse=True)
     # 최대 표시할 기사 수 (가독성을 위해)
-    max_display = min(20, len(sorted_articles))
     display_articles = sorted_articles[:max_display]
     dates = [a.get("date", "")[:10] for a in display_articles]  # 날짜 부분만 표시
-    scores = [a.get("score", 0) for a in display_articles]
     # 점수에 따른 색상 설정
     bar_colors = ['green' if s > 0 else 'red' if s < 0 else 'gray' for s in scores]
@@ -144,7 +213,7 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     bars = ax2.bar(range(len(dates)), scores, color=bar_colors)
     ax2.set_xticks(range(len(dates)))
     ax2.set_xticklabels(dates, rotation=45, ha='right')
-    ax2.set_ylabel('Sentiment Score')
     ax2.set_title(f'Recent Article Scores for {asset_name}')
     ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
@@ -156,8 +225,8 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     Neutral: {neutral_count} ({neutral_count/total_articles*100:.1f}%)
     Negative: {negative_count} ({negative_count/total_articles*100:.1f}%)
-    Total Score Sum: {score_sum:.2f}
-    Average Score: {score_sum/total_articles:.2f}
     """
     plt.figtext(0.5, 0.01, summary_text, ha='center', fontsize=10, bbox={"facecolor":"orange", "alpha":0.2, "pad":5})
@@ -192,14 +261,16 @@ def convert_to_dataframe(analyzed_articles):
     df["Sentiment"] = df["sentiment"].apply(lambda x: sentiment_badge(x["label"]))
     # 점수 컬럼 추가
-    df["Score"] = df["score"]
-    return df[["Sentiment", "Title", "Description", "Date", "Score"]]
 with gr.Blocks() as iface:
     gr.Markdown("# Trading Asset Sentiment Analysis")
     gr.Markdown(
-        "Enter the name of a trading asset, and I'll fetch up to 100 recent articles and analyze their sentiment!"
     )
     with gr.Row():
@@ -233,8 +304,8 @@ with gr.Blocks() as iface:
             with gr.Blocks():
                 gr.Markdown("## Articles and Sentiment Analysis")
                 articles_output = gr.Dataframe(
-                    headers=["Sentiment", "Title", "Description", "Date", "Score"],
-                    datatype=["markdown", "html", "markdown", "markdown", "number"],
                     wrap=False,
                 )

 )
 logging.info("Model initialized successfully")
+def fetch_articles(query, max_articles=30):
     try:
         logging.info(f"Fetching up to {max_articles} articles for query: '{query}'")
         googlenews = GoogleNews(lang="en")
         # 목표 기사 수에 도달할 때까지 추가 페이지 가져오기
         page = 2
+        while len(articles) < max_articles and page <= 10:  # 최대 10페이지까지만 시도
             logging.info(f"Fetched {len(articles)} articles so far. Getting page {page}...")
             googlenews.get_page(page)
             page_results = googlenews.result()
     article["sentiment"] = sentiment
     return article
def calculate_time_weight(article_date_str):
    """
    Compute a recency weight for an article from its publication date.

    Weighting rules:
    - published less than 1 hour ago: 0.24 (24%)
    - from 1 hour up to 24 hours ago: drops by 0.01 (1%) per elapsed hour,
      e.g. 10 hours ago -> 0.15, and never goes below 0.01
    - 24 hours or more ago: fixed at 0.01 (1%)
    - missing or unparseable date, or any unexpected error: 0.01 (1%)

    Args:
        article_date_str: Publication date as text, in one of the formats
            listed in ``date_formats`` below. Surrounding whitespace is
            ignored.

    Returns:
        The weight as a float between 0.01 and 0.24.
    """
    min_weight = 0.01  # floor: 1%
    max_weight = 0.24  # ceiling: 24%

    # A missing or non-text date cannot be parsed; fall back to the minimum
    # weight, exactly as for an unrecognised format.
    if not isinstance(article_date_str, str) or not article_date_str.strip():
        logging.warning(f"Could not parse date: {article_date_str}, using default 24h ago")
        return min_weight

    # Accepted date layouts, tried in order.
    date_formats = (
        '%a, %d %b %Y %H:%M:%S %z',  # noted in the original as the default GoogleNews format
        '%Y-%m-%d %H:%M:%S',
        '%a, %d %b %Y %H:%M:%S',
        '%Y-%m-%dT%H:%M:%S%z',
        '%a %b %d, %Y',
        '%d %b %Y',
    )

    try:
        cleaned = article_date_str.strip()
        parsed_date = None
        for format_str in date_formats:
            try:
                parsed_date = datetime.strptime(cleaned, format_str)
                break
            except ValueError:
                continue

        # No format matched: treat the article as at least 24 hours old.
        if parsed_date is None:
            logging.warning(f"Could not parse date: {article_date_str}, using default 24h ago")
            return min_weight

        # Take "now" in the article's own timezone. Stamping that tzinfo
        # onto the local wall-clock time (datetime.now().replace(tzinfo=...))
        # would skew the age by the host/article offset difference.
        # For a naive parsed date tzinfo is None, which yields naive local time.
        now = datetime.now(parsed_date.tzinfo)
        hours_diff = (now - parsed_date).total_seconds() / 3600

        # Under one hour old; timestamps slightly in the future also land here.
        if hours_diff < 1:
            return max_weight
        # Between 1 and 24 hours: lose 1% per elapsed hour after the first.
        if hours_diff < 24:
            return max(min_weight, max_weight - ((hours_diff - 1) * 0.01))
        # 24 hours or older.
        return min_weight
    except Exception as e:
        # Best-effort boundary: a bad date must never abort the analysis.
        logging.error(f"Error calculating time weight: {e}")
        return min_weight
def calculate_sentiment_score(sentiment_label, time_weight):
    """
    Map a sentiment label to its base score and the time-weighted bonus.

    Base scores:
    - positive: +3
    - neutral: 0
    - negative: -3
    Any other label scores 0.

    The time weight is applied as a fraction of the base score and is
    returned separately so the caller can add it on top:
    - positive article with weight 0.24: 3 + (3 * 0.24) = 3.72
    - negative article with weight 0.15: -3 + (-3 * 0.15) = -3.45

    Returns:
        A ``(base_score, weighted_addition)`` tuple.
    """
    label_points = {'positive': 3, 'neutral': 0, 'negative': -3}
    points = label_points.get(sentiment_label, 0)
    # Extra score contributed by recency, with the same sign as the base score.
    bonus = points * time_weight
    return points, bonus
 def analyze_asset_sentiment(asset_name):
     logging.info(f"Starting sentiment analysis for asset: {asset_name}")
+    logging.info("Fetching up to 30 articles")
+    articles = fetch_articles(asset_name, max_articles=30)
     logging.info("Analyzing sentiment of each article")
     analyzed_articles = [analyze_article_sentiment(article) for article in articles]
+    # 각 기사에 대한 시간 가중치 및 감성 점수 계산
     for article in analyzed_articles:
+        time_weight = calculate_time_weight(article["date"])
+        article["time_weight"] = time_weight
         sentiment_label = article["sentiment"]["label"]
+        base_score, weighted_addition = calculate_sentiment_score(sentiment_label, time_weight)
+        article["base_score"] = base_score
+        article["weighted_addition"] = weighted_addition
+        article["total_score"] = base_score + weighted_addition
     logging.info("Sentiment analysis completed")
     neutral_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "neutral")
     negative_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "negative")
+    # 기본 점수 합계
+    base_score_sum = sum(a["base_score"] for a in analyzed_articles)
+    # 가중치 적용 점수 합계
+    weighted_score_sum = sum(a["total_score"] for a in analyzed_articles)
     # 그래프 생성
     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
     ax1.axis('equal')
     ax1.set_title(f'Sentiment Distribution for {asset_name}')
+    # 2. 시간별 가중치 적용 점수 (정렬)
     sorted_articles = sorted(analyzed_articles, key=lambda x: x.get("date", ""), reverse=True)
     # 최대 표시할 기사 수 (가독성을 위해)
+    max_display = min(15, len(sorted_articles))
     display_articles = sorted_articles[:max_display]
     dates = [a.get("date", "")[:10] for a in display_articles]  # 날짜 부분만 표시
+    scores = [a.get("total_score", 0) for a in display_articles]
     # 점수에 따른 색상 설정
     bar_colors = ['green' if s > 0 else 'red' if s < 0 else 'gray' for s in scores]
     bars = ax2.bar(range(len(dates)), scores, color=bar_colors)
     ax2.set_xticks(range(len(dates)))
     ax2.set_xticklabels(dates, rotation=45, ha='right')
+    ax2.set_ylabel('Weighted Sentiment Score')
     ax2.set_title(f'Recent Article Scores for {asset_name}')
     ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
     Neutral: {neutral_count} ({neutral_count/total_articles*100:.1f}%)
     Negative: {negative_count} ({negative_count/total_articles*100:.1f}%)
+    Base Score Sum: {base_score_sum:.2f}
+    Weighted Score Sum: {weighted_score_sum:.2f}
     """
     plt.figtext(0.5, 0.01, summary_text, ha='center', fontsize=10, bbox={"facecolor":"orange", "alpha":0.2, "pad":5})
     df["Sentiment"] = df["sentiment"].apply(lambda x: sentiment_badge(x["label"]))
     # 점수 컬럼 추가
+    df["Base Score"] = df["base_score"]
+    df["Weight"] = df["time_weight"].apply(lambda x: f"{x*100:.0f}%")
+    df["Total Score"] = df["total_score"].apply(lambda x: f"{x:.2f}")
+    return df[["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"]]
 with gr.Blocks() as iface:
     gr.Markdown("# Trading Asset Sentiment Analysis")
     gr.Markdown(
+        "Enter the name of a trading asset, and I'll fetch recent articles and analyze their sentiment!"
     )
     with gr.Row():
             with gr.Blocks():
                 gr.Markdown("## Articles and Sentiment Analysis")
                 articles_output = gr.Dataframe(
+                    headers=["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"],
+                    datatype=["markdown", "html", "markdown", "markdown", "number", "markdown", "markdown"],
                     wrap=False,
                 )