Stock-Trading-Analysis

Running

App Files Community

cutechicken committed on Mar 22

Commit

996aec5

verified ·

1 Parent(s): 71c3ed0

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -93

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ sentiment_analyzer = pipeline(
 )
 logging.info("Model initialized successfully")
-def fetch_articles(query, max_articles=30):
     try:
         logging.info(f"Fetching up to {max_articles} articles for query: '{query}'")
         googlenews = GoogleNews(lang="en")
@@ -37,7 +37,7 @@ def fetch_articles(query, max_articles=30):
         # 목표 기사 수에 도달할 때까지 추가 페이지 가져오기
         page = 2
-        while len(articles) < max_articles and page <= 10:  # 최대 10페이지까지만 시도
             logging.info(f"Fetched {len(articles)} articles so far. Getting page {page}...")
             googlenews.get_page(page)
             page_results = googlenews.result()
@@ -70,68 +70,12 @@ def analyze_article_sentiment(article):
     article["sentiment"] = sentiment
     return article
-def calculate_time_weight(article_date_str):
     """
-    기사 시간 기준으로 가중치 계산
-    - 1시간 내 기사는 24% 가중치
-    - 시간이 지날수록 1%씩 감소 (최소 1%)
-    - 예: 1시간 내 기사 = 24%, 10시간 전 기사 = 15%, 24시간 전 기사 = 1%
-    - 24시간 이상이면 1%로 고정
-    """
-    try:
-        # 기사 날짜 문자열 파싱 (다양한 형식 처리)
-        date_formats = [
-            '%a, %d %b %Y %H:%M:%S %z',  # 기본 GoogleNews 형식
-            '%Y-%m-%d %H:%M:%S',
-            '%a, %d %b %Y %H:%M:%S',
-            '%Y-%m-%dT%H:%M:%S%z',
-            '%a %b %d, %Y',
-            '%d %b %Y'
-        ]
-        parsed_date = None
-        for format_str in date_formats:
-            try:
-                parsed_date = datetime.strptime(article_date_str, format_str)
-                break
-            except ValueError:
-                continue
-        # 어떤 형식으로도 파싱할 수 없으면 현재 시간 기준 24시간 전으로 가정
-        if parsed_date is None:
-            logging.warning(f"Could not parse date: {article_date_str}, using default 24h ago")
-            return 0.01  # 최소 가중치 1%
-        # 현재 시간과의 차이 계산 (시간 단위)
-        now = datetime.now()
-        if parsed_date.tzinfo is not None:
-            now = now.replace(tzinfo=parsed_date.tzinfo)
-        hours_diff = (now - parsed_date).total_seconds() / 3600
-        # 24시간 이내인 경우만 고려
-        if hours_diff < 1:  # 1시간 이내
-            return 0.24  # 24% 가중치
-        elif hours_diff < 24:  # 1~23시간
-            # 1시간당 1%씩 감소 (1시간 = 24%, 2시간 = 23%, ...)
-            return max(0.01, 0.24 - ((hours_diff - 1) * 0.01))
-        else:
-            return 0.01  # 24시간 이상 지난 기사는 1% 가중치
-    except Exception as e:
-        logging.error(f"Error calculating time weight: {e}")
-        return 0.01  # 오류 발생 시 최소 가중치 적용
-def calculate_sentiment_score(sentiment_label, time_weight):
-    """
-    감성 레이블에 따른 기본 점수 계산 및 시간 가중치 적용
     - positive: +3점
     - neutral: 0점
     - negative: -3점
-    시간 가중치는 백분율로 적용 (기본 점수에 가중치 % 만큼 추가)
-    예:
-    - 1시간 내 긍정 기사: 3점 + (3 * 24%) = 3 + 0.72 = 3.72점
-    - 10시간 전 부정 기사: -3점 + (-3 * 15%) = -3 - 0.45 = -3.45점
     """
     base_score = {
         'positive': 3,
@@ -139,29 +83,19 @@ def calculate_sentiment_score(sentiment_label, time_weight):
         'negative': -3
     }.get(sentiment_label, 0)
-    # 가중치를 적용한 추가 점수 계산
-    weighted_addition = base_score * time_weight
-    return base_score, weighted_addition
 def analyze_asset_sentiment(asset_name):
     logging.info(f"Starting sentiment analysis for asset: {asset_name}")
-    logging.info("Fetching up to 30 articles")
-    articles = fetch_articles(asset_name, max_articles=30)
     logging.info("Analyzing sentiment of each article")
     analyzed_articles = [analyze_article_sentiment(article) for article in articles]
-    # 각 기사에 대한 시간 가중치 및 감성 점수 계산
     for article in analyzed_articles:
-        time_weight = calculate_time_weight(article["date"])
-        article["time_weight"] = time_weight
         sentiment_label = article["sentiment"]["label"]
-        base_score, weighted_addition = calculate_sentiment_score(sentiment_label, time_weight)
-        article["base_score"] = base_score
-        article["weighted_addition"] = weighted_addition
-        article["total_score"] = base_score + weighted_addition
     logging.info("Sentiment analysis completed")
@@ -179,11 +113,8 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     neutral_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "neutral")
     negative_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "negative")
-    # 기본 점수 합계
-    base_score_sum = sum(a["base_score"] for a in analyzed_articles)
-    # 가중치 적용 점수 합계
-    weighted_score_sum = sum(a["total_score"] for a in analyzed_articles)
     # 그래프 생성
     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
@@ -197,15 +128,15 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     ax1.axis('equal')
     ax1.set_title(f'Sentiment Distribution for {asset_name}')
-    # 2. 시간별 가중치 적용 점수 (정렬)
     sorted_articles = sorted(analyzed_articles, key=lambda x: x.get("date", ""), reverse=True)
     # 최대 표시할 기사 수 (가독성을 위해)
-    max_display = min(15, len(sorted_articles))
     display_articles = sorted_articles[:max_display]
     dates = [a.get("date", "")[:10] for a in display_articles]  # 날짜 부분만 표시
-    scores = [a.get("total_score", 0) for a in display_articles]
     # 점수에 따른 색상 설정
     bar_colors = ['green' if s > 0 else 'red' if s < 0 else 'gray' for s in scores]
@@ -213,7 +144,7 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     bars = ax2.bar(range(len(dates)), scores, color=bar_colors)
     ax2.set_xticks(range(len(dates)))
     ax2.set_xticklabels(dates, rotation=45, ha='right')
-    ax2.set_ylabel('Weighted Sentiment Score')
     ax2.set_title(f'Recent Article Scores for {asset_name}')
     ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
@@ -225,8 +156,8 @@ def create_sentiment_summary(analyzed_articles, asset_name):
     Neutral: {neutral_count} ({neutral_count/total_articles*100:.1f}%)
     Negative: {negative_count} ({negative_count/total_articles*100:.1f}%)
-    Base Score Sum: {base_score_sum:.2f}
-    Weighted Score Sum: {weighted_score_sum:.2f}
     """
     plt.figtext(0.5, 0.01, summary_text, ha='center', fontsize=10, bbox={"facecolor":"orange", "alpha":0.2, "pad":5})
@@ -261,16 +192,14 @@ def convert_to_dataframe(analyzed_articles):
     df["Sentiment"] = df["sentiment"].apply(lambda x: sentiment_badge(x["label"]))
     # 점수 컬럼 추가
-    df["Base Score"] = df["base_score"]
-    df["Weight"] = df["time_weight"].apply(lambda x: f"{x*100:.0f}%")
-    df["Total Score"] = df["total_score"].apply(lambda x: f"{x:.2f}")
-    return df[["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"]]
 with gr.Blocks() as iface:
     gr.Markdown("# Trading Asset Sentiment Analysis")
     gr.Markdown(
-        "Enter the name of a trading asset, and I'll fetch recent articles and analyze their sentiment!"
     )
     with gr.Row():
@@ -304,8 +233,8 @@ with gr.Blocks() as iface:
             with gr.Blocks():
                 gr.Markdown("## Articles and Sentiment Analysis")
                 articles_output = gr.Dataframe(
-                    headers=["Sentiment", "Title", "Description", "Date", "Base Score", "Weight", "Total Score"],
-                    datatype=["markdown", "html", "markdown", "markdown", "number", "markdown", "markdown"],
                     wrap=False,
                 )

 )
 logging.info("Model initialized successfully")
+def fetch_articles(query, max_articles=100):
     try:
         logging.info(f"Fetching up to {max_articles} articles for query: '{query}'")
         googlenews = GoogleNews(lang="en")
         # 목표 기사 수에 도달할 때까지 추가 페이지 가져오기
         page = 2
+        while len(articles) < max_articles and page <= 20:  # 최대 20페이지까지 시도
             logging.info(f"Fetched {len(articles)} articles so far. Getting page {page}...")
             googlenews.get_page(page)
             page_results = googlenews.result()
     article["sentiment"] = sentiment
     return article
+def calculate_sentiment_score(sentiment_label):
     """
+    감성 레이블에 따른 기본 점수 계산
     - positive: +3점
     - neutral: 0점
     - negative: -3점
     """
     base_score = {
         'positive': 3,
         'negative': -3
     }.get(sentiment_label, 0)
+    return base_score
 def analyze_asset_sentiment(asset_name):
     logging.info(f"Starting sentiment analysis for asset: {asset_name}")
+    logging.info("Fetching up to 100 articles")
+    articles = fetch_articles(asset_name, max_articles=100)
     logging.info("Analyzing sentiment of each article")
     analyzed_articles = [analyze_article_sentiment(article) for article in articles]
+    # 각 기사에 대한 감성 점수 계산 (가중치 없음)
     for article in analyzed_articles:
         sentiment_label = article["sentiment"]["label"]
+        article["score"] = calculate_sentiment_score(sentiment_label)
     logging.info("Sentiment analysis completed")
     neutral_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "neutral")
     negative_count = sum(1 for a in analyzed_articles if a["sentiment"]["label"] == "negative")
+    # 점수 합계
+    score_sum = sum(a["score"] for a in analyzed_articles)
     # 그래프 생성
     fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
     ax1.axis('equal')
     ax1.set_title(f'Sentiment Distribution for {asset_name}')
+    # 2. 날짜별 감성 점수 (정렬)
     sorted_articles = sorted(analyzed_articles, key=lambda x: x.get("date", ""), reverse=True)
     # 최대 표시할 기사 수 (가독성을 위해)
+    max_display = min(20, len(sorted_articles))
     display_articles = sorted_articles[:max_display]
     dates = [a.get("date", "")[:10] for a in display_articles]  # 날짜 부분만 표시
+    scores = [a.get("score", 0) for a in display_articles]
     # 점수에 따른 색상 설정
     bar_colors = ['green' if s > 0 else 'red' if s < 0 else 'gray' for s in scores]
     bars = ax2.bar(range(len(dates)), scores, color=bar_colors)
     ax2.set_xticks(range(len(dates)))
     ax2.set_xticklabels(dates, rotation=45, ha='right')
+    ax2.set_ylabel('Sentiment Score')
     ax2.set_title(f'Recent Article Scores for {asset_name}')
     ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)
     Neutral: {neutral_count} ({neutral_count/total_articles*100:.1f}%)
     Negative: {negative_count} ({negative_count/total_articles*100:.1f}%)
+    Total Score Sum: {score_sum:.2f}
+    Average Score: {score_sum/total_articles:.2f}
     """
     plt.figtext(0.5, 0.01, summary_text, ha='center', fontsize=10, bbox={"facecolor":"orange", "alpha":0.2, "pad":5})
     df["Sentiment"] = df["sentiment"].apply(lambda x: sentiment_badge(x["label"]))
     # 점수 컬럼 추가
+    df["Score"] = df["score"]
+    return df[["Sentiment", "Title", "Description", "Date", "Score"]]
 with gr.Blocks() as iface:
     gr.Markdown("# Trading Asset Sentiment Analysis")
     gr.Markdown(
+        "Enter the name of a trading asset, and I'll fetch up to 100 recent articles and analyze their sentiment!"
     )
     with gr.Row():
             with gr.Blocks():
                 gr.Markdown("## Articles and Sentiment Analysis")
                 articles_output = gr.Dataframe(
+                    headers=["Sentiment", "Title", "Description", "Date", "Score"],
+                    datatype=["markdown", "html", "markdown", "markdown", "number"],
                     wrap=False,
                 )