File size: 4,418 Bytes
58b55e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

import re
import string
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline, pipeline


# Load FinBERT (tokenizer + sequence-classification model) once at import
# time so that every request reuses the same in-memory pipeline.
finbert_model_name = "yiyanghkust/finbert-tone"
finbert_tokenizer = AutoTokenizer.from_pretrained(finbert_model_name)
finbert_model = AutoModelForSequenceClassification.from_pretrained(finbert_model_name)
# top_k=None: the code that consumes this pipeline reads a score for each of
# the Positive / Negative / Neutral labels, so all label scores are requested
# rather than only the best one (see the transformers pipeline docs).
finbert_classifier = TextClassificationPipeline(model=finbert_model, tokenizer=finbert_tokenizer, top_k=None)


# Translation pipeline used to turn each headline into English before it is
# scored by FinBERT; max_length=512 is forwarded to the pipeline.
# NOTE(review): the checkpoint name ("opus-mt-tr-en") indicates a
# Turkish -> English model, while the UI text promises "any language" --
# confirm which one is intended.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-tr-en", max_length=512)



def clean_text(text):
    """Normalize a headline before it is handed to the translator.

    The text is lower-cased, every character listed in
    ``string.punctuation`` is removed, and all whitespace runs (including
    newlines) are collapsed to a single space with both ends trimmed.

    Punctuation is stripped *before* whitespace is collapsed; doing it the
    other way round leaves a double space behind a free-standing symbol
    (``"a - b"`` -> ``"a  b"``) and a trailing space behind ``"wow !"``.

    Args:
        text: Raw input string.

    Returns:
        The normalized string. It is empty when the input contained nothing
        but punctuation and/or whitespace.
    """
    without_punctuation = text.lower().translate(
        str.maketrans("", "", string.punctuation)
    )
    # split() with no separator discards leading/trailing whitespace and
    # treats any whitespace run as one delimiter, so join() yields the
    # collapsed, trimmed form in a single pass.
    return " ".join(without_punctuation.split())


def multilingual_batch_classify(user_inputs):
    """Translate every input line to English and score it with FinBERT.

    Each non-blank line of ``user_inputs`` is treated as one news item. The
    line is normalized with ``clean_text``, translated with the module-level
    ``translator`` pipeline, and the translation is scored with the
    module-level ``finbert_classifier`` pipeline.

    Args:
        user_inputs: Multi-line string, one headline per line. ``None`` or an
            empty string produces an empty table.

    Returns:
        pandas.DataFrame with one row per analyzed headline and the columns
        "Original Text", "Translated Text", "Predicted Sentiment",
        "Best Confidence (%)", "Positive (%)", "Negative (%)",
        "Neutral (%)" and "Auto Tag". Scores are scaled to 0-100.
    """
    # Tag shown next to each predicted label; any label that is not listed
    # here falls back to the "Unclassified" tag.
    tag_by_label = {
        "Positive": "📈 Opportunity",
        "Negative": "⚠️ Risk Alert",
        "Neutral": "ℹ️ Informational",
    }

    results = []
    # splitlines() also handles "\r\n" input, which split('\n') would leave
    # with a stray "\r" at the end of every headline; `or ""` tolerates None.
    for raw_line in (user_inputs or "").splitlines():
        original_text = raw_line.strip()
        if not original_text:
            continue  # blank line

        cleaned_text = clean_text(original_text)
        if not cleaned_text:
            # The line held only punctuation, so there is nothing meaningful
            # to translate or classify.
            continue

        # Translate to English.
        translated_text = translator(cleaned_text)[0]['translation_text']

        # Score with FinBERT; the first element holds one
        # {'label': ..., 'score': ...} dict per label.
        label_outputs = finbert_classifier(translated_text)[0]
        labels_scores = {out['label']: out['score'] for out in label_outputs}

        best_label = max(labels_scores, key=labels_scores.get)
        best_confidence = labels_scores[best_label]

        results.append((
            original_text,
            translated_text,
            best_label,
            best_confidence * 100,
            labels_scores.get("Positive", 0) * 100,
            labels_scores.get("Negative", 0) * 100,
            labels_scores.get("Neutral", 0) * 100,
            tag_by_label.get(best_label, "🤔 Unclassified"),
        ))

    return pd.DataFrame(results, columns=[
        "Original Text", "Translated Text", "Predicted Sentiment", "Best Confidence (%)",
        "Positive (%)", "Negative (%)", "Neutral (%)", "Auto Tag"
    ])


def multilingual_analyze_and_filter(user_inputs, selected_sentiment):
    """Classify the headlines, filter by sentiment and chart the confidences.

    Args:
        user_inputs: Multi-line string with one headline per line; passed
            straight to ``multilingual_batch_classify``.
        selected_sentiment: ``"All"`` to keep every row, otherwise only rows
            whose "Predicted Sentiment" equals this value are kept.

    Returns:
        Tuple ``(df_results, fig)``: the (possibly filtered) results table
        and a matplotlib Figure with one bar per remaining row showing its
        "Best Confidence (%)", colored by predicted sentiment.
    """
    df_results = multilingual_batch_classify(user_inputs)

    if selected_sentiment != "All":
        df_results = df_results[df_results["Predicted Sentiment"] == selected_sentiment]

    sentiment_colors = {
        "Positive": "green",
        "Negative": "red",
        "Neutral": "blue"
    }
    # Labels without a configured color are drawn in gray.
    color_list = [sentiment_colors.get(sent, "gray") for sent in df_results["Predicted Sentiment"]]
    positions = range(len(df_results))

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(positions, df_results["Best Confidence (%)"], color=color_list)
    ax.set_xticks(positions)
    # Bars are numbered by their position in the filtered table.
    ax.set_xticklabels([f"News {i+1}" for i in positions], rotation=45)
    ax.set_ylabel("Best Confidence (%)")
    ax.set_title(f"Sentiment Confidence Trend ({selected_sentiment})")

    # Lay out this specific figure rather than pyplot's global "current
    # figure", which may belong to another request being served concurrently.
    fig.tight_layout()
    # pyplot keeps a reference to every figure it creates; unregister this
    # one so that repeated clicks do not accumulate figures in memory. The
    # Figure object itself stays valid for the caller to render.
    plt.close(fig)

    return df_results, fig


# Gradio UI: a text box for the headlines, a sentiment filter, and the
# resulting table and chart. Components are rendered in creation order.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠🌍 FinBERT Multilingual Financial News Analyzer")
    gr.Markdown("Paste multiple news headlines (any language!) and see full sentiment analysis after translation!")

    user_input = gr.Textbox(lines=10, placeholder="Enter each news headline on a new line...", label="📝 News Headlines (any language)")
    sentiment_filter = gr.Dropdown(["All", "Positive", "Negative", "Neutral"], label="🎯 Filter by Sentiment", value="All")

    output_table = gr.Dataframe(label="🔮 Prediction Table (with Auto Tags and Translation)")
    output_plot = gr.Plot(label="📈 Sentiment Confidence Trend")

    submit_button = gr.Button("Analyze News Batch (Multilingual)")

    # Clicking the button runs the full pipeline and fills the table and plot.
    submit_button.click(fn=multilingual_analyze_and_filter, inputs=[user_input, sentiment_filter], outputs=[output_table, output_plot])

# Start the app only when this file is executed as a script, so importing the
# module (e.g. to reuse the functions above) does not start a web server.
if __name__ == "__main__":
    demo.launch()