Spaces:

elisara
/

financial-news

Running

File size: 4,418 Bytes

58b55e2


import re
import string
import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline, pipeline


# FinBERT yükle
finbert_model_name = "yiyanghkust/finbert-tone"
finbert_tokenizer = AutoTokenizer.from_pretrained(finbert_model_name)
finbert_model = AutoModelForSequenceClassification.from_pretrained(finbert_model_name)
finbert_classifier = TextClassificationPipeline(model=finbert_model, tokenizer=finbert_tokenizer, top_k=None)


translator = pipeline("translation", model="Helsinki-NLP/opus-mt-tr-en", max_length=512)



def clean_text(text):
    text = text.lower()
    text = re.sub(r'\n', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip()
    text = re.sub(r'[%s]' % re.escape(string.punctuation), '', text)
    return text


def multilingual_batch_classify(user_inputs):
    texts = user_inputs.split('\n')  # Her satırı ayrı haber
    texts = [t for t in texts if t.strip() != '']  # Boş satırları çıkar

    results = []
    for text in texts:
        original_text = text
        cleaned_text = clean_text(text)

        # 🔍 İngilizce'ye çevir
        translation_output = translator(cleaned_text)[0]['translation_text']
        translated_text = translation_output

        # 🔮 FinBERT ile analiz
        outputs = finbert_classifier(translated_text)
        outputs = outputs[0]

        labels_scores = {out['label']: out['score'] for out in outputs}
        best_label = max(labels_scores, key=labels_scores.get)
        best_confidence = labels_scores[best_label]

        positive_score = labels_scores.get("Positive", 0) * 100
        negative_score = labels_scores.get("Negative", 0) * 100
        neutral_score = labels_scores.get("Neutral", 0) * 100

        # 🏷️ Auto-tagging
        if best_label == "Positive":
            auto_tag = "📈 Opportunity"
        elif best_label == "Negative":
            auto_tag = "⚠️ Risk Alert"
        elif best_label == "Neutral":
            auto_tag = "ℹ️ Informational"
        else:
            auto_tag = "🤔 Unclassified"

        results.append((
            original_text,
            translated_text,
            best_label,
            best_confidence * 100,
            positive_score,
            negative_score,
            neutral_score,
            auto_tag
        ))

    df_results = pd.DataFrame(results, columns=[
        "Original Text", "Translated Text", "Predicted Sentiment", "Best Confidence (%)",
        "Positive (%)", "Negative (%)", "Neutral (%)", "Auto Tag"
    ])

    return df_results


def multilingual_analyze_and_filter(user_inputs, selected_sentiment):
    df_results = multilingual_batch_classify(user_inputs)

    
    if selected_sentiment != "All":
        df_results = df_results[df_results["Predicted Sentiment"] == selected_sentiment]

   
    fig, ax = plt.subplots(figsize=(10,6))
    sentiment_colors = {
        "Positive": "green",
        "Negative": "red",
        "Neutral": "blue"
    }

    color_list = [sentiment_colors.get(sent, "gray") for sent in df_results["Predicted Sentiment"]]

    ax.bar(range(len(df_results)), df_results["Best Confidence (%)"], color=color_list)
    ax.set_xticks(range(len(df_results)))
    ax.set_xticklabels([f"News {i+1}" for i in range(len(df_results))], rotation=45)
    ax.set_ylabel("Best Confidence (%)")
    ax.set_title(f"Sentiment Confidence Trend ({selected_sentiment})")

    plt.tight_layout()

    return df_results, fig


with gr.Blocks() as demo:
    gr.Markdown("# 🧠🌍 FinBERT Multilingual Financial News Analyzer")
    gr.Markdown("Paste multiple news headlines (any language!) and see full sentiment analysis after translation!")

    user_input = gr.Textbox(lines=10, placeholder="Enter each news headline on a new line...", label="📝 News Headlines (any language)")
    sentiment_filter = gr.Dropdown(["All", "Positive", "Negative", "Neutral"], label="🎯 Filter by Sentiment", value="All")

    output_table = gr.Dataframe(label="🔮 Prediction Table (with Auto Tags and Translation)")
    output_plot = gr.Plot(label="📈 Sentiment Confidence Trend")

    submit_button = gr.Button("Analyze News Batch (Multilingual)")

    submit_button.click(fn=multilingual_analyze_and_filter, inputs=[user_input, sentiment_filter], outputs=[output_table, output_plot])

# App başlat
demo.launch()