# ==============================================================================
# SOCIAL PERCEPTION ANALYZER - FINAL COMPLETE APPLICATION
# Version: 3.0 (Architecturally Refactored, Production Ready)
# ==============================================================================

# --- IMPORTS ---
import gradio as gr
import pandas as pd
import numpy as np
import torch
import re
import sqlite3
import json
import logging
import requests
import os
import time
import random
import functools
from io import StringIO
from datetime import datetime, timezone
from logging.handlers import RotatingFileHandler

# --- APIs and Web Scraping ---
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from GoogleNews import GoogleNews
from urllib.error import HTTPError
import dateparser

# --- NLP & Machine Learning ---
from transformers import pipeline, BitsAndBytesConfig
from sentence_transformers import SentenceTransformer
from huggingface_hub.utils import HfHubHTTPError

# --- Visualization ---
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import seaborn as sns
from wordcloud import WordCloud

# ==============================================================================
# SETUP PRODUCTION-GRADE LOGGING & CONFIGURATION
# ==============================================================================

log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
log_handler = RotatingFileHandler('app.log', maxBytes=5*1024*1024, backupCount=2)
log_handler.setFormatter(log_formatter)
logger = logging.getLogger()
logger.setLevel(logging.INFO)
if not logger.handlers:
    logger.addHandler(log_handler)
logger.info("Application starting up.")

# --- APPLICATION CONFIGURATION ---
APP_TITLE = "Social Perception Analyzer"
APP_TAGLINE = "Prepared for the Policymakers of Bangladesh Nationalist Party (BNP)"
APP_FOOTER = "Developed by CDSR"

# --- FONT CONFIGURATION ---
FONT_PATH = 'NotoSansBengali-Regular.ttf'
try:
    BANGLA_FONT = FontProperties(fname=FONT_PATH)
    logger.info("Successfully loaded 'NotoSansBengali-Regular.ttf' font.")
except OSError:
    logger.error("Failed to load 'NotoSansBengali-Regular.ttf'. Ensure the file is in the root directory.")
    gr.Warning("Bangla font not found! Visualizations may not render text correctly.")
    BANGLA_FONT = FontProperties()

# ==============================================================================
# CORE HELPER FUNCTIONS
# ==============================================================================

BANGLA_STOP_WORDS = [
    'অতএব', 'অথচ', 'অথবা', 'অনুযায়ী', 'অনেক', 'অনেকে', 'অনেকেই', 'অন্তত', 'অন্য', 'অবধি', 'অবশ্য',
    'অভিপ্রায়', 'একে', 'একই', 'একেবারে', 'একটি', 'একবার', 'এখন', 'এখনও', 'এখানে', 'এখানেই', 'এটি',
    'এতটাই', 'এতদূর', 'এতটুকু', 'এক', 'এবং', 'এবার', 'এমন', 'এমনভাবে', 'এর', 'এরা', 'এঁরা', 'এঁদের',
    'এই', 'এইভাবে', 'ও', 'ওঁরা', 'ওঁর', 'ওঁদের', 'ওকে', 'ওখানে', 'ওদের', 'ওর', 'কাছ', 'কাছে', 'কাজ',
    'কারণ', 'কিছু', 'কিছুই', 'কিন্তু', 'কিভাবে', 'কেন', 'কোন', 'কোনও', 'কোনো', 'ক্ষেত্রে', 'খুব',
    'গুলি', 'গিয়ে', 'চায়', 'ছাড়া', 'জন্য', 'জানা', 'ঠিক', 'তিনি', 'তিন', 'তিনিও', 'তাকে', 'তাঁকে',
    'তার', 'তাঁর', 'তারা', 'তাঁরা', 'তাদের', 'তাঁদের', 'তাহলে', ' থাকলেও', 'থেকে', 'মধ্যেই', 'মধ্যে',
    'द्वारा', 'নয়', 'না', 'নিজের', 'নিজে', 'নিয়ে', 'পারেন', 'পারা', 'পারে', 'পরে', 'পর্যন্ত', 'পুনরায়',
    'ফলে', 'বজায়', 'বা', 'বাদে', 'বার', 'বিশেষ', 'বিভিন্ন', 'ব্যবহার', 'ব্যাপারে', 'ভাবে', 'ভাবেই', 'মাধ্যমে',
    'মতো', 'মতোই', 'যখন', 'যদি', 'যদিও', 'যা', 'যাকে', 'যাওয়া', 'যায়', 'যে', 'যেখানে', 'যেতে', 'যেমন',
    'যেহেতু', 'রহিছে', 'শিক্ষা', 'শুধু', 'সঙ্গে', 'সব', 'সমস্ত', 'সম্প্রতি', 'সহ', 'সাধারণ', 'সামনে', 'হতে',
    'হতেই', 'হবে', 'হয়', 'হয়তো', 'হয়', 'হচ্ছে', 'হত', 'হলে', 'হলেও', 'হয়নি', 'হাজার', 'হোওয়া', 'আরও', 'আমরা',
    'আমার', 'আমি', 'আর', 'আগে', 'আগেই', 'আছে', 'আজ', 'তাকে', 'তাতে', 'তাদের', 'তাহার', 'তাহাতে', 'তাহারই',
    'তথা', 'তথাপি', 'সে', 'সেই', 'সেখান', 'সেখানে', 'থেকে', 'নাকি', 'নাগাদ', 'দু', 'দুটি', 'সুতরাং',
    'সম্পর্কে', 'সঙ্গেও', 'সর্বাধিক', 'সর্বদা', 'সহ', 'হৈতে', 'হইবে', 'হইয়া', 'হৈল', 'জানিয়েছেন', 'প্রতিবেদক'
]

def get_dynamic_time_agg(start_date, end_date):
    """Hardened helper to determine time aggregation level."""
    if not isinstance(start_date, pd.Timestamp) or not isinstance(end_date, pd.Timestamp):
        return 'D', 'Daily' # Graceful fallback
    delta = end_date - start_date
    if delta.days <= 2: return 'H', 'Hourly'
    if delta.days <= 90: return 'D', 'Daily'
    if delta.days <= 730: return 'W', 'Weekly'
    return 'M', 'Monthly'

# ==============================================================================
# ML MODEL MANAGEMENT
# ==============================================================================


SENTIMENT_MODEL_ID = 'ahs95/banglabert-sentiment-analysis'
MODELS = {"sentiment_pipeline": None}

def _load_pipeline_with_retry(task, model_id, retries=3):
    logger.info(f"Initializing {task} pipeline for model: {model_id}")
    for attempt in range(retries):
        try:
            device = 0 if torch.cuda.is_available() else -1
            if device == -1: gr.Warning(f"{model_id} will run on CPU and may be very slow.")
            pipe = pipeline(task, model=model_id, device=device)
            logger.info(f"Pipeline '{task}' loaded successfully.")
            return pipe
        except (HfHubHTTPError, requests.exceptions.ConnectionError) as e:
            logger.warning(f"Network error on loading {model_id} (Attempt {attempt + 1}/{retries}): {e}")
            if attempt < retries - 1: time.sleep(5)
            else: raise gr.Error(f"Failed to download model '{model_id}' after {retries} attempts. Check network.")
        except Exception as e:
            logger.error(f"An unexpected error occurred while loading {model_id}: {e}")
            raise gr.Error(f"Could not initialize model '{model_id}'. Error: {e}")
    return None

def get_sentiment_pipeline():
    if MODELS["sentiment_pipeline"] is None:
        MODELS["sentiment_pipeline"] = _load_pipeline_with_retry("sentiment-analysis", SENTIMENT_MODEL_ID)
    return MODELS["sentiment_pipeline"]

# ==============================================================================
# NEWS SCRAPER BACKEND
# ==============================================================================

def run_news_scraper_pipeline(search_keywords, sites, start_date_str, end_date_str, interval, max_pages, filter_keys, progress=gr.Progress()):
    """Full, robust implementation of the news scraper."""
    # Input validation and sanitization
    search_keywords = search_keywords.strip()
    if not all([search_keywords, start_date_str, end_date_str]):
        raise gr.Error("Search Keywords, Start Date, and End Date are required.")

    start_dt = dateparser.parse(start_date_str)
    end_dt = dateparser.parse(end_date_str)
    if not all([start_dt, end_dt]):
        raise gr.Error("Invalid date format. Please use a recognizable format like YYYY-MM-DD or '2 weeks ago'.")

    all_articles, current_dt = [], start_dt
    while current_dt <= end_dt:
        interval_end_dt = min(current_dt + pd.Timedelta(days=interval - 1), end_dt)
        start_str, end_str = current_dt.strftime('%Y-%m-%d'), interval_end_dt.strftime('%Y-%m-%d')
        progress(0, desc=f"Fetching news from {start_str} to {end_str}")

        site_query = f"({' OR '.join(['site:' + s.strip() for s in sites.split(',') if s.strip()])})" if sites else ""
        final_query = f'"{search_keywords}" {site_query} after:{start_str} before:{end_str}'

        googlenews = GoogleNews(lang='bn', region='BD')
        googlenews.search(final_query)
        
        for page in range(1, max_pages + 1):
            try:
                results = googlenews.results()
                if not results: break
                all_articles.extend(results)
                if page < max_pages:
                    googlenews.getpage(page + 1)
                    time.sleep(random.uniform(2, 5))
            except HTTPError as e:
                if e.code == 429:
                    wait_time = random.uniform(15, 30)
                    gr.Warning(f"Rate limited by Google News. Pausing for {wait_time:.0f} seconds.")
                    time.sleep(wait_time)
                else:
                    logger.error(f"HTTP Error fetching news: {e}"); break
            except Exception as e:
                logger.error(f"An error occurred fetching news: {e}"); break
        
        current_dt += pd.Timedelta(days=interval)

    if not all_articles: return pd.DataFrame(), pd.DataFrame()
    
    df = pd.DataFrame(all_articles).drop_duplicates(subset=['link'])
    df['published_date'] = df['date'].apply(lambda x: dateparser.parse(x, languages=['bn']))
    df.dropna(subset=['published_date', 'title'], inplace=True)

    if filter_keys and filter_keys.strip():
        keywords = [k.strip().lower() for k in filter_keys.split(',')]
        mask = df.apply(lambda row: any(key in str(row['title']).lower() or key in str(row['desc']).lower() for key in keywords), axis=1)
        df = df[mask]

    return df, df[['published_date', 'title', 'media', 'desc', 'link']].sort_values(by='published_date', ascending=False)

# ==============================================================================
# YOUTUBE ANALYZER BACKEND
# ==============================================================================
# (This section remains unchanged from the previous robust version)
def _fetch_video_details(youtube_service, video_ids: list):
    all_videos_data = []
    try:
        for i in range(0, len(video_ids), 50):
            id_batch = video_ids[i:i+50]
            video_request = youtube_service.videos().list(part="snippet,statistics", id=",".join(id_batch))
            video_response = video_request.execute()
            for item in video_response.get('items', []):
                stats = item.get('statistics', {})
                all_videos_data.append({
                    'video_id': item['id'], 'video_title': item['snippet']['title'],
                    'channel': item['snippet']['channelTitle'], 'published_date': item['snippet']['publishedAt'],
                    'view_count': int(stats.get('viewCount', 0)), 'like_count': int(stats.get('likeCount', 0)),
                    'comment_count': int(stats.get('commentCount', 0))
                })
    except HttpError as e:
        logger.error(f"Could not fetch video details. Error: {e}")
        gr.Warning("Could not fetch details for some videos due to an API error.")
    return all_videos_data

def _scrape_single_video_comments(youtube_service, video_id, max_comments):
    comments_list = []
    try:
        request = youtube_service.commentThreads().list(
            part="snippet", videoId=video_id, maxResults=min(max_comments, 100),
            order='relevance', textFormat="plainText"
        )
        response = request.execute()
        for item in response.get('items', []):
            snippet = item['snippet']['topLevelComment']['snippet']
            comments_list.append({
                'author': snippet['authorDisplayName'], 'published_date_comment': snippet['publishedAt'],
                'comment_text': snippet['textDisplay'], 'likes': snippet['likeCount'],
                'replies': item['snippet']['totalReplyCount']
            })
    except HttpError as e:
        logger.warning(f"Could not retrieve comments for video {video_id} (may be disabled). Error: {e}")
    return comments_list

def run_youtube_analysis_pipeline(api_key, query, max_videos_for_stats, num_videos_for_comments, max_comments_per_video, published_after, progress=gr.Progress()):
    if not api_key: raise gr.Error("YouTube API Key is required.")
    if not query: raise gr.Error("Search Keywords are required.")
    try:
        youtube = build('youtube', 'v3', developerKey=api_key)
    except HttpError as e:
        raise gr.Error(f"Failed to initialize YouTube service. Check API Key. Error: {e}")
    except Exception as e:
        raise gr.Error(f"An unexpected error occurred during API initialization: {e}")

    progress(0.1, desc="Performing broad scan for videos...")
    all_video_ids, next_page_token, total_results_estimate = [], None, 0
    PAGES_TO_FETCH = min(15, (max_videos_for_stats // 50) + 1)
    search_params = {'q': query, 'part': 'id', 'maxResults': 50, 'type': 'video', 'order': 'relevance'}
    if published_after:
        parsed_date = dateparser.parse(published_after)
        if parsed_date:
            search_params['publishedAfter'] = parsed_date.replace(tzinfo=timezone.utc).isoformat()
        else:
            gr.Warning(f"Could not parse date: '{published_after}'. Ignoring filter.")

    for page in range(PAGES_TO_FETCH):
        try:
            if next_page_token: search_params['pageToken'] = next_page_token
            response = youtube.search().list(**search_params).execute()
            if page == 0:
                total_results_estimate = response.get('pageInfo', {}).get('totalResults', 0)
            all_video_ids.extend([item['id']['videoId'] for item in response.get('items', [])])
            next_page_token = response.get('nextPageToken')
            progress(0.1 + (0.3 * (page / PAGES_TO_FETCH)), desc=f"Broad scan: Found {len(all_video_ids)} videos...")
            if not next_page_token: break
        except HttpError as e:
             if "quotaExceeded" in str(e): raise gr.Error("CRITICAL: YouTube API daily quota exceeded. Try again tomorrow.")
             logger.error(f"HTTP error during video search: {e}"); break

    if not all_video_ids:
        return pd.DataFrame(), pd.DataFrame(), 0

    progress(0.4, desc=f"Fetching details for {len(all_video_ids)} videos...")
    videos_df_full_scan = pd.DataFrame(_fetch_video_details(youtube, all_video_ids))
    if videos_df_full_scan.empty:
        return pd.DataFrame(), pd.DataFrame(), 0

    videos_df_full_scan['published_date'] = pd.to_datetime(videos_df_full_scan['published_date'])
    videos_df_full_scan['engagement_rate'] = ((videos_df_full_scan['like_count'] + videos_df_full_scan['comment_count']) / videos_df_full_scan['view_count']).fillna(0)
    videos_df_full_scan = videos_df_full_scan.sort_values(by='view_count', ascending=False).reset_index(drop=True)

    videos_to_scrape_df, all_comments = videos_df_full_scan.head(int(num_videos_for_comments)), []
    for index, row in videos_to_scrape_df.iterrows():
        progress(0.7 + (0.3 * (index / len(videos_to_scrape_df))), desc=f"Deep dive: Scraping comments from video {index+1}/{len(videos_to_scrape_df)}...")
        comments_for_video = _scrape_single_video_comments(youtube, row['video_id'], max_comments_per_video)
        if comments_for_video:
            for comment in comments_for_video:
                comment.update({'video_id': row['video_id'], 'video_title': row['video_title']})
            all_comments.extend(comments_for_video)

    comments_df = pd.DataFrame(all_comments)
    if not comments_df.empty:
        comments_df['published_date_comment'] = pd.to_datetime(comments_df['published_date_comment'])

    logger.info(f"YouTube analysis complete. Est. total videos: {total_results_estimate}. Scanned: {len(videos_df_full_scan)}. Comments: {len(comments_df)}.")
    return videos_df_full_scan, comments_df, total_results_estimate


# ==============================================================================
# ADVANCED ANALYTICS MODULE
# ==============================================================================
# (This section remains unchanged, as it was already robust)
def set_plot_style():
    plt.style.use('seaborn-v0_8-whitegrid')
    plt.rcParams['figure.dpi'] = 100

def run_sentiment_analysis(df: pd.DataFrame, text_column: str, progress=gr.Progress()):
    if text_column not in df.columns: return df
    sentiment_pipeline = get_sentiment_pipeline()
    if not sentiment_pipeline:
        gr.Warning("Sentiment model failed to load. Skipping analysis.")
        return df

    texts = df[text_column].dropna().tolist()
    if not texts: return df

    progress(0, desc="Running sentiment analysis...")
    results = sentiment_pipeline(texts, batch_size=32)

    text_to_sentiment = {text: result for text, result in zip(texts, results)}
    df['sentiment_label'] = df[text_column].map(lambda x: text_to_sentiment.get(x, {}).get('label'))
    df['sentiment_score'] = df[text_column].map(lambda x: text_to_sentiment.get(x, {}).get('score'))
    logger.info("Sentiment analysis complete.")
    return df

def generate_scraper_dashboard(df: pd.DataFrame):
    set_plot_style()
    
    total_articles, unique_media = len(df), df['media'].nunique()
    start_date, end_date = pd.to_datetime(df['published_date']).min(), pd.to_datetime(df['published_date']).max()
    date_range_str = f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}"

    agg_code, agg_name = get_dynamic_time_agg(start_date, end_date)
    timeline_df = df.set_index(pd.to_datetime(df['published_date'])).resample(agg_code).size().reset_index(name='count')
    timeline_plot = gr.LinePlot(timeline_df, x='published_date', y='count', title=f'{agg_name} News Volume', tooltip=['published_date', 'count'])
    
    media_counts = df['media'].dropna().value_counts().nlargest(15).sort_values()
    fig_media = None
    if not media_counts.empty:
        fig_media, ax = plt.subplots(figsize=(8, 6)); media_counts.plot(kind='barh', ax=ax, color='skyblue'); ax.set_title("Top 15 Media Sources", fontproperties=BANGLA_FONT)
        ax.set_yticklabels(media_counts.index, fontproperties=BANGLA_FONT); ax.set_xlabel("Article Count"); plt.tight_layout()

    text = " ".join(title for title in df['title'].astype(str))
    fig_wc = None
    try:
        wc = WordCloud(font_path=FONT_PATH, width=800, height=400, background_color='white', stopwords=BANGLA_STOP_WORDS, collocations=False).generate(text)
        fig_wc, ax = plt.subplots(figsize=(10, 5)); ax.imshow(wc, interpolation='bilinear'); ax.axis("off")
    except Exception as e: logger.error(f"WordCloud failed: {e}")
    
    return {
        kpi_total_articles: str(total_articles), kpi_unique_media: str(unique_media), kpi_date_range: date_range_str,
        dashboard_timeline_plot: timeline_plot, dashboard_media_plot: fig_media, dashboard_wordcloud_plot: fig_wc,
        scraper_dashboard_group: gr.update(visible=True)
    }

def generate_sentiment_dashboard(df: pd.DataFrame):
    updates = {sentiment_dashboard_tab: gr.update(visible=False)}
    set_plot_style()
    
    if 'sentiment_label' in df.columns:
        sentiment_counts = df['sentiment_label'].value_counts()
        fig_pie, fig_media_sent = None, None
        if not sentiment_counts.empty:
            fig_pie, ax = plt.subplots(figsize=(6, 6)); ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=['#66c2a5', '#fc8d62', '#8da0cb'])
            ax.set_title("Overall Sentiment Distribution", fontproperties=BANGLA_FONT); ax.axis('equal')
        
        top_media = df['media'].value_counts().nlargest(10).index
        media_sentiment = pd.crosstab(df[df['media'].isin(top_media)]['media'], df['sentiment_label'], normalize='index').mul(100)
        if not media_sentiment.empty:
            fig_media_sent, ax = plt.subplots(figsize=(10, 7)); media_sentiment.plot(kind='barh', stacked=True, ax=ax, colormap='viridis')
            ax.set_title("Sentiment by Top Media Sources", fontproperties=BANGLA_FONT); ax.set_yticklabels(media_sentiment.index, fontproperties=BANGLA_FONT); plt.tight_layout()
        
        updates.update({sentiment_pie_plot: fig_pie, sentiment_by_media_plot: fig_media_sent, sentiment_dashboard_tab: gr.update(visible=True)})
    return updates

def generate_youtube_dashboard(videos_df, comments_df):
    set_plot_style()
    kpis = {
        kpi_yt_videos_found: f"{len(videos_df):,}" if videos_df is not None else "0",
        kpi_yt_views_scanned: f"{videos_df['view_count'].sum():,}" if videos_df is not None else "0",
        kpi_yt_comments_scraped: f"{len(comments_df):,}" if comments_df is not None else "0"
    }
    
    channel_counts = videos_df['channel'].value_counts().nlargest(15).sort_values()
    fig_channels, ax = plt.subplots(figsize=(8, 6))
    if not channel_counts.empty:
        channel_counts.plot(kind='barh', ax=ax, color='coral'); ax.set_title("Top 15 Channels by Video Volume", fontproperties=BANGLA_FONT); ax.set_yticklabels(channel_counts.index, fontproperties=BANGLA_FONT); plt.tight_layout()

    fig_wc, fig_pie, fig_sentiment_video = None, None, None
    if comments_df is not None and not comments_df.empty:
        text = " ".join(comment for comment in comments_df['comment_text'].astype(str))
        try:
            wc = WordCloud(font_path=FONT_PATH, width=800, height=400, background_color='white', stopwords=BANGLA_STOP_WORDS, collocations=False).generate(text)
            fig_wc, ax = plt.subplots(figsize=(10, 5)); ax.imshow(wc, interpolation='bilinear'); ax.axis("off"); ax.set_title("Most Common Words in Comments", fontproperties=BANGLA_FONT)
        except Exception as e: logger.error(f"YouTube WordCloud failed: {e}")

        if 'sentiment_label' in comments_df.columns:
            sentiment_counts = comments_df['sentiment_label'].value_counts()
            if not sentiment_counts.empty:
                fig_pie, ax = plt.subplots(figsize=(6, 6)); ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90, colors=['#66c2a5', '#fc8d62', '#8da0cb']); ax.set_title("Overall Comment Sentiment", fontproperties=BANGLA_FONT)
            
            top_videos_by_comment = comments_df['video_title'].value_counts().nlargest(10).index
            video_sentiment = comments_df.groupby('video_title')['sentiment_label'].value_counts(normalize=True).unstack().mul(100).reindex(top_videos_by_comment).dropna(how='all')
            if not video_sentiment.empty:
                fig_sentiment_video, ax = plt.subplots(figsize=(10, 8)); video_sentiment.plot(kind='barh', stacked=True, ax=ax, colormap='viridis'); ax.set_title("Comment Sentiment by Top 10 Videos", fontproperties=BANGLA_FONT); ax.set_yticklabels(video_sentiment.index, fontproperties=BANGLA_FONT); plt.tight_layout()

    return {**kpis, yt_channel_plot: fig_channels, yt_wordcloud_plot: fig_wc, yt_sentiment_pie_plot: fig_pie, yt_sentiment_by_video_plot: fig_sentiment_video}

def generate_youtube_topic_dashboard(videos_df_full_scan: pd.DataFrame):
    if videos_df_full_scan is None or videos_df_full_scan.empty: return None, None, None
    set_plot_style()
    
    channel_views = videos_df_full_scan.groupby('channel')['view_count'].sum().nlargest(15).sort_values()
    fig_channel_views, ax = plt.subplots(figsize=(10, 7)); channel_views.plot(kind='barh', ax=ax, color='purple'); ax.set_title("Channel Dominance by Total Views (Top 15)", fontproperties=BANGLA_FONT); ax.set_xlabel("Combined Views on Topic"); ax.set_yticklabels(channel_views.index, fontproperties=BANGLA_FONT); plt.tight_layout()

    df_sample = videos_df_full_scan.sample(n=min(len(videos_df_full_scan), 200))
    avg_views, avg_engagement = df_sample['view_count'].median(), df_sample['engagement_rate'].median()
    fig_quadrant, ax = plt.subplots(figsize=(10, 8)); sns.scatterplot(data=df_sample, x='view_count', y='engagement_rate', size='like_count', sizes=(20, 400), hue='channel', alpha=0.7, ax=ax, legend=False)
    ax.set_xscale('log'); ax.set_yscale('log'); ax.set_title("Content Performance Quadrant", fontproperties=BANGLA_FONT); ax.set_xlabel("Video Views (Log Scale)", fontproperties=BANGLA_FONT); ax.set_ylabel("Engagement Rate (Log Scale)", fontproperties=BANGLA_FONT)
    ax.axhline(avg_engagement, ls='--', color='gray'); ax.axvline(avg_views, ls='--', color='gray'); ax.text(avg_views*1.1, ax.get_ylim()[1], 'High Performers', color='green', fontproperties=BANGLA_FONT); ax.text(ax.get_xlim()[0], avg_engagement*1.1, 'Niche Stars', color='blue', fontproperties=BANGLA_FONT)

    fig_age, ax = plt.subplots(figsize=(10, 7)); sns.scatterplot(data=df_sample, x='published_date', y='view_count', size='engagement_rate', sizes=(20, 400), alpha=0.6, ax=ax)
    ax.set_yscale('log'); ax.set_title("Content Age vs. Impact", fontproperties=BANGLA_FONT); ax.set_xlabel("Publication Date", fontproperties=BANGLA_FONT); ax.set_ylabel("Views (Log Scale)", fontproperties=BANGLA_FONT); plt.xticks(rotation=45)
    
    return fig_channel_views, fig_quadrant, fig_age

# ==============================================================================
# GRADIO UI DEFINITION
# ==============================================================================

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="orange"), title=APP_TITLE) as app:
    gr.Markdown(f"# {APP_TITLE}\n*{APP_TAGLINE}*")

    # --- STATE MANAGEMENT ---
    scraper_results_state = gr.State()
    youtube_results_state = gr.State()

    with gr.Tabs() as tabs:
        with gr.TabItem("1. News Scraper", id=0):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 1. Search Criteria")
                    search_keywords_textbox = gr.Textbox(label="Search Keywords", placeholder="e.g., বিএনপি সমাবেশ")
                    sites_to_search_textbox = gr.Textbox(label="Target Sites (Optional, comma-separated)", placeholder="e.g., prothomalo.com")
                    start_date_textbox = gr.Textbox(label="Start Date", placeholder="YYYY-MM-DD or 'last week'")
                    end_date_textbox = gr.Textbox(label="End Date", placeholder="YYYY-MM-DD or 'today'")
                    gr.Markdown("### 2. Scraping Parameters")
                    interval_days_slider = gr.Slider(1, 7, 3, step=1, label="Days per Interval")
                    max_pages_slider = gr.Slider(1, 10, 5, step=1, label="Max Pages per Interval")
                    filter_keywords_textbox = gr.Textbox(label="Filter Keywords (comma-separated, optional)", placeholder="e.g., নির্বাচন, সরকার")
                    start_scraper_button = gr.Button("Start Scraping & Analysis", variant="primary")
                with gr.Column(scale=2):
                    scraper_results_df = gr.DataFrame(label="Filtered Results", interactive=False, wrap=True)
                    scraper_download_file = gr.File(label="Download Filtered Results CSV")

        with gr.TabItem("2. News Analytics", id=1):
             with gr.Group(visible=False) as scraper_dashboard_group:
                with gr.Tabs():
                    with gr.TabItem("Overview"):
                        with gr.Row():
                            kpi_total_articles = gr.Textbox(label="Total Articles Found", interactive=False)
                            kpi_unique_media = gr.Textbox(label="Unique Media Sources", interactive=False)
                            kpi_date_range = gr.Textbox(label="Date Range of Articles", interactive=False)
                        dashboard_timeline_plot = gr.LinePlot(label="News Volume Timeline")
                        with gr.Row():
                            dashboard_media_plot = gr.Plot(label="Top Media Sources by Article Count")
                            dashboard_wordcloud_plot = gr.Plot(label="Headline Word Cloud")
                    with gr.TabItem("Sentiment Analysis", visible=False) as sentiment_dashboard_tab:
                        with gr.Row():
                            sentiment_pie_plot = gr.Plot(label="Overall Sentiment")
                            sentiment_by_media_plot = gr.Plot(label="Sentiment by Media Source")

        with gr.TabItem("3. YouTube Topic Analysis", id=2):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 1. YouTube API & Search")
                    yt_api_key = gr.Textbox(label="YouTube API Key", type="password", placeholder="Paste your API key")
                    yt_search_keywords = gr.Textbox(label="Search Keywords", placeholder="e.g., বিএনপি, তারেক রহমান")
                    yt_published_after = gr.Textbox(label="Published After Date (Optional)", placeholder="YYYY-MM-DD or '1 month ago'")
                    gr.Markdown("### 2. Analysis Parameters")
                    yt_max_videos_for_stats = gr.Slider(label="Videos to Scan for Topic Stats (Broad Scan)", minimum=50, maximum=750, value=300, step=50)
                    yt_num_videos_for_comments = gr.Slider(label="Top Videos for Comment Analysis (Deep Dive)", minimum=5, maximum=100, value=25, step=5)
                    yt_max_comments = gr.Slider(10, 100, 30, step=10, label="Max Comments per Video")
                    start_yt_analysis_button = gr.Button("Start YouTube Analysis", variant="primary")
                with gr.Column(scale=2):
                    with gr.Group(visible=False) as yt_dashboard_group:
                        gr.Markdown("### Topic Footprint KPIs (Based on Broad Scan)")
                        with gr.Row():
                            kpi_yt_total_topic_videos = gr.Textbox(label="Est. Total Videos on Topic (YT)", interactive=False)
                            kpi_yt_videos_found = gr.Textbox(label="Videos Scanned for Stats", interactive=False)
                            kpi_yt_views_scanned = gr.Textbox(label="Combined Views (of Scanned)", interactive=False)
                            kpi_yt_comments_scraped = gr.Textbox(label="Comments Analyzed (from Top Videos)", interactive=False)
                        with gr.Tabs():
                            with gr.TabItem("Deep Dive Analysis (on Top Videos)"):
                                yt_videos_df_output = gr.DataFrame(label="Top Videos Analyzed for Comments (sorted by views)")
                                with gr.Row():
                                    yt_channel_plot = gr.Plot(label="Channel Contribution by Video Count")
                                    yt_sentiment_pie_plot = gr.Plot(label="Overall Comment Sentiment")
                                with gr.Row():
                                    yt_wordcloud_plot = gr.Plot(label="Comment Word Cloud")
                                    yt_sentiment_by_video_plot = gr.Plot(label="Comment Sentiment by Video")
                            with gr.TabItem("Topic-Level Analytics (on All Scanned Videos)"):
                                yt_channel_views_plot = gr.Plot(label="Channel Dominance by Views")
                                yt_performance_quadrant_plot = gr.Plot(label="Content Performance Quadrant")
                                yt_content_age_plot = gr.Plot(label="Content Age vs. Impact")
    
    gr.Markdown(f"<div style='text-align: center; margin-top: 20px;'>{APP_FOOTER}</div>")

    # ==============================================================================
    # EVENT HANDLERS
    # ==============================================================================
    
    # --- NEWS SCRAPER WORKFLOW ---
    def news_scraper_workflow(search_keywords, sites, start_date, end_date, interval, max_pages, filter_keys, progress=gr.Progress()):
        progress(0, desc="Starting news analysis...")
        raw_df, display_df = run_news_scraper_pipeline(search_keywords, sites, start_date, end_date, interval, max_pages, filter_keys, progress)
        
        if raw_df.empty:
            gr.Info("No news articles found for your query."); return None, None, None
            
        progress(0.8, desc="Analyzing sentiment of news headlines...")
        analyzed_df = run_sentiment_analysis(raw_df.copy(), 'title', progress)
        
        output_path = "filtered_news_data.csv"; display_df.to_csv(output_path, index=False)
        return display_df, output_path, analyzed_df

    start_scraper_button.click(
        fn=news_scraper_workflow,
        inputs=[search_keywords_textbox, sites_to_search_textbox, start_date_textbox, end_date_textbox, interval_days_slider, max_pages_slider, filter_keywords_textbox],
        outputs=[scraper_results_df, scraper_download_file, scraper_results_state]
    )

    def update_news_dashboards(analyzed_df):
        if analyzed_df is None or analyzed_df.empty:
            return {scraper_dashboard_group: gr.update(visible=False), sentiment_dashboard_tab: gr.update(visible=False)}
        
        scraper_updates = generate_scraper_dashboard(analyzed_df)
        sentiment_updates = generate_sentiment_dashboard(analyzed_df)
        return {**scraper_updates, **sentiment_updates}

    news_ui_components = [
        scraper_dashboard_group, kpi_total_articles, kpi_unique_media, kpi_date_range,
        dashboard_timeline_plot, dashboard_media_plot, dashboard_wordcloud_plot,
        sentiment_dashboard_tab, sentiment_pie_plot, sentiment_by_media_plot
    ]
    scraper_results_state.change(fn=update_news_dashboards, inputs=scraper_results_state, outputs=news_ui_components)

    # --- YOUTUBE WORKFLOW ---
    def youtube_workflow(api_key, query, max_stats, num_comments, max_comments, published_after, progress=gr.Progress()):
        sanitized_api_key = api_key.strip()
        sanitized_query = query.strip()
        videos_df_full, comments_df, total_vids_est = run_youtube_analysis_pipeline(
            sanitized_api_key, sanitized_query, max_stats, num_comments, max_comments, published_after, progress
        )
        if videos_df_full.empty:
            gr.Info("No videos found for your YouTube query."); return None, None

        if comments_df is not None and not comments_df.empty:
            progress(0.9, desc="Analyzing comment sentiment...")
            comments_df = run_sentiment_analysis(comments_df.copy(), 'comment_text', progress)
            
        top_videos_for_display = videos_df_full.head(int(num_comments))
        return top_videos_for_display, {"full_scan": videos_df_full, "comments": comments_df, "total_estimate": total_vids_est}

    start_yt_analysis_button.click(
        fn=youtube_workflow,
        inputs=[yt_api_key, yt_search_keywords, yt_max_videos_for_stats, yt_num_videos_for_comments, yt_max_comments, yt_published_after],
        outputs=[yt_videos_df_output, youtube_results_state]
    )

    def update_youtube_dashboards(results_data):
        if not results_data or results_data.get("full_scan") is None or results_data["full_scan"].empty:
            return {
                yt_dashboard_group: gr.update(visible=False), kpi_yt_total_topic_videos: "0",
                kpi_yt_videos_found: "0", kpi_yt_views_scanned: "0", kpi_yt_comments_scraped: "0",
                yt_channel_plot: None, yt_wordcloud_plot: None, yt_sentiment_pie_plot: None,
                yt_sentiment_by_video_plot: None, yt_channel_views_plot: None,
                yt_performance_quadrant_plot: None, yt_content_age_plot: None
            }
        
        videos_df_full, comments_df, total_estimate = results_data.get("full_scan"), results_data.get("comments"), results_data.get("total_estimate", 0)
        deep_dive_updates = generate_youtube_dashboard(videos_df_full, comments_df)
        fig_ch_views, fig_quad, fig_age = generate_youtube_topic_dashboard(videos_df_full)
        
        return {
            yt_dashboard_group: gr.update(visible=True),
            kpi_yt_total_topic_videos: f"{total_estimate:,}",
            **deep_dive_updates,
            yt_channel_views_plot: fig_ch_views,
            yt_performance_quadrant_plot: fig_quad,
            yt_content_age_plot: fig_age,
        }
    
    yt_ui_components = [
        yt_dashboard_group, kpi_yt_total_topic_videos, kpi_yt_videos_found, kpi_yt_views_scanned, kpi_yt_comments_scraped,
        yt_channel_plot, yt_wordcloud_plot, yt_sentiment_pie_plot, yt_sentiment_by_video_plot,
        yt_channel_views_plot, yt_performance_quadrant_plot, yt_content_age_plot
    ]
    youtube_results_state.change(fn=update_youtube_dashboards, inputs=youtube_results_state, outputs=yt_ui_components)
    
# ==============================================================================
# LAUNCH THE APP
# ==============================================================================

# --- LAUNCH THE APP ---
if __name__ == "__main__":
    app.launch(debug=True, share=True)