import requests import pandas as pd import gradio as gr import plotly.graph_objects as go import plotly.express as px from plotly.subplots import make_subplots from datetime import datetime, timedelta import json # Commenting out blockchain-related imports that cause loading issues # from web3 import Web3 import os import numpy as np import matplotlib.pyplot as plt import matplotlib.dates as mdates import random import logging from typing import List, Dict, Any, Optional # Comment out the import for now and replace with dummy functions # from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations # APR visualization functions integrated directly from fetch_and_preprocess_data import generate_continuous_random_data from initial_value_fixer import fix_apr_and_roi from load_from_csv import ( load_apr_data_from_csv, load_roi_data_from_csv, load_statistics_from_csv, check_csv_data_availability, get_data_freshness_info ) # Set up logging with appropriate verbosity logging.basicConfig( level=logging.INFO, # Use INFO level instead of DEBUG to reduce verbosity format="%(asctime)s - %(levelname)s - %(message)s", handlers=[ logging.FileHandler("app_debug.log", mode='a'), # Append mode for persistence logging.StreamHandler() # Also log to console ], force=True # Force reconfiguration of logging ) logger = logging.getLogger(__name__) # Ensure the logger level is set correctly logger.setLevel(logging.INFO) # Test logging to verify it's working logger.info("=== LOGGING SYSTEM INITIALIZED ===") logger.info("Debug logs will be written to app_debug.log") # Reduce third-party library logging logging.getLogger("urllib3").setLevel(logging.WARNING) logging.getLogger("httpx").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) # Log the startup information logger.info("============= APPLICATION STARTING =============") logger.info(f"Running from directory: {os.getcwd()}") # Global variables to store the data for reuse global_df = None global_roi_df = None global_dummy_apr_df = None # Store dummy APR data separately global_dummy_roi_df = None # Store dummy ROI data separately # Configuration API_BASE_URL = "https://afmdb.autonolas.tech" logger.info(f"Using API endpoint: {API_BASE_URL}") def get_agent_type_by_name(type_name: str) -> Dict[str, Any]: """Get agent type by name""" url = f"{API_BASE_URL}/api/agent-types/name/{type_name}" logger.debug(f"Calling API: {url}") try: response = requests.get(url) logger.debug(f"Response status: {response.status_code}") if response.status_code == 404: logger.error(f"Agent type '{type_name}' not found") return None response.raise_for_status() result = response.json() logger.debug(f"Agent type response: {result}") return result except Exception as e: logger.error(f"Error in get_agent_type_by_name: {e}") return None def get_attribute_definition_by_name(attr_name: str) -> Dict[str, Any]: """Get attribute definition by name""" url = f"{API_BASE_URL}/api/attributes/name/{attr_name}" logger.debug(f"Calling API: {url}") try: response = requests.get(url) logger.debug(f"Response status: {response.status_code}") if response.status_code == 404: logger.error(f"Attribute definition '{attr_name}' not found") return None response.raise_for_status() result = response.json() logger.debug(f"Attribute definition response: {result}") return result except Exception as e: logger.error(f"Error in get_attribute_definition_by_name: {e}") return None def get_agents_by_type(type_id: int) -> List[Dict[str, Any]]: """Get all agents of a 
specific type""" url = f"{API_BASE_URL}/api/agent-types/{type_id}/agents/" logger.debug(f"Calling API: {url}") try: response = requests.get(url) logger.debug(f"Response status: {response.status_code}") if response.status_code == 404: logger.error(f"No agents found for type ID {type_id}") return [] response.raise_for_status() result = response.json() logger.debug(f"Agents count: {len(result)}") logger.debug(f"First few agents: {result[:2] if result else []}") return result except Exception as e: logger.error(f"Error in get_agents_by_type: {e}") return [] def get_attribute_values_by_type_and_attr(agents: List[Dict[str, Any]], attr_def_id: int) -> List[Dict[str, Any]]: """Get all attribute values for a specific attribute definition across all agents of a given list""" all_attributes = [] logger.debug(f"Getting attributes for {len(agents)} agents with attr_def_id: {attr_def_id}") # For each agent, get their attributes and filter for the one we want for agent in agents: agent_id = agent["agent_id"] # Call the /api/agents/{agent_id}/attributes/ endpoint url = f"{API_BASE_URL}/api/agents/{agent_id}/attributes/" logger.debug(f"Calling API for agent {agent_id}: {url}") try: response = requests.get(url, params={"limit": 1000}) if response.status_code == 404: logger.error(f"No attributes found for agent ID {agent_id}") continue response.raise_for_status() agent_attrs = response.json() logger.debug(f"Agent {agent_id} has {len(agent_attrs)} attributes") # Filter for the specific attribute definition ID filtered_attrs = [attr for attr in agent_attrs if attr.get("attr_def_id") == attr_def_id] logger.debug(f"Agent {agent_id} has {len(filtered_attrs)} APR attributes") if filtered_attrs: logger.debug(f"Sample attribute for agent {agent_id}: {filtered_attrs[0]}") all_attributes.extend(filtered_attrs) except requests.exceptions.RequestException as e: logger.error(f"Error fetching attributes for agent ID {agent_id}: {e}") logger.info(f"Total APR attributes found across all agents: {len(all_attributes)}") return all_attributes def get_agent_name(agent_id: int, agents: List[Dict[str, Any]]) -> str: """Get agent name from agent ID""" for agent in agents: if agent["agent_id"] == agent_id: return agent["agent_name"] return "Unknown" def extract_apr_value(attr: Dict[str, Any]) -> Dict[str, Any]: """Extract APR value, adjusted APR value, ROI value, and timestamp from JSON value""" try: agent_id = attr.get("agent_id", "unknown") logger.debug(f"Extracting APR value for agent {agent_id}") # The APR value is stored in the json_value field if attr["json_value"] is None: logger.debug(f"Agent {agent_id}: json_value is None") return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": agent_id, "is_dummy": False} # If json_value is a string, parse it if isinstance(attr["json_value"], str): logger.debug(f"Agent {agent_id}: json_value is string, parsing") json_data = json.loads(attr["json_value"]) else: json_data = attr["json_value"] apr = json_data.get("apr") adjusted_apr = json_data.get("adjusted_apr") # Extract adjusted_apr if present timestamp = json_data.get("timestamp") address = json_data.get("portfolio_snapshot", {}).get("portfolio", {}).get("address") # Extract ROI (f_i_ratio) from calculation_metrics if it exists roi = None if "calculation_metrics" in json_data and json_data["calculation_metrics"] is not None: roi = json_data["calculation_metrics"].get("f_i_ratio") # Filter ROI values to -10 to 10 range if roi is not None and (roi < -10 or roi > 10): roi = None # Exclude ROI values outside 
the range logger.debug(f"Agent {agent_id}: Raw APR value: {apr}, adjusted APR value: {adjusted_apr}, ROI value: {roi}, timestamp: {timestamp}") # Convert timestamp to datetime if it exists timestamp_dt = None if timestamp: timestamp_dt = datetime.fromtimestamp(timestamp) result = json_data.copy() # Copy the original JSON data for logging result.update({ "apr": apr, "adjusted_apr": adjusted_apr, "roi": roi, "timestamp": timestamp_dt, "agent_id": agent_id, "is_dummy": False, "address": address }) logger.debug(f"Agent {agent_id}: Extracted result: {result}") return result except (json.JSONDecodeError, KeyError, TypeError) as e: logger.error(f"Error parsing JSON value: {e} for agent_id: {attr.get('agent_id')}") logger.error(f"Problematic json_value: {attr.get('json_value')}") return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": attr.get('agent_id'), "is_dummy": False, "address": None} def fetch_apr_data_from_db(): """ Fetch APR data from database using the API. """ global global_df global global_roi_df logger.info("==== Starting APR data fetch ====") try: # Step 1: Find the Optimus agent type logger.info("Finding Optimus agent type") optimus_type = get_agent_type_by_name("Optimus") if not optimus_type: logger.error("Optimus agent type not found, using placeholder data") global_df = pd.DataFrame([]) return global_df type_id = optimus_type["type_id"] logger.info(f"Found Optimus agent type with ID: {type_id}") # Step 2: Find the APR attribute definition logger.info("Finding APR attribute definition") apr_attr_def = get_attribute_definition_by_name("APR") if not apr_attr_def: logger.error("APR attribute definition not found, using placeholder data") global_df = pd.DataFrame([]) return global_df attr_def_id = apr_attr_def["attr_def_id"] logger.info(f"Found APR attribute definition with ID: {attr_def_id}") # Step 3: Get all agents of type Optimus logger.info(f"Getting all agents of type Optimus (type_id: {type_id})") optimus_agents = get_agents_by_type(type_id) if not optimus_agents: logger.error("No agents of type 'Optimus' found") global_df = pd.DataFrame([]) return global_df logger.info(f"Found {len(optimus_agents)} Optimus agents") logger.debug(f"Optimus agents: {[{'agent_id': a['agent_id'], 'agent_name': a['agent_name']} for a in optimus_agents]}") # Step 4: Fetch all APR values for Optimus agents logger.info(f"Fetching APR values for all Optimus agents (attr_def_id: {attr_def_id})") apr_attributes = get_attribute_values_by_type_and_attr(optimus_agents, attr_def_id) if not apr_attributes: logger.error("No APR values found for 'Optimus' agents") global_df = pd.DataFrame([]) return global_df logger.info(f"Found {len(apr_attributes)} APR attributes total") # Step 5: Extract APR and ROI data logger.info("Extracting APR and ROI data from attributes") apr_data_list = [] roi_data_list = [] for attr in apr_attributes: data = extract_apr_value(attr) if data["timestamp"] is not None: # Get agent name agent_name = get_agent_name(attr["agent_id"], optimus_agents) # Add agent name to the data data["agent_name"] = agent_name # Add is_dummy flag (all real data) data["is_dummy"] = False # Process APR data if data["apr"] is not None: # Include all APR values (including negative ones) EXCEPT zero and -100 if data["apr"] != 0 and data["apr"] != -100: apr_entry = data.copy() apr_entry["metric_type"] = "APR" logger.debug(f"Agent {agent_name} ({attr['agent_id']}): APR value: {data['apr']}") # Add to the APR data list apr_data_list.append(apr_entry) else: # Log that we're skipping 
zero or -100 values logger.debug(f"Skipping APR value for agent {agent_name} ({attr['agent_id']}): {data['apr']} (zero or -100)") # Process ROI data if data["roi"] is not None: # Include all ROI values roi_entry = { "roi": data["roi"], "timestamp": data["timestamp"], "agent_id": data["agent_id"], "agent_name": agent_name, "is_dummy": False, "metric_type": "ROI" } logger.debug(f"Agent {agent_name} ({attr['agent_id']}): ROI value: {data['roi']}") # Add to the ROI data list roi_data_list.append(roi_entry) logger.info(f"Extracted {len(apr_data_list)} valid APR data points and {len(roi_data_list)} valid ROI data points") # Added debug for adjusted APR data after May 10th may_10_2025 = datetime(2025, 5, 10) after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025] with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None] logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}") logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}") # Log detailed information about when data began first_adjusted = None if with_adjusted_after_may_10: first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp']) logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})") # Check all data for first adjusted_apr all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None] if all_with_adjusted: first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp']) logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})") last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp']) logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})") # Calculate overall coverage adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100 logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)") # Log per-agent adjusted APR statistics agent_stats = {} for record in apr_data_list: agent_id = record['agent_id'] has_adjusted = record['adjusted_apr'] is not None if agent_id not in agent_stats: agent_stats[agent_id] = {'total': 0, 'adjusted': 0} agent_stats[agent_id]['total'] += 1 if has_adjusted: agent_stats[agent_id]['adjusted'] += 1 # Log stats for agents with meaningful data for agent_id, stats in agent_stats.items(): if stats['total'] > 0: coverage = (stats['adjusted'] / stats['total']) * 100 if coverage > 0: # Only log agents that have at least some adjusted data logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({stats['adjusted']}/{stats['total']} records)") # Check for gaps in adjusted APR data for agent_id in agent_stats: # Get all records for this agent agent_records = [r for r in apr_data_list if r['agent_id'] == agent_id] # Sort by timestamp agent_records.sort(key=lambda x: x['timestamp']) # Find where adjusted APR starts and if there are gaps has_adjusted = False gap_count = 0 streak_length = 0 for record in agent_records: if record['adjusted_apr'] is not None: if not has_adjusted: has_adjusted = True logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}") streak_length += 1 elif has_adjusted: # We had adjusted data but now it's missing gap_count += 1 if streak_length > 0: logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records") streak_length = 0 
if gap_count > 0: logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data") elif has_adjusted: logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps") # Provide summary statistics agents_with_data = sum(1 for stats in agent_stats.values() if stats['adjusted'] > 0) agents_with_gaps = sum(1 for agent_id in agent_stats if any(apr_data_list[i]['agent_id'] == agent_id and apr_data_list[i]['adjusted_apr'] is not None and i+1 < len(apr_data_list) and apr_data_list[i+1]['agent_id'] == agent_id and apr_data_list[i+1]['adjusted_apr'] is None for i in range(len(apr_data_list)-1))) logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(agent_stats)} agents have adjusted APR data") if agents_with_gaps > 0: logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data") logger.warning("These gaps may cause discontinuities in the adjusted APR graph") else: logger.info("No gaps detected in adjusted APR data - graph should be continuous") if len(with_adjusted_after_may_10) == 0 and len(after_may_10) > 0: logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data") # Log agent IDs with missing adjusted_apr after May 10th agents_after_may_10 = set(d['agent_id'] for d in after_may_10) logger.info(f"Agents with data after May 10th: {agents_after_may_10}") # Check these same agents before May 10th before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025] agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None} # Agents that had adjusted_apr before but not after missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10) if missing_adjusted: logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}") # Find the last valid adjusted_apr date for these agents for agent_id in missing_adjusted: agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None] if agent_data: last_entry = max(agent_data, key=lambda d: d['timestamp']) logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}") # Look at the first entry after the cutoff without adjusted_apr agent_after = [d for d in after_may_10 if d['agent_id'] == agent_id] if agent_after: first_after = min(agent_after, key=lambda d: d['timestamp']) logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr") # If the agent data has the 'adjusted_apr_key' field, log that info if 'adjusted_apr_key' in first_after: logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}") # Add debug logic to check for any adjusted_apr after May 10th and which agents have it elif len(with_adjusted_after_may_10) > 0: logger.info("Found adjusted_apr values after May 10th, 2025") # Group by agent and log agent_counts = {} for item in with_adjusted_after_may_10: agent_id = item['agent_id'] if agent_id in agent_counts: agent_counts[agent_id] += 1 else: agent_counts[agent_id] = 1 logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}") # Log adjusted_apr keys used keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item} if keys_used: logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}") # Convert to DataFrames if not apr_data_list: logger.error("No valid APR data extracted") global_df = pd.DataFrame([]) 
else: # Convert list of dictionaries to DataFrame for APR global_df = pd.DataFrame(apr_data_list) if not roi_data_list: logger.error("No valid ROI data extracted") global_roi_df = pd.DataFrame([]) else: # Convert list of dictionaries to DataFrame for ROI global_roi_df = pd.DataFrame(roi_data_list) # Handle dummy data generation global global_dummy_apr_df global global_dummy_roi_df logger.info("Handling dummy data...") # Generate dummy APR data only if needed if not global_df.empty: # Check if we already have dummy data if global_dummy_apr_df is None: # First time - generate all dummy data logger.info("Generating initial dummy APR data...") global_dummy_apr_df = generate_continuous_random_data(global_df) # Only keep APR data if not global_dummy_apr_df.empty: global_dummy_apr_df = global_dummy_apr_df[global_dummy_apr_df['metric_type'] == 'APR'] logger.info(f"Generated {len(global_dummy_apr_df)} initial dummy APR data points") else: # We already have dummy data - check if we need to generate more # Find the latest timestamp in the real data latest_real_timestamp = global_df['timestamp'].max() # Find the latest timestamp in the dummy data latest_dummy_timestamp = global_dummy_apr_df['timestamp'].max() if not global_dummy_apr_df.empty else None # If the real data has newer timestamps, generate more dummy data if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp: logger.info("Generating additional dummy APR data for new timestamps...") # Create a temporary dataframe with only the latest real data temp_df = global_df[global_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_df # Generate dummy data for the new timestamps new_dummy_data = generate_continuous_random_data(temp_df) # Only keep APR data if not new_dummy_data.empty: new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'APR'] logger.info(f"Generated {len(new_dummy_data)} additional dummy APR data points") # Append the new dummy data to the existing dummy data global_dummy_apr_df = pd.concat([global_dummy_apr_df, new_dummy_data], ignore_index=True) else: logger.info("No new timestamps in real data, using existing dummy APR data") # Combine real and dummy APR data if not global_dummy_apr_df.empty: apr_dummy_count = len(global_dummy_apr_df) global_df = pd.concat([global_df, global_dummy_apr_df], ignore_index=True) logger.info(f"Added {apr_dummy_count} dummy APR data points to the dataset") # Generate dummy ROI data only if needed if not global_roi_df.empty: # Check if we already have dummy data if global_dummy_roi_df is None: # First time - generate all dummy data logger.info("Generating initial dummy ROI data...") global_dummy_roi_df = generate_continuous_random_data(global_roi_df) # Only keep ROI data if not global_dummy_roi_df.empty: global_dummy_roi_df = global_dummy_roi_df[global_dummy_roi_df['metric_type'] == 'ROI'] logger.info(f"Generated {len(global_dummy_roi_df)} initial dummy ROI data points") else: # We already have dummy data - check if we need to generate more # Find the latest timestamp in the real data latest_real_timestamp = global_roi_df['timestamp'].max() # Find the latest timestamp in the dummy data latest_dummy_timestamp = global_dummy_roi_df['timestamp'].max() if not global_dummy_roi_df.empty else None # If the real data has newer timestamps, generate more dummy data if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp: logger.info("Generating additional dummy ROI data for new timestamps...") # Create a temporary 
dataframe with only the latest real data temp_df = global_roi_df[global_roi_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_roi_df # Generate dummy data for the new timestamps new_dummy_data = generate_continuous_random_data(temp_df) # Only keep ROI data if not new_dummy_data.empty: new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'ROI'] logger.info(f"Generated {len(new_dummy_data)} additional dummy ROI data points") # Append the new dummy data to the existing dummy data global_dummy_roi_df = pd.concat([global_dummy_roi_df, new_dummy_data], ignore_index=True) else: logger.info("No new timestamps in real data, using existing dummy ROI data") # Combine real and dummy ROI data if not global_dummy_roi_df.empty: roi_dummy_count = len(global_dummy_roi_df) global_roi_df = pd.concat([global_roi_df, global_dummy_roi_df], ignore_index=True) logger.info(f"Added {roi_dummy_count} dummy ROI data points to the dataset") # Log the resulting dataframe logger.info(f"Created DataFrame with {len(global_df)} rows (including dummy data)") logger.info(f"DataFrame columns: {global_df.columns.tolist()}") logger.info(f"APR statistics: min={global_df['apr'].min()}, max={global_df['apr'].max()}, mean={global_df['apr'].mean()}") # Log adjusted APR statistics if available if 'adjusted_apr' in global_df.columns and global_df['adjusted_apr'].notna().any(): logger.info(f"Adjusted APR statistics: min={global_df['adjusted_apr'].min()}, max={global_df['adjusted_apr'].max()}, mean={global_df['adjusted_apr'].mean()}") logger.info(f"Number of records with adjusted_apr: {global_df['adjusted_apr'].notna().sum()} out of {len(global_df)}") # Log the difference between APR and adjusted APR valid_rows = global_df[global_df['adjusted_apr'].notna()] if not valid_rows.empty: avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean() max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max() min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min() logger.info(f"APR vs. adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}") # All values are APR type (excluding zero and -100 values) logger.info("All values are APR type (excluding zero and -100 values)") logger.info(f"Agents count: {global_df['agent_name'].value_counts().to_dict()}") # Log the entire dataframe for debugging logger.debug("Final DataFrame contents:") for idx, row in global_df.iterrows(): logger.debug(f"Row {idx}: {row.to_dict()}") # Add this at the end, right before returning logger.info("Analyzing adjusted_apr data availability...") log_adjusted_apr_availability(global_df) return global_df, global_roi_df except requests.exceptions.RequestException as e: logger.error(f"API request error: {e}") global_df = pd.DataFrame([]) global_roi_df = pd.DataFrame([]) return global_df, global_roi_df except Exception as e: logger.error(f"Error fetching APR data: {e}") logger.exception("Exception traceback:") global_df = pd.DataFrame([]) global_roi_df = pd.DataFrame([]) return global_df, global_roi_df def log_adjusted_apr_availability(df): """ Analyzes and logs detailed information about adjusted_apr data availability. 
Args: df: DataFrame containing the APR data with adjusted_apr column """ if df.empty or 'adjusted_apr' not in df.columns: logger.warning("No adjusted_apr data available for analysis") return # Get only rows with valid adjusted_apr values has_adjusted = df[df['adjusted_apr'].notna()] if has_adjusted.empty: logger.warning("No valid adjusted_apr values found in the dataset") return # 1. When did adjusted_apr data start? first_adjusted = has_adjusted['timestamp'].min() last_adjusted = has_adjusted['timestamp'].max() logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}") logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}") logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days") # Calculate coverage percentage total_records = len(df) records_with_adjusted = len(has_adjusted) coverage_pct = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0 logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)") # 2. How many agents are providing adjusted_apr? agents_with_adjusted = has_adjusted['agent_id'].unique() logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr") logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}") # 3. May 10th cutoff analysis may_10_2025 = datetime(2025, 5, 10) before_cutoff = df[df['timestamp'] < may_10_2025] after_cutoff = df[df['timestamp'] >= may_10_2025] if not before_cutoff.empty and not after_cutoff.empty: before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum() before_pct = (before_with_adjusted / len(before_cutoff)) * 100 after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum() after_pct = (after_with_adjusted / len(after_cutoff)) * 100 logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)") logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)") # Check which agents had data before and after agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) missing_after = agents_before - agents_after if missing_after: logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}") new_after = agents_after - agents_before if new_after: logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}") # 4. 
Find date ranges for missing adjusted_apr # Group by agent to analyze per-agent data availability logger.info("=== DETAILED AGENT ANALYSIS ===") for agent_id in df['agent_id'].unique(): agent_data = df[df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}" # Get the valid adjusted_apr values for this agent agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()] if agent_adjusted.empty: logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available") continue # Get the date range for this agent's data agent_start = agent_data['timestamp'].min() agent_end = agent_data['timestamp'].max() # Get the date range for adjusted_apr data adjusted_start = agent_adjusted['timestamp'].min() adjusted_end = agent_adjusted['timestamp'].max() total_agent_records = len(agent_data) agent_with_adjusted = len(agent_adjusted) coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0 logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)") logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}") logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}") # Calculate if this agent had data before/after May 10th if not before_cutoff.empty and not after_cutoff.empty: agent_before = before_cutoff[before_cutoff['agent_id'] == agent_id] agent_after = after_cutoff[after_cutoff['agent_id'] == agent_id] has_before = not agent_before.empty and agent_before['adjusted_apr'].notna().any() has_after = not agent_after.empty and agent_after['adjusted_apr'].notna().any() if has_before and not has_after: last_date = agent_before[agent_before['adjusted_apr'].notna()]['timestamp'].max() logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. Last data point: {last_date}") elif not has_before and has_after: first_date = agent_after[agent_after['adjusted_apr'].notna()]['timestamp'].min() logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. 
First data point: {first_date}") # Check for gaps in adjusted_apr (periods of 24+ hours without data) if len(agent_adjusted) < 2: continue # Sort by timestamp sorted_data = agent_adjusted.sort_values('timestamp') # Calculate time differences between consecutive data points time_diffs = sorted_data['timestamp'].diff() # Find gaps larger than 24 hours gaps = sorted_data[time_diffs > pd.Timedelta(hours=24)] if not gaps.empty: logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {len(gaps)} gaps in adjusted_apr data") # Log the gaps for i, row in gaps.iterrows(): # Find the previous timestamp before the gap prev_idx = sorted_data.index.get_loc(i) - 1 prev_time = sorted_data.iloc[prev_idx]['timestamp'] if prev_idx >= 0 else None if prev_time: gap_start = prev_time gap_end = row['timestamp'] gap_duration = gap_end - gap_start logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)") def generate_apr_visualizations(): """Generate APR visualizations using CSV data only for consistency with ROI graph""" global global_df # CONSISTENCY FIX: Always use CSV data to match ROI graph behavior logger.info("Loading APR data from CSV files for consistency with ROI graph...") df, csv_file = load_apr_data_from_csv() if not df.empty: logger.info(f"Successfully loaded APR data from CSV: {len(df)} records") global_df = df # Create visualizations using CSV data logger.info("Creating APR visualizations from CSV data...") combined_fig = create_combined_time_series_graph(df) return combined_fig, csv_file # FALLBACK: If CSV not available, return error message logger.error("CSV data not available and API fallback disabled for consistency") # Create empty visualization with a message using Plotly fig = go.Figure() fig.add_annotation( x=0.5, y=0.5, text="No APR data available - CSV file missing", font=dict(size=20), showarrow=False ) fig.update_layout( xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) ) return fig, None def generate_roi_visualizations(): """Generate ROI visualizations directly from optimus_apr_values.csv""" global global_roi_df # SIMPLIFIED APPROACH: Load ROI data directly from APR CSV logger.info("Loading ROI data directly from optimus_apr_values.csv...") df_apr, csv_file = load_apr_data_from_csv() if not df_apr.empty and 'roi' in df_apr.columns: # CONSISTENCY FIX: Apply same filtering as APR graph logger.info("=== ROI GRAPH DATA FILTERING DEBUG ===") logger.info(f"Initial APR data loaded: {len(df_apr)} records") logger.info(f"Unique agents in initial data: {df_apr['agent_id'].nunique()}") logger.info(f"Agent IDs in initial data: {sorted(df_apr['agent_id'].unique().tolist())}") # Check metric_type distribution if 'metric_type' in df_apr.columns: metric_counts = df_apr['metric_type'].value_counts() logger.info(f"Metric type distribution: {metric_counts.to_dict()}") else: logger.warning("No 'metric_type' column found in APR data") # First filter by metric_type == 'APR' to match APR graph logic df_apr_filtered = df_apr[df_apr['metric_type'] == 'APR'].copy() logger.info(f"After metric_type == 'APR' filter: {len(df_apr_filtered)} records") logger.info(f"Unique agents after APR filter: {df_apr_filtered['agent_id'].nunique()}") logger.info(f"Agent IDs after APR filter: {sorted(df_apr_filtered['agent_id'].unique().tolist())}") # Then filter for rows with valid ROI values df_roi = 
df_apr_filtered[df_apr_filtered['roi'].notna()].copy() logger.info(f"After ROI filter: {len(df_roi)} records") logger.info(f"Unique agents after ROI filter: {df_roi['agent_id'].nunique()}") logger.info(f"Agent IDs after ROI filter: {sorted(df_roi['agent_id'].unique().tolist())}") if not df_roi.empty: # Add metric_type column for consistency df_roi['metric_type'] = 'ROI' logger.info(f"Successfully loaded {len(df_roi)} ROI records from APR CSV") global_roi_df = df_roi # Create visualizations using ROI data from APR CSV logger.info("Creating ROI visualizations from APR CSV data...") combined_fig = create_combined_roi_time_series_graph(df_roi) return combined_fig, csv_file else: logger.warning("No valid ROI data found in APR CSV") else: logger.warning("APR CSV not available or missing ROI column") # FALLBACK: If CSV not available, try API logger.info("CSV data not available, falling back to API...") try: # Fetch data from database if not already fetched if global_roi_df is None or global_roi_df.empty: _, df_roi = fetch_apr_data_from_db() else: df_roi = global_roi_df # If we got no data at all, return placeholder figures if df_roi.empty: logger.info("No ROI data available from API either. Using fallback visualization.") # Create empty visualizations with a message using Plotly fig = go.Figure() fig.add_annotation( x=0.5, y=0.5, text="No ROI data available", font=dict(size=20), showarrow=False ) fig.update_layout( xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) ) return fig, None # Set global_roi_df for access by other functions global_roi_df = df_roi # Create visualizations using API data logger.info("Creating ROI visualizations from API data...") combined_fig = create_combined_roi_time_series_graph(df_roi) return combined_fig, None except Exception as e: logger.error(f"Error fetching ROI data from API: {e}") # Return error visualization fig = go.Figure() fig.add_annotation( x=0.5, y=0.5, text=f"Error loading data: {str(e)}", font=dict(size=16, color="red"), showarrow=False ) fig.update_layout( xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) ) return fig, None def aggregate_daily_data(df, metric_column): """ Aggregate data by date and agent, taking the median of values within each day. 
    Args:
        df: DataFrame with timestamp, agent_id, and metric data
        metric_column: Name of the metric column ('apr' or 'roi')

    Returns:
        DataFrame with daily aggregated data per agent
    """
    if df.empty:
        return df

    # Convert timestamp to date only (ignore time)
    df = df.copy()
    df['date'] = df['timestamp'].dt.date

    # DEBUG: Log July 8th data specifically
    july_8_data = df[df['date'] == pd.to_datetime('2025-07-08').date()]
    if not july_8_data.empty:
        july_8_agents = july_8_data['agent_id'].unique()
        logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agents before aggregation: {len(july_8_agents)}")
        logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agent IDs: {sorted(july_8_agents.tolist())}")

    # NEW: Add detailed logging to verify median calculation
    logger.info(f"=== MEDIAN CALCULATION DEBUG for {metric_column} ===")

    # Find days with multiple data points per agent to show the difference
    sample_groups = df.groupby(['date', 'agent_id']).size()
    multi_point_days = sample_groups[sample_groups > 1].head(10)  # Show up to 10 examples
    logger.info(f"Found {len(multi_point_days)} agent-days with multiple data points (showing up to 10):")

    mean_median_differences = []
    for (date, agent_id), count in multi_point_days.items():
        day_data = df[(df['date'] == date) & (df['agent_id'] == agent_id)]
        values = day_data[metric_column].tolist()
        calculated_mean = day_data[metric_column].mean()
        calculated_median = day_data[metric_column].median()
        agent_name = day_data['agent_name'].iloc[0] if not day_data.empty else f"Agent {agent_id}"
        difference = abs(calculated_mean - calculated_median)
        mean_median_differences.append(difference)
        logger.info(f"  {agent_name} on {date}: {count} values = {values}")
        logger.info(f"  MEAN: {calculated_mean:.4f}, MEDIAN: {calculated_median:.4f}, DIFF: {difference:.4f}")

    # Summary statistics
    if mean_median_differences:
        avg_difference = sum(mean_median_differences) / len(mean_median_differences)
        max_difference = max(mean_median_differences)
        logger.info(f"Mean vs Median differences - Avg: {avg_difference:.4f}, Max: {max_difference:.4f}")
    else:
        logger.info("No days found with multiple data points per agent")

    # Show total distribution of data points per day
    single_point_days = len(sample_groups[sample_groups == 1])
    multi_point_days_count = len(sample_groups[sample_groups > 1])
    logger.info(f"Data distribution: {single_point_days} agent-days with 1 point, {multi_point_days_count} agent-days with multiple points")

    # Group by date and agent, calculate the median for each day
    # (median, to match the docstring and the MEDIAN logging above)
    daily_agent_data = df.groupby(['date', 'agent_id']).agg({
        metric_column: 'median',
        'agent_name': 'first',
        'is_dummy': 'first',
        'metric_type': 'first'
    }).reset_index()

    # Convert date back to datetime for plotting
    daily_agent_data['timestamp'] = pd.to_datetime(daily_agent_data['date'])

    # Log a few sample median values from the result
    logger.info("Sample calculated median values:")
    for i, row in daily_agent_data.head(5).iterrows():
        logger.info(f"  {row['agent_name']} on {row['date']}: median {metric_column} = {row[metric_column]:.4f}")

    logger.info(f"Aggregated {len(df)} data points into {len(daily_agent_data)} daily values for {metric_column} using MEDIAN")
    return daily_agent_data


def calculate_daily_medians(daily_agent_data, metric_column):
    """
    Calculate daily medians across all agents for each date.
Args: daily_agent_data: DataFrame with daily aggregated data per agent metric_column: Name of the metric column ('apr' or 'roi') Returns: DataFrame with daily median values """ if daily_agent_data.empty: return daily_agent_data # For each date, calculate median across all agents (excluding missing data) daily_medians = daily_agent_data.groupby('date').agg({ metric_column: 'median' }).reset_index() # Convert date back to datetime for plotting daily_medians['timestamp'] = pd.to_datetime(daily_medians['date']) logger.info(f"Calculated {len(daily_medians)} daily median values for {metric_column}") return daily_medians def calculate_moving_average_medians(daily_medians, metric_column, window_days=7): """ Calculate moving average of daily medians using a specified time window. Args: daily_medians: DataFrame with daily median values metric_column: Name of the metric column ('apr' or 'roi') window_days: Number of days for the moving average window Returns: DataFrame with moving average values added """ if daily_medians.empty: return daily_medians # Sort by timestamp daily_medians = daily_medians.sort_values('timestamp').copy() # Initialize moving average column daily_medians['moving_avg'] = None # Define the time window time_window = pd.Timedelta(days=window_days) logger.info(f"Calculating {window_days}-day moving average of daily medians for {metric_column}") # Calculate moving averages for each timestamp for i, row in daily_medians.iterrows(): current_time = row['timestamp'] window_start = current_time - time_window # Get all median values within the time window window_data = daily_medians[ (daily_medians['timestamp'] >= window_start) & (daily_medians['timestamp'] <= current_time) ] # Calculate the average of medians for the time window if not window_data.empty: daily_medians.at[i, 'moving_avg'] = window_data[metric_column].mean() else: # If no data points in the window, use the current value daily_medians.at[i, 'moving_avg'] = row[metric_column] logger.info(f"Calculated {window_days}-day moving averages with {len(daily_medians)} points") return daily_medians def create_combined_roi_time_series_graph(df): """Create a time series graph showing daily median ROI values with 7-day moving average""" if len(df) == 0: logger.error("No data to plot combined ROI graph") fig = go.Figure() fig.add_annotation( text="No ROI data available", x=0.5, y=0.5, showarrow=False, font=dict(size=20) ) return fig # Calculate runtime for each agent from their actual first data point logger.info(f"Calculating runtime for each agent from their actual start date") agent_runtimes = {} for agent_id in df['agent_id'].unique(): agent_data = df[df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] first_report = agent_data['timestamp'].min() # Agent's actual start date last_report = agent_data['timestamp'].max() # Agent's last report runtime_days = (last_report - first_report).total_seconds() / (24 * 3600) # Convert to days agent_runtimes[agent_id] = { 'agent_name': agent_name, 'first_report': first_report, 'last_report': last_report, 'runtime_days': runtime_days } # Calculate average runtime avg_runtime = sum(data['runtime_days'] for data in agent_runtimes.values()) / len(agent_runtimes) if agent_runtimes else 0 logger.info(f"Average agent runtime from fixed start date: {avg_runtime:.2f} days") # Log individual agent runtimes for debugging for agent_id, data in agent_runtimes.items(): logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: 
{data['last_report']}")

    # SIMPLIFIED: ROI data is already clean from CSV, just ensure proper data types
    logger.info("Processing ROI data from CSV...")

    # Remove rows with invalid ROI values
    initial_count = len(df)
    df = df[df['roi'].notna()]
    final_count = len(df)
    removed_count = initial_count - final_count
    if removed_count > 0:
        logger.warning(f"Removed {removed_count} rows with invalid ROI values")

    # Ensure proper data types
    df['roi'] = df['roi'].astype(float)
    df['metric_type'] = df['metric_type'].astype(str)

    # Get min and max time for shapes
    min_time = df['timestamp'].min()
    max_time = df['timestamp'].max()

    # Use the actual start date from the data instead of a fixed date
    x_start_date = min_time

    # CRITICAL: Log the exact dataframe we're using for plotting to help debug
    logger.info(f"ROI Graph data - shape: {df.shape}, columns: {df.columns}")
    logger.info(f"ROI Graph data - unique agents: {df['agent_name'].unique().tolist()}")
    logger.info(f"ROI Graph data - min ROI: {df['roi'].min()}, max ROI: {df['roi'].max()}")

    # Export full dataframe to CSV for debugging
    debug_csv = "debug_roi_data.csv"
    df.to_csv(debug_csv)
    logger.info(f"Exported ROI graph data to {debug_csv} for debugging")

    # Create Plotly figure in a clean state
    fig = go.Figure()

    # Add background shapes for positive and negative regions
    # Shape for positive ROI region (above zero) - use a reasonable fixed range
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=10,  # Fixed positive range to avoid extreme outliers affecting the view
        x0=min_time, x1=max_time,
        layer="below"
    )

    # Shape for negative ROI region (below zero) - use a reasonable fixed range
    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-10, y1=0,  # Fixed negative range to avoid extreme outliers affecting the view
        x0=min_time, x1=max_time,
        layer="below"
    )

    # Add zero line
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=min_time, x1=max_time
    )

    # Filter ROI outliers for better visualization (±200% range)
    before_outlier_filter = len(df)
    df = df[(df['roi'] <= 200) & (df['roi'] >= -200)]
    after_outlier_filter = len(df)
    excluded_by_outlier = before_outlier_filter - after_outlier_filter
    logger.info(f"ROI outlier filtering: {before_outlier_filter} -> {after_outlier_filter} data points ({excluded_by_outlier} excluded)")

    # IMPORTANT: Filter data by hardcoded date range (June 6 to July 21, 2025)
    min_date = datetime(2025, 6, 6)
    max_date = datetime(2025, 7, 21, 23, 59, 59)  # Include all of July 21st
    logger.info(f"Filtering ROI data to date range: {min_date} to {max_date}")

    # Count data points before filtering
    before_filter_count = len(df)

    # Apply date filter
    df = df[(df['timestamp'] >= min_date) & (df['timestamp'] <= max_date)]

    # Count data points after filtering
    after_filter_count = len(df)
    excluded_by_date = before_filter_count - after_filter_count
    logger.info(f"ROI Date filtering: {before_filter_count} -> {after_filter_count} data points ({excluded_by_date} excluded)")

    # NEW APPROACH: Daily aggregation and median calculation
    # Step 1: Aggregate data daily per agent (median of values within each day)
    daily_agent_data = aggregate_daily_data(df, 'roi')

    # Step 2: Calculate daily medians across all agents
    daily_medians = calculate_daily_medians(daily_agent_data, 'roi')

    # Step 3: Calculate 7-day moving average of daily medians
    daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)

    logger.info(f"NEW APPROACH: Processed {len(df)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")

    # Find the last date where we have valid moving average data
    last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None

    # If we don't have any valid moving average data, use the max time from the original data
    last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()

    logger.info(f"Last valid moving average date: {last_valid_ma_date}")
    logger.info(f"Using last valid date for graph: {last_valid_date}")

    # Plot individual agent daily data points with agent names in hover, but limit display for scalability
    if not daily_agent_data.empty:
        # Group by agent to use different colors for each agent
        unique_agents = daily_agent_data['agent_name'].unique()
        colors = px.colors.qualitative.Plotly[:len(unique_agents)]

        # Create a color map for agents
        color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}

        # Calculate the total number of data points per agent to determine which are most active
        agent_counts = daily_agent_data['agent_name'].value_counts()

        # Determine how many agents to show individually (limit to top 5 most active)
        MAX_VISIBLE_AGENTS = 5
        top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
        logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")

        # Add daily aggregated data points for each agent, but only make top agents visible by default
        for agent_name in unique_agents:
            agent_data = daily_agent_data[daily_agent_data['agent_name'] == agent_name]

            # Explicitly convert to Python lists
            x_values = agent_data['timestamp'].tolist()
            y_values = agent_data['roi'].tolist()

            # Change default visibility to False to hide all agent data points
            is_visible = False

            # Add data points as markers for ROI
            fig.add_trace(
                go.Scatter(
                    x=x_values,
                    y=y_values,
                    mode='markers',  # Only markers for original data
                    marker=dict(
                        color=color_map[agent_name],
                        symbol='circle',
                        size=10,
                        line=dict(width=1, color='black')
                    ),
                    name=f'Agent: {agent_name} (Daily ROI)',
                    hovertemplate='Time: %{x}<br>Daily ROI: %{y:.2f}%<br>Agent: ' + agent_name + '<extra></extra>',
                    visible=is_visible  # All agents hidden by default
                )
            )
            logger.info(f"Added daily ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")

    # Add ROI 7-day moving average of daily medians as a smooth line
    x_values_ma = daily_medians_with_ma['timestamp'].tolist()
    y_values_ma = daily_medians_with_ma['moving_avg'].tolist()

    # Create hover template for the ROI moving average line
    hover_data_roi = []
    for idx, row in daily_medians_with_ma.iterrows():
        timestamp = row['timestamp']
        # Format timestamp to show only date for daily data
        formatted_timestamp = timestamp.strftime('%Y-%m-%d')

        # Calculate number of active agents on this date
        active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())

        # DEBUG: Log agent counts for July 8th specifically
        if formatted_timestamp == '2025-07-08':
            agents_on_date = daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()
            logger.info(f"ROI GRAPH - July 8th active agents: {active_agents}")
            logger.info(f"ROI GRAPH - July 8th agent IDs: {sorted(agents_on_date.tolist())}")

        hover_data_roi.append(
            f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
        )

    fig.add_trace(
        go.Scatter(
            x=x_values_ma,
            y=y_values_ma,
            mode='lines',  # Only lines for moving average
            line=dict(color='blue', width=3, shape='spline', smoothing=1.3),  # Smooth curved line like APR
            name='Median ROI (7d window)',
            hovertext=hover_data_roi,
            hoverinfo='text',
            visible=True  # Visible by default
        )
    )
    logger.info(f"Added 7-day moving average of daily median ROI trace with {len(x_values_ma)} points")

    # Update layout with average runtime information in the title
    fig.update_layout(
        title=dict(
            text=f"Optimus Agents ROI (over avg. {avg_runtime:.1f} days runtime)",
            font=dict(
                family="Arial, sans-serif",
                size=22,
                color="black",
                weight="bold"
            )
        ),
        xaxis_title=None,  # Remove x-axis title to use annotation instead
        yaxis_title=None,  # Remove the y-axis title as we'll use annotations instead
        template="plotly_white",
        height=600,  # Reduced height for better fit on smaller screens
        autosize=True,  # Enable auto-sizing for responsiveness
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            groupclick="toggleitem"
        ),
        margin=dict(r=30, l=120, t=40, b=50),  # Increased bottom margin for x-axis title
        hovermode="closest"
    )

    # Add single annotation for y-axis
    fig.add_annotation(
        x=-0.08,  # Position further from the y-axis to avoid overlapping with tick labels
        y=0,  # Center of the y-axis
        xref="paper",
        yref="y",
        text="ROI [%]",
        showarrow=False,
        font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
        textangle=-90,  # Rotate text to be vertical
        align="center"
    )

    # Update layout for legend
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            groupclick="toggleitem",
            font=dict(
                family="Arial, sans-serif",
                size=14,  # Adjusted font size
                color="black",
                weight="bold"
            )
        )
    )

    # Update y-axis with clipping at -5
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        range=[-5, 10],  # Clip bottom at -5, reasonable top at 10
        tickformat=".1f",  # Format tick labels with 1 decimal place for better precision
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
        title=None  # Remove the built-in axis title since we're using annotations
    )

    # Update x-axis with better formatting and hardcoded date range (June 6 to July 21)
    min_date = datetime(2025, 6, 6)   # Hardcoded start date: June 6, 2025
    max_date = datetime(2025, 7, 21)  # Hardcoded end date: July 21, 2025

    logger.info(f"ROI Graph - Hardcoded date range: min_date = {min_date}, max_date = {max_date}")

    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        # Set hardcoded range from June 6 to July 21, 2025
        range=[min_date, max_date],
        autorange=False,  # Explicitly disable autoscale
        tickformat="%b %d",  # Simplified date format without time
        tickangle=-30,  # Angle the labels for better readability
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
        title=None  # Remove built-in title to use annotation instead
    )

    try:
        # Save the figure
        graph_file = "optimus_roi_graph.html"
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)

        # Also save as image for compatibility
        img_file = "optimus_roi_graph.png"
        try:
            fig.write_image(img_file)
            logger.info(f"ROI graph saved to {graph_file} and {img_file}")
        except Exception as e:
            logger.error(f"Error saving ROI image: {e}")
            logger.info(f"ROI graph saved to {graph_file} only")

        # Return the figure object for direct use in Gradio
        return fig
    except Exception as e:
        # If the complex graph approach fails, create a simpler one
        logger.error(f"Error creating advanced ROI graph: {e}")
        logger.info("Falling back to simpler ROI graph")

        # Create a simpler graph as fallback
        simple_fig = go.Figure()

        # Add zero line
        simple_fig.add_shape(
            type="line",
            line=dict(dash="solid", width=1.5, color="black"),
            y0=0, y1=0,
            x0=min_time, x1=max_time
        )

        # Add background shapes with fixed reasonable ranges
        simple_fig.add_shape(
            type="rect",
            fillcolor="rgba(230, 243, 255, 0.3)",
            line=dict(width=0),
            y0=0, y1=10,  # Fixed positive range to avoid extreme outliers affecting the view
            x0=min_time, x1=max_time,
            layer="below"
        )
        simple_fig.add_shape(
            type="rect",
            fillcolor="rgba(255, 230, 230, 0.3)",
            line=dict(width=0),
            y0=-10, y1=0,  # Fixed negative range to avoid extreme outliers affecting the view
            x0=min_time, x1=max_time,
            layer="below"
        )

        # Simply plot the median ROI moving average
        if not daily_medians_with_ma.empty:
            # Add moving average as a line
            simple_fig.add_trace(
                go.Scatter(
                    x=daily_medians_with_ma['timestamp'],
                    y=daily_medians_with_ma['moving_avg'],
                    mode='lines',
                    name='Median ROI (7d window)',
                    line=dict(width=2, color='blue')  # Thinner line
                )
            )

        # Simplified layout with adjusted y-axis range
        simple_fig.update_layout(
            title=dict(
                text="Optimus Agents ROI",
                font=dict(
                    family="Arial, sans-serif",
                    size=22,
                    color="black",
                    weight="bold"
                )
            ),
            xaxis_title=None,
            yaxis_title=None,
            template="plotly_white",
            height=600,
            autosize=True,
            margin=dict(r=30, l=120, t=40, b=50)
        )

        # Update y-axis with fixed range for ROI (-10 to 10)
        simple_fig.update_yaxes(
            showgrid=True,
            gridwidth=1,
            gridcolor='rgba(0,0,0,0.1)',
            range=[-10, 10],  # Set fixed range from -10 to 10
            tickformat=".2f",
            tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
            title=None  # Remove the built-in axis title since we're using annotations
        )

        # Update x-axis with better formatting and autoscaling
        simple_fig.update_xaxes(
            showgrid=True,
            gridwidth=1,
            gridcolor='rgba(0,0,0,0.1)',
            autorange=True,  # Enable autoscaling
            tickformat="%b %d",
            tickangle=-30,
            tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
        )

        # Save the figure
        graph_file = "optimus_roi_graph.html"
        simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)

        # Return the simple figure
        return simple_fig


def save_roi_to_csv(df):
    """Save the ROI data DataFrame to a CSV file and return the file path"""
    if df.empty:
        logger.error("No ROI data to save to CSV")
        return None

    # Define the CSV file path
    csv_file = "optimus_roi_values.csv"

    # Save to CSV
    df.to_csv(csv_file, index=False)
    logger.info(f"ROI data saved to {csv_file}")

    return csv_file


def create_time_series_graph_per_agent(df):
    """Create a time series graph for each agent using Plotly"""
    # Get unique agents
    unique_agents = df['agent_id'].unique()

    if len(unique_agents) == 0:
        logger.error("No agent data to plot")
        fig = go.Figure()
        fig.add_annotation(
            text="No agent data available",
            x=0.5, y=0.5,
            showarrow=False, font=dict(size=20)
        )
        return fig

    # Create a subplot figure for each agent
    fig = make_subplots(rows=len(unique_agents), cols=1,
                        subplot_titles=[f"Agent: {df[df['agent_id'] == agent_id]['agent_name'].iloc[0]}" for agent_id in unique_agents],
                        vertical_spacing=0.1)

    # Plot data for each agent
    for i, agent_id in enumerate(unique_agents):
        agent_data = df[df['agent_id'] == agent_id].copy()
        agent_name = agent_data['agent_name'].iloc[0]
        row = i + 1
color="black"), y0=0, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(), row=row, col=1 ) # Add background colors with dynamic values fig.add_shape( type="rect", fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0), y0=0, y1=agent_data['apr'].max() * 1.1 if not agent_data.empty else 10, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(), row=row, col=1, layer="below" ) fig.add_shape( type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0), y0=agent_data['apr'].min() * 1.1 if not agent_data.empty else -10, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(), row=row, col=1, layer="below" ) # Create separate dataframes for different data types apr_data = agent_data[agent_data['metric_type'] == 'APR'] perf_data = agent_data[agent_data['metric_type'] == 'Performance'] # Sort all data by timestamp for the line plots combined_agent_data = agent_data.sort_values('timestamp') # Add main line connecting all points fig.add_trace( go.Scatter( x=combined_agent_data['timestamp'], y=combined_agent_data['apr'], mode='lines', line=dict(color='purple', width=2), name=f'{agent_name}', legendgroup=agent_name, showlegend=(i == 0), # Only show in legend once hovertemplate='Time: %{x}
Value: %{y:.2f}' ), row=row, col=1 ) # Add scatter points for APR values if not apr_data.empty: fig.add_trace( go.Scatter( x=apr_data['timestamp'], y=apr_data['apr'], mode='markers', marker=dict(color='blue', size=10, symbol='circle'), name='APR', legendgroup='APR', showlegend=(i == 0), hovertemplate='Time: %{x}<br>APR: %{y:.2f}' ), row=row, col=1 ) # Add scatter points for Performance values if not perf_data.empty: fig.add_trace( go.Scatter( x=perf_data['timestamp'], y=perf_data['apr'], mode='markers', marker=dict(color='red', size=10, symbol='square'), name='Performance', legendgroup='Performance', showlegend=(i == 0), hovertemplate='Time: %{x}
Performance: %{y:.2f}' ), row=row, col=1 ) # Update axes fig.update_xaxes(title_text="Time", row=row, col=1) fig.update_yaxes(title_text="Value", row=row, col=1, gridcolor='rgba(0,0,0,0.1)') # Update layout fig.update_layout( height=400 * len(unique_agents), width=1000, title_text="APR and Performance Values per Agent", template="plotly_white", legend=dict( orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1 ), margin=dict(r=20, l=20, t=30, b=20), hovermode="closest" ) # Save the figure (still useful for reference) graph_file = "optimus_apr_per_agent_graph.html" fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) # Also save as image for compatibility img_file = "optimus_apr_per_agent_graph.png" fig.write_image(img_file) logger.info(f"Per-agent graph saved to {graph_file} and {img_file}") # Return the figure object for direct use in Gradio return fig def write_debug_info(df, fig): """Minimal debug info function""" try: # Just log minimal information logger.debug(f"Graph created with {len(df)} data points and {len(fig.data)} traces") return True except Exception as e: logger.error(f"Error writing debug info: {e}") return False def create_combined_time_series_graph(df): """Create a time series graph showing average APR values across all agents""" if len(df) == 0: logger.error("No data to plot combined graph") fig = go.Figure() fig.add_annotation( text="No data available", x=0.5, y=0.5, showarrow=False, font=dict(size=20) ) return fig # IMPORTANT: Force data types to ensure consistency df['apr'] = df['apr'].astype(float) # Ensure APR is float df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string # Get min and max time for shapes min_time = df['timestamp'].min() max_time = df['timestamp'].max() # Use the actual start date from the data instead of a fixed date x_start_date = min_time # CRITICAL: Log the exact dataframe we're using for plotting to help debug logger.info(f"Graph data - shape: {df.shape}, columns: {df.columns}") logger.info(f"Graph data - unique agents: {df['agent_name'].unique().tolist()}") logger.info("Graph data - all positive APR values only") logger.info(f"Graph data - min APR: {df['apr'].min()}, max APR: {df['apr'].max()}") # Export full dataframe to CSV for debugging debug_csv = "debug_graph_data.csv" df.to_csv(debug_csv) logger.info(f"Exported graph data to {debug_csv} for debugging") # Write detailed data report with open("debug_graph_data_report.txt", "w") as f: f.write("==== GRAPH DATA REPORT ====\n\n") f.write(f"Total data points: {len(df)}\n") f.write(f"Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}\n\n") # Output per-agent details unique_agents = df['agent_id'].unique() f.write(f"Number of agents: {len(unique_agents)}\n\n") for agent_id in unique_agents: agent_data = df[df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] f.write(f"== Agent: {agent_name} (ID: {agent_id}) ==\n") f.write(f" Total data points: {len(agent_data)}\n") apr_data = agent_data[agent_data['metric_type'] == 'APR'] f.write(f" APR data points: {len(apr_data)}\n") if not apr_data.empty: f.write(f" APR values: {apr_data['apr'].tolist()}\n") f.write(f" APR timestamps: {[ts.strftime('%Y-%m-%d %H:%M:%S') if ts is not None else 'None' for ts in apr_data['timestamp']]}\n") f.write("\n") logger.info("Generated detailed graph data report") # ENSURE THERE ARE NO CONFLICTING AXES OR TRACES # Create Plotly figure in a clean state fig = go.Figure() # Enable autoscaling instead of fixed ranges logger.info("Using 
autoscaling for axes ranges") # Add background shapes for APR and Performance regions min_time = df['timestamp'].min() max_time = df['timestamp'].max() # Add shape for positive APR region (above zero) - use reasonable fixed range fig.add_shape( type="rect", fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0), y0=0, y1=200, # Fixed positive range to avoid extreme outliers affecting the view x0=min_time, x1=max_time, layer="below" ) # Add shape for negative APR region (below zero) - use reasonable fixed range fig.add_shape( type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0), y0=-200, y1=0, # Fixed negative range to avoid extreme outliers affecting the view x0=min_time, x1=max_time, layer="below" ) # Add zero line fig.add_shape( type="line", line=dict(dash="solid", width=1.5, color="black"), y0=0, y1=0, x0=min_time, x1=max_time ) # MODIFIED: Calculate average APR values across all agents for each timestamp # Filter for APR data only logger.info("=== APR GRAPH DATA FILTERING DEBUG ===") logger.info(f"Initial APR data loaded: {len(df)} records") logger.info(f"Unique agents in initial data: {df['agent_id'].nunique()}") logger.info(f"Agent IDs in initial data: {sorted(df['agent_id'].unique().tolist())}") # Check metric_type distribution if 'metric_type' in df.columns: metric_counts = df['metric_type'].value_counts() logger.info(f"Metric type distribution: {metric_counts.to_dict()}") else: logger.warning("No 'metric_type' column found in APR data") apr_data = df[df['metric_type'] == 'APR'].copy() logger.info(f"After metric_type == 'APR' filter: {len(apr_data)} records") logger.info(f"Unique agents after APR filter: {apr_data['agent_id'].nunique()}") logger.info(f"Agent IDs after APR filter: {sorted(apr_data['agent_id'].unique().tolist())}") # Date-based APR percentage filtering: ±500% filter until June 22, 2025, then no filter cutoff_date = datetime(2025, 6, 22) before_cutoff = apr_data[apr_data['timestamp'] < cutoff_date] after_cutoff = apr_data[apr_data['timestamp'] >= cutoff_date] # Apply ±500% filter to data before June 22, 2025 before_outlier_filter = len(before_cutoff) before_cutoff_filtered = before_cutoff[(before_cutoff['apr'] <= 500) & (before_cutoff['apr'] >= -500)] after_outlier_filter = len(before_cutoff_filtered) excluded_by_outlier = before_outlier_filter - after_outlier_filter logger.info(f"APR filtering before June 22, 2025: {before_outlier_filter} -> {after_outlier_filter} data points ({excluded_by_outlier} excluded by ±500% filter)") # No filtering for data after June 22, 2025 logger.info(f"APR filtering after June 22, 2025: {len(after_cutoff)} data points (no percentage filter applied)") # Combine filtered before data with unfiltered after data apr_data = pd.concat([before_cutoff_filtered, after_cutoff], ignore_index=True) logger.info(f"Total APR data after date-based filtering: {len(apr_data)} data points") # IMPORTANT: Filter data by hardcoded date range (June 6 to July 8, 2025) min_date = datetime(2025, 6, 6) max_date = datetime(2025, 7, 21, 23, 59, 59) # Include all of July 8th logger.info(f"Filtering APR data to date range: {min_date} to {max_date}") # Count data points before filtering before_filter_count = len(apr_data) # Apply date filter apr_data = apr_data[(apr_data['timestamp'] >= min_date) & (apr_data['timestamp'] <= max_date)] # Count data points after filtering after_filter_count = len(apr_data) excluded_by_date = before_filter_count - after_filter_count logger.info(f"Date filtering: {before_filter_count} -> {after_filter_count} data points 
({excluded_by_date} excluded)") # NEW APPROACH: Daily aggregation and median calculation for APR # Step 1: Aggregate data daily per agent (mean of values within each day) daily_agent_data = aggregate_daily_data(apr_data, 'apr') # Step 2: Calculate daily medians across all agents daily_medians = calculate_daily_medians(daily_agent_data, 'apr') # Step 3: Calculate 7-day moving average of daily medians daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'apr', window_days=7) # Also handle adjusted APR if it exists daily_medians_adjusted = None daily_medians_adjusted_with_ma = None if 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any(): # Create a separate dataset for adjusted APR apr_data_with_adjusted = apr_data[apr_data['adjusted_apr'].notna()].copy() if not apr_data_with_adjusted.empty: # Step 1: Aggregate adjusted APR data daily per agent daily_agent_data_adjusted = aggregate_daily_data(apr_data_with_adjusted, 'adjusted_apr') # Step 2: Calculate daily medians for adjusted APR daily_medians_adjusted = calculate_daily_medians(daily_agent_data_adjusted, 'adjusted_apr') # Step 3: Calculate 7-day moving average of daily medians for adjusted APR daily_medians_adjusted_with_ma = calculate_moving_average_medians(daily_medians_adjusted, 'adjusted_apr', window_days=7) logger.info(f"NEW APPROACH APR: Processed {len(apr_data)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians") if daily_medians_adjusted is not None: logger.info(f"NEW APPROACH Adjusted APR: Processed adjusted APR data → {len(daily_medians_adjusted)} daily medians") # This old moving average calculation is no longer needed with the new daily median approach # Find the last date where we have valid moving average data last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None # Find the last date where we have valid adjusted moving average data last_valid_adj_ma_date = None if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty: last_valid_adj_ma_date = daily_medians_adjusted_with_ma[daily_medians_adjusted_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_adjusted_with_ma['moving_avg'].dropna().empty else None # Determine the last valid date for either moving average last_valid_date = last_valid_ma_date if last_valid_adj_ma_date is not None: last_valid_date = max(last_valid_date, last_valid_adj_ma_date) if last_valid_date is not None else last_valid_adj_ma_date # If we don't have any valid moving average data, use the max time from the original data if last_valid_date is None: last_valid_date = df['timestamp'].max() logger.info(f"Last valid moving average date: {last_valid_ma_date}") logger.info(f"Last valid adjusted moving average date: {last_valid_adj_ma_date}") logger.info(f"Using last valid date for graph: {last_valid_date}") # Plot individual agent data points with agent names in hover, but limit display for scalability if not apr_data.empty: # Group by agent to use different colors for each agent unique_agents = apr_data['agent_name'].unique() colors = px.colors.qualitative.Plotly[:len(unique_agents)] # Create a color map for agents color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)} # Calculate the total number of data points per agent to determine which are most active agent_counts = apr_data['agent_name'].value_counts() # Determine how 
many agents to show individually (limit to top 5 most active) MAX_VISIBLE_AGENTS = 5 top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist() logger.info(f"Identified top {len(top_agents)} most active agents out of {len(unique_agents)} total agents (all agent traces hidden by default)") # Add data points for each agent; every per-agent trace is hidden by default and can be toggled via the legend for agent_name in unique_agents: agent_data = apr_data[apr_data['agent_name'] == agent_name] # Explicitly convert to Python lists x_values = agent_data['timestamp'].tolist() y_values = agent_data['apr'].tolist() # Default visibility is False so all agent data points start hidden is_visible = False # Add data points as markers for APR fig.add_trace( go.Scatter( x=x_values, y=y_values, mode='markers', # Only markers for original data marker=dict( color=color_map[agent_name], symbol='circle', size=10, line=dict(width=1, color='black') ), name=f'Agent: {agent_name} (APR)', hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '', visible=is_visible # All agents hidden by default ) ) logger.info(f"Added APR data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})") # Add data points for adjusted APR if it exists if 'adjusted_apr' in agent_data.columns and agent_data['adjusted_apr'].notna().any(): x_values_adj = agent_data['timestamp'].tolist() y_values_adj = agent_data['adjusted_apr'].tolist() fig.add_trace( go.Scatter( x=x_values_adj, y=y_values_adj, mode='markers', # Only markers for original data marker=dict( color=color_map[agent_name], symbol='diamond', # Different symbol for adjusted APR size=10, line=dict(width=1, color='black') ), name=f'Agent: {agent_name} (Adjusted APR)', hovertemplate='Time: %{x}
Adjusted APR: %{y:.2f}
Agent: ' + agent_name + '', visible=is_visible # All agents hidden by default ) ) logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})") # Add APR 7-day moving average of daily medians as a smooth line x_values_ma = daily_medians_with_ma['timestamp'].tolist() y_values_ma = daily_medians_with_ma['moving_avg'].tolist() # Create hover template for the APR moving average line # CONSISTENCY FIX: Use ROI daily agent data for active agent counts hover_data_apr = [] for idx, row in daily_medians_with_ma.iterrows(): timestamp = row['timestamp'] # Format timestamp to show only date for daily data formatted_timestamp = timestamp.strftime('%Y-%m-%d') # FIXED: Use ROI data to get consistent active agent counts # Load ROI data to get the correct agent counts try: df_roi_for_counts, _ = load_apr_data_from_csv() if not df_roi_for_counts.empty and 'roi' in df_roi_for_counts.columns: # Filter for ROI data and same date df_roi_filtered = df_roi_for_counts[ (df_roi_for_counts['metric_type'] == 'APR') & (df_roi_for_counts['roi'].notna()) ].copy() # Aggregate daily for ROI data roi_daily_agent_data = aggregate_daily_data(df_roi_filtered, 'roi') # Get active agents from ROI data for this date active_agents = len(roi_daily_agent_data[roi_daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()) else: # Fallback to APR data if ROI not available active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()) except: # Fallback to APR data if there's any error active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()) # DEBUG: Log agent counts for July 8th specifically if formatted_timestamp == '2025-07-08': logger.info(f"APR GRAPH - July 8th active agents (using ROI logic): {active_agents}") hover_data_apr.append( f"Date: {formatted_timestamp}
Median APR (7d window): {row['moving_avg']:.2f}%
Active agents: {active_agents}" ) fig.add_trace( go.Scatter( x=x_values_ma, y=y_values_ma, mode='lines', # Only lines for moving average line=dict(color='red', width=3, shape='spline', smoothing=1.3), # Smooth curved line name='Median APR (7d window)', hovertext=hover_data_apr, hoverinfo='text', visible=True # Visible by default ) ) logger.info(f"Added 7-day moving average of daily median APR trace with {len(x_values_ma)} points") # Add adjusted APR 7-day moving average line if it exists if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty: x_values_adj_ma = daily_medians_adjusted_with_ma['timestamp'].tolist() y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist() # Create hover template for the adjusted APR moving average line # CONSISTENCY FIX: Use ROI daily agent data for active agent counts (same as regular APR) hover_data_adj = [] for idx, row in daily_medians_adjusted_with_ma.iterrows(): timestamp = row['timestamp'] # Format timestamp to show only date for daily data formatted_timestamp = timestamp.strftime('%Y-%m-%d') # FIXED: Use ROI data to get consistent active agent counts (same logic as APR) try: df_roi_for_counts, _ = load_apr_data_from_csv() if not df_roi_for_counts.empty and 'roi' in df_roi_for_counts.columns: # Filter for ROI data and same date df_roi_filtered = df_roi_for_counts[ (df_roi_for_counts['metric_type'] == 'APR') & (df_roi_for_counts['roi'].notna()) ].copy() # Aggregate daily for ROI data roi_daily_agent_data = aggregate_daily_data(df_roi_filtered, 'roi') # Get active agents from ROI data for this date active_agents = len(roi_daily_agent_data[roi_daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()) else: # Fallback to adjusted APR data if ROI not available active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0 except: # Fallback to adjusted APR data if there's any error active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0 # DEBUG: Log agent counts for July 8th specifically if formatted_timestamp == '2025-07-08': logger.info(f"ADJUSTED APR GRAPH - July 8th active agents (using ROI logic): {active_agents}") hover_data_adj.append( f"Date: {formatted_timestamp}
Median Adjusted APR (7d window): {row['moving_avg']:.2f}%
Active agents: {active_agents}" ) fig.add_trace( go.Scatter( x=x_values_adj_ma, y=y_values_adj_ma, mode='lines', # Only lines for moving average line=dict(color='green', width=3, shape='spline', smoothing=1.3), # Smooth curved line name='Median Adjusted APR (7d window)', hovertext=hover_data_adj, hoverinfo='text', visible=True # Visible by default ) ) logger.info(f"Added 7-day moving average of daily median Adjusted APR trace with {len(x_values_adj_ma)} points") else: logger.warning("No adjusted APR moving average data available to plot") # Removed cumulative APR as requested logger.info("Cumulative APR graph line has been removed as requested") # Update layout - use simple boolean values everywhere # Make chart responsive instead of fixed width fig.update_layout( title=dict( text="Optimus Agents", font=dict( family="Arial, sans-serif", size=22, color="black", weight="bold" ) ), xaxis_title=None, # Remove x-axis title to use annotation instead yaxis_title=None, # Remove the y-axis title as we'll use annotations instead template="plotly_white", height=600, # Reduced height for better fit on smaller screens # Removed fixed width to enable responsiveness autosize=True, # Enable auto-sizing for responsiveness legend=dict( orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, groupclick="toggleitem" ), margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title hovermode="closest" ) # Add two separate annotations for y-axis titles # First annotation for "Percent drawdown (%)" fig.add_annotation( x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels y=-25, # Position in the negative region xref="paper", yref="y", text="Percent drawdown (%)", showarrow=False, font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size textangle=-90, # Rotate text to be vertical align="center" ) # Second annotation for "Agent APR (%)" fig.add_annotation( x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels y=75, # Position in the positive region xref="paper", yref="y", text="Agent APR (%)", showarrow=False, font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size textangle=-90, # Rotate text to be vertical align="center" ) # Remove x-axis title annotation # fig.add_annotation( # x=0.5, # Center of the x-axis # y=-0.15, # Below the x-axis # xref="paper", # yref="paper", # text="Date", # showarrow=False, # font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size # align="center" # ) # Update layout for legend fig.update_layout( legend=dict( orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, groupclick="toggleitem", font=dict( family="Arial, sans-serif", size=14, # Adjusted font size color="black", weight="bold" ) ) ) # Update y-axis with clipping at -50 fig.update_yaxes( showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.1)', range=[-50, 200], # Clip bottom at -50, reasonable top at 200 tickformat=".2f", # Format tick labels with 2 decimal places tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size title=None # Remove the built-in axis title since we're using annotations ) # Update x-axis with better formatting and hardcoded date range (June 6 to July 8) min_date = datetime(2025, 6, 6) # Hardcoded start date: June 6, 2025 max_date = datetime(2025, 7, 21) # Hardcoded end date: July 8, 2025 logger.info(f"APR Graph - Hardcoded date range: min_date 
= {min_date}, max_date = {max_date}") fig.update_xaxes( showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.1)', # Set hardcoded range from June 6 to June 18, 2025 range=[min_date, max_date], autorange=False, # Explicitly disable autoscale tickformat="%b %d", # Simplified date format without time tickangle=-30, # Angle the labels for better readability tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size title=None # Remove built-in title to use annotation instead ) # SIMPLIFIED APPROACH: Do a direct plot without markers for comparison # This creates a simple, reliable fallback plot if the advanced one fails try: # Write detailed debug information before saving the figure write_debug_info(df, fig) # Save the figure (still useful for reference) graph_file = "optimus_apr_combined_graph.html" fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) # Also save as image for compatibility img_file = "optimus_apr_combined_graph.png" try: fig.write_image(img_file) logger.info(f"Combined graph saved to {graph_file} and {img_file}") except Exception as e: logger.error(f"Error saving image: {e}") logger.info(f"Combined graph saved to {graph_file} only") # Return the figure object for direct use in Gradio return fig except Exception as e: # If the complex graph approach fails, create a simpler one logger.error(f"Error creating advanced graph: {e}") logger.info("Falling back to Simpler graph") # Create a simpler graph as fallback simple_fig = go.Figure() # Add zero line simple_fig.add_shape( type="line", line=dict(dash="solid", width=1.5, color="black"), y0=0, y1=0, x0=min_time, x1=max_time ) # Define colors for the fallback graph fallback_colors = px.colors.qualitative.Plotly # Simply plot the average APR data with moving average if not avg_apr_data.empty: # Sort by timestamp avg_apr_data = avg_apr_data.sort_values('timestamp') # Calculate both moving averages for the fallback graph avg_apr_data_with_ma = avg_apr_data.copy() avg_apr_data_with_ma['moving_avg'] = None # 2-hour window avg_apr_data_with_ma['infinite_avg'] = None # Infinite window # Define the time window (6 hours) time_window = pd.Timedelta(hours=6) # Calculate the moving averages for each timestamp for i, row in avg_apr_data_with_ma.iterrows(): current_time = row['timestamp'] window_start = current_time - time_window # Get all data points within the 2-hour time window window_data = apr_data[ (apr_data['timestamp'] >= window_start) & (apr_data['timestamp'] <= current_time) ] # Get all data points up to the current timestamp (infinite window) infinite_window_data = apr_data[ apr_data['timestamp'] <= current_time ] # Calculate the average APR for the 2-hour time window if not window_data.empty: avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean() else: # If no data points in the window, use the current value avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr'] # Calculate the average APR for the infinite window if not infinite_window_data.empty: avg_apr_data_with_ma.at[i, 'infinite_avg'] = infinite_window_data['apr'].mean() else: avg_apr_data_with_ma.at[i, 'infinite_avg'] = row['apr'] # Add data points for each agent, but only make top agents visible by default unique_agents = apr_data['agent_name'].unique() colors = px.colors.qualitative.Plotly[:len(unique_agents)] color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)} # Calculate the total number of data points per agent agent_counts = apr_data['agent_name'].value_counts() # 
Determine how many agents to show individually (limit to top 5 most active) MAX_VISIBLE_AGENTS = 5 top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist() for agent_name in unique_agents: agent_data = apr_data[apr_data['agent_name'] == agent_name] # Determine if this agent should be visible by default is_visible = agent_name in top_agents # Add data points as markers simple_fig.add_trace( go.Scatter( x=agent_data['timestamp'], y=agent_data['apr'], mode='markers', name=f'Agent: {agent_name}', marker=dict( size=10, color=color_map[agent_name] ), hovertemplate='Time: %{x}
APR: %{y:.2f}
Agent: ' + agent_name + '', visible=is_visible # Only top agents visible by default ) ) # Add 2-hour moving average as a line simple_fig.add_trace( go.Scatter( x=avg_apr_data_with_ma['timestamp'], y=avg_apr_data_with_ma['moving_avg'], mode='lines', name='Average APR (6h window)', line=dict(width=2, color='red') # Thinner line ) ) # Add infinite window moving average as another line simple_fig.add_trace( go.Scatter( x=avg_apr_data_with_ma['timestamp'], y=avg_apr_data_with_ma['infinite_avg'], mode='lines', name='Cumulative Average APR (all data)', line=dict(width=4, color='green') # Thicker solid line ) ) # Simplified layout with fixed y-axis range (-10 to 10) and increased size simple_fig.update_layout( title=dict( text="Optimus Agents", font=dict( family="Arial, sans-serif", size=22, color="black", weight="bold" ) ), xaxis_title=None, # Remove x-axis title to use annotation instead yaxis_title=None, # Remove the y-axis title as we'll use annotations instead yaxis=dict( # Fixed range from -10 to 10 range=[-10, 10], # Set fixed range from -10 to 10 tickformat=".2f", # Format tick labels with 2 decimal places tickfont=dict(size=12) # Larger font for tick labels ), height=600, # Reduced height for better fit # Removed fixed width to enable responsiveness autosize=True, # Enable auto-sizing for responsiveness template="plotly_white", # Use a cleaner template margin=dict(r=30, l=120, t=40, b=50) # Increased bottom margin for x-axis title ) # Add annotations for y-axis regions in the fallback graph simple_fig.add_annotation( x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels y=-25, # Middle of the negative region xref="paper", yref="y", text="Percent drawdown [%]", showarrow=False, font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size textangle=-90, # Rotate text to be vertical align="center" ) simple_fig.add_annotation( x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels y=50, # Middle of the positive region xref="paper", yref="y", text="Agent APR [%]", showarrow=False, font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size textangle=-90, # Rotate text to be vertical align="center" ) # Remove x-axis title annotation # simple_fig.add_annotation( # x=0.5, # Center of the x-axis # y=-0.15, # Below the x-axis # xref="paper", # yref="paper", # text="Date", # showarrow=False, # font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size # align="center" # ) # Update legend font for fallback graph simple_fig.update_layout( legend=dict( font=dict( family="Arial, sans-serif", size=14, # Adjusted font size color="black", weight="bold" ) ) ) # Apply autoscaling to the x-axis for the fallback graph simple_fig.update_xaxes( autorange=True, # Enable autoscaling tickformat="%b %d", # Simplified date format without time tickangle=-30, tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size title=None # Remove built-in title to use annotation instead ) # Update y-axis tick font for fallback graph simple_fig.update_yaxes( tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold") # Adjusted font size ) # Add a note about hidden agents if there are more than MAX_VISIBLE_AGENTS if len(unique_agents) > MAX_VISIBLE_AGENTS: simple_fig.add_annotation( text=f"Note: Only showing top {MAX_VISIBLE_AGENTS} agents by default. 
Toggle others in legend.", xref="paper", yref="paper", x=0.5, y=1.05, showarrow=False, font=dict(size=12, color="gray"), align="center" ) # Return the simple figure return simple_fig def save_to_csv(df): """Save the APR data DataFrame to a CSV file and return the file path""" if df.empty: logger.error("No APR data to save to CSV") return None # Define the CSV file path csv_file = "optimus_apr_values.csv" # Save to CSV df.to_csv(csv_file, index=False) logger.info(f"APR data saved to {csv_file}") # Also generate a statistics CSV file stats_df = generate_statistics_from_data(df) stats_csv = "optimus_apr_statistics.csv" stats_df.to_csv(stats_csv, index=False) logger.info(f"Statistics saved to {stats_csv}") # Log detailed statistics about adjusted APR if 'adjusted_apr' in df.columns and df['adjusted_apr'].notna().any(): adjusted_stats = stats_df[stats_df['avg_adjusted_apr'].notna()] logger.info(f"Agents with adjusted APR data: {len(adjusted_stats)} out of {len(stats_df)}") for _, row in adjusted_stats.iterrows(): if row['agent_id'] != 'ALL': # Skip the overall stats row logger.info(f"Agent {row['agent_name']} adjusted APR stats: avg={row['avg_adjusted_apr']:.2f}, min={row['min_adjusted_apr']:.2f}, max={row['max_adjusted_apr']:.2f}") # Log overall adjusted APR stats overall_row = stats_df[stats_df['agent_id'] == 'ALL'] if not overall_row.empty and pd.notna(overall_row['avg_adjusted_apr'].iloc[0]): logger.info(f"Overall adjusted APR stats: avg={overall_row['avg_adjusted_apr'].iloc[0]:.2f}, min={overall_row['min_adjusted_apr'].iloc[0]:.2f}, max={overall_row['max_adjusted_apr'].iloc[0]:.2f}") return csv_file def generate_statistics_from_data(df): """Generate statistics from the APR data""" if df.empty: return pd.DataFrame() # Get unique agents unique_agents = df['agent_id'].unique() stats_list = [] # Generate per-agent statistics for agent_id in unique_agents: agent_data = df[df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] # APR statistics apr_data = agent_data[agent_data['metric_type'] == 'APR'] real_apr = apr_data[apr_data['is_dummy'] == False] # Performance statistics perf_data = agent_data[agent_data['metric_type'] == 'Performance'] real_perf = perf_data[perf_data['is_dummy'] == False] # Check if adjusted_apr exists and has non-null values has_adjusted_apr = 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any() stats = { 'agent_id': agent_id, 'agent_name': agent_name, 'total_points': len(agent_data), 'apr_points': len(apr_data), 'performance_points': len(perf_data), 'real_apr_points': len(real_apr), 'real_performance_points': len(real_perf), 'avg_apr': apr_data['apr'].mean() if not apr_data.empty else None, 'avg_performance': perf_data['apr'].mean() if not perf_data.empty else None, 'max_apr': apr_data['apr'].max() if not apr_data.empty else None, 'min_apr': apr_data['apr'].min() if not apr_data.empty else None, 'avg_adjusted_apr': apr_data['adjusted_apr'].mean() if has_adjusted_apr else None, 'max_adjusted_apr': apr_data['adjusted_apr'].max() if has_adjusted_apr else None, 'min_adjusted_apr': apr_data['adjusted_apr'].min() if has_adjusted_apr else None, 'latest_timestamp': agent_data['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not agent_data.empty else None } stats_list.append(stats) # Generate overall statistics apr_only = df[df['metric_type'] == 'APR'] perf_only = df[df['metric_type'] == 'Performance'] # Check if adjusted_apr exists and has non-null values for overall stats has_adjusted_apr_overall = 'adjusted_apr' in apr_only.columns 
and apr_only['adjusted_apr'].notna().any() overall_stats = { 'agent_id': 'ALL', 'agent_name': 'All Agents', 'total_points': len(df), 'apr_points': len(apr_only), 'performance_points': len(perf_only), 'real_apr_points': len(apr_only[apr_only['is_dummy'] == False]), 'real_performance_points': len(perf_only[perf_only['is_dummy'] == False]), 'avg_apr': apr_only['apr'].mean() if not apr_only.empty else None, 'avg_performance': perf_only['apr'].mean() if not perf_only.empty else None, 'max_apr': apr_only['apr'].max() if not apr_only.empty else None, 'min_apr': apr_only['apr'].min() if not apr_only.empty else None, 'avg_adjusted_apr': apr_only['adjusted_apr'].mean() if has_adjusted_apr_overall else None, 'max_adjusted_apr': apr_only['adjusted_apr'].max() if has_adjusted_apr_overall else None, 'min_adjusted_apr': apr_only['adjusted_apr'].min() if has_adjusted_apr_overall else None, 'latest_timestamp': df['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not df.empty else None } stats_list.append(overall_stats) return pd.DataFrame(stats_list) # Create dummy functions for the commented out imports def create_transcation_visualizations(): """Dummy implementation that returns a placeholder graph""" fig = go.Figure() fig.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) return fig def create_active_agents_visualizations(): """Dummy implementation that returns a placeholder graph""" fig = go.Figure() fig.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) return fig # Dummy blockchain functions to replace the commented ones def get_transfers(integrator: str, wallet: str) -> str: """Dummy function that returns an empty result""" return {"transfers": []} def fetch_and_aggregate_transactions(): """Dummy function that returns empty data""" return [], {} # Function to parse the transaction data and prepare it for visualization def process_transactions_and_agents(data): """Dummy function that returns empty dataframes""" df_transactions = pd.DataFrame() df_agents = pd.DataFrame(columns=['date', 'agent_count']) df_agents_weekly = pd.DataFrame() return df_transactions, df_agents, df_agents_weekly # Function to create visualizations based on the metrics def create_visualizations(): # Placeholder figures for testing fig_swaps_chain = go.Figure() fig_swaps_chain.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) fig_bridges_chain = go.Figure() fig_bridges_chain.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) fig_agents_registered = go.Figure() fig_agents_registered.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) fig_tvl = go.Figure() fig_tvl.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) return fig_swaps_chain, fig_bridges_chain, fig_agents_registered, fig_tvl # Modify dashboard function to make the plot container responsive def dashboard(): with gr.Blocks() as demo: gr.Markdown("# Average Optimus Agent Performance") # Create tabs 
for APR and ROI metrics with gr.Tabs(): # APR Metrics tab with gr.Tab("APR Metrics"): with gr.Column(): refresh_apr_btn = gr.Button("Refresh APR Data") # Create container for plotly figure with responsive sizing with gr.Column(): combined_apr_graph = gr.Plot(label="APR for All Agents", elem_id="responsive_apr_plot") # Create compact toggle controls at the bottom of the graph with gr.Row(visible=True): gr.Markdown("##### Toggle Graph Lines", elem_id="apr_toggle_title") with gr.Row(): with gr.Column(): with gr.Row(elem_id="apr_toggle_container"): with gr.Column(scale=1, min_width=150): apr_toggle = gr.Checkbox(label="APR Average", value=True, elem_id="apr_toggle") with gr.Column(scale=1, min_width=150): adjusted_apr_toggle = gr.Checkbox(label="ETH Adjusted APR Average", value=True, elem_id="adjusted_apr_toggle") # Add a text area for status messages apr_status_text = gr.Textbox(label="Status", value="Ready", interactive=False) # ROI Metrics tab with gr.Tab("ROI Metrics"): with gr.Column(): refresh_roi_btn = gr.Button("Refresh ROI Data") # Create container for plotly figure with responsive sizing with gr.Column(): combined_roi_graph = gr.Plot(label="ROI for All Agents", elem_id="responsive_roi_plot") # Create compact toggle controls at the bottom of the graph with gr.Row(visible=True): gr.Markdown("##### Toggle Graph Lines", elem_id="roi_toggle_title") with gr.Row(): with gr.Column(): with gr.Row(elem_id="roi_toggle_container"): with gr.Column(scale=1, min_width=150): roi_toggle = gr.Checkbox(label="ROI Average", value=True, elem_id="roi_toggle") # Add a text area for status messages roi_status_text = gr.Textbox(label="Status", value="Ready", interactive=False) # Add custom CSS for making the plots responsive gr.HTML(""" """) # Function to update the APR graph def update_apr_graph(show_apr_ma=True, show_adjusted_apr_ma=True): # Generate visualization and get figure object directly try: combined_fig, _ = generate_apr_visualizations() # Update visibility of traces based on toggle values for i, trace in enumerate(combined_fig.data): # Check if this is a moving average trace if trace.name == 'Median APR (7d window)': trace.visible = show_apr_ma elif trace.name == 'Average ETH Adjusted APR (3d window)': trace.visible = show_adjusted_apr_ma return combined_fig except Exception as e: logger.exception("Error generating APR visualization") # Create error figure error_fig = go.Figure() error_fig.add_annotation( text=f"Error: {str(e)}", x=0.5, y=0.5, showarrow=False, font=dict(size=15, color="red") ) return error_fig # Function to update the ROI graph def update_roi_graph(show_roi_ma=True): # Generate visualization and get figure object directly try: combined_fig, _ = generate_roi_visualizations() # Update visibility of traces based on toggle values for i, trace in enumerate(combined_fig.data): # Check if this is a moving average trace if trace.name == 'Median ROI (7d window)': trace.visible = show_roi_ma return combined_fig except Exception as e: logger.exception("Error generating ROI visualization") # Create error figure error_fig = go.Figure() error_fig.add_annotation( text=f"Error: {str(e)}", x=0.5, y=0.5, showarrow=False, font=dict(size=15, color="red") ) return error_fig # Initialize the APR graph on load with a placeholder apr_placeholder_fig = go.Figure() apr_placeholder_fig.add_annotation( text="Click 'Refresh APR Data' to load APR graph", x=0.5, y=0.5, showarrow=False, font=dict(size=15) ) combined_apr_graph.value = apr_placeholder_fig # Initialize the ROI graph on load with a placeholder 
roi_placeholder_fig = go.Figure() roi_placeholder_fig.add_annotation( text="Click 'Refresh ROI Data' to load ROI graph", x=0.5, y=0.5, showarrow=False, font=dict(size=15) ) combined_roi_graph.value = roi_placeholder_fig # Function to update the APR graph based on toggle states def update_apr_graph_with_toggles(apr_visible, adjusted_apr_visible): return update_apr_graph(apr_visible, adjusted_apr_visible) # Function to update the ROI graph based on toggle states def update_roi_graph_with_toggles(roi_visible): return update_roi_graph(roi_visible) # Function to refresh APR data def refresh_apr_data(): """Refresh APR data from the database and update the visualization""" try: # Fetch new APR data logger.info("Manually refreshing APR data...") fetch_apr_data_from_db() # Verify data was fetched successfully if global_df is None or len(global_df) == 0: logger.error("Failed to fetch APR data") return combined_apr_graph.value, "Error: Failed to fetch APR data. Check the logs for details." # Log info about fetched data with focus on adjusted_apr may_10_2025 = datetime(2025, 5, 10) if 'timestamp' in global_df and 'adjusted_apr' in global_df: after_may_10 = global_df[global_df['timestamp'] >= may_10_2025] with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()] logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}") logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}") # Generate new visualization logger.info("Generating new APR visualization...") new_graph = update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value) return new_graph, "APR data refreshed successfully" except Exception as e: logger.error(f"Error refreshing APR data: {e}") return combined_apr_graph.value, f"Error: {str(e)}" # Function to refresh ROI data def refresh_roi_data(): """Refresh ROI data from the database and update the visualization""" try: # Fetch new ROI data logger.info("Manually refreshing ROI data...") fetch_apr_data_from_db() # This also fetches ROI data # Verify data was fetched successfully if global_roi_df is None or len(global_roi_df) == 0: logger.error("Failed to fetch ROI data") return combined_roi_graph.value, "Error: Failed to fetch ROI data. Check the logs for details." # Generate new visualization logger.info("Generating new ROI visualization...") new_graph = update_roi_graph(roi_toggle.value) return new_graph, "ROI data refreshed successfully" except Exception as e: logger.error(f"Error refreshing ROI data: {e}") return combined_roi_graph.value, f"Error: {str(e)}" # Set up the button click event for APR refresh refresh_apr_btn.click( fn=refresh_apr_data, inputs=[], outputs=[combined_apr_graph, apr_status_text] ) # Set up the button click event for ROI refresh refresh_roi_btn.click( fn=refresh_roi_data, inputs=[], outputs=[combined_roi_graph, roi_status_text] ) # Set up the toggle switch events for APR apr_toggle.change( fn=update_apr_graph_with_toggles, inputs=[apr_toggle, adjusted_apr_toggle], outputs=[combined_apr_graph] ) adjusted_apr_toggle.change( fn=update_apr_graph_with_toggles, inputs=[apr_toggle, adjusted_apr_toggle], outputs=[combined_apr_graph] ) # Set up the toggle switch events for ROI roi_toggle.change( fn=update_roi_graph_with_toggles, inputs=[roi_toggle], outputs=[combined_roi_graph] ) return demo # Launch the dashboard if __name__ == "__main__": dashboard().launch() def generate_adjusted_apr_report(): """ Generate a detailed report about adjusted_apr data availability and save it to a file. 
Returns the path to the generated report file. """ global global_df if global_df is None or global_df.empty or 'adjusted_apr' not in global_df.columns: logger.warning("No adjusted_apr data available for report generation") return None # Create a report file report_path = "adjusted_apr_report.txt" with open(report_path, "w") as f: f.write("======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n") # Summary statistics total_records = len(global_df) records_with_adjusted = global_df['adjusted_apr'].notna().sum() pct_with_adjusted = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0 f.write(f"Total APR records: {total_records}\n") f.write(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n") # First and last data points if records_with_adjusted > 0: has_adjusted = global_df[global_df['adjusted_apr'].notna()] first_date = has_adjusted['timestamp'].min() last_date = has_adjusted['timestamp'].max() f.write(f"First adjusted_apr record: {first_date}\n") f.write(f"Last adjusted_apr record: {last_date}\n") f.write(f"Date range: {(last_date - first_date).days} days\n\n") # Agent statistics f.write("===== AGENT STATISTICS =====\n\n") # Group by agent agent_stats = [] for agent_id in global_df['agent_id'].unique(): agent_data = global_df[global_df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}" total_agent_records = len(agent_data) agent_with_adjusted = agent_data['adjusted_apr'].notna().sum() coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0 agent_stats.append({ 'agent_id': agent_id, 'agent_name': agent_name, 'total_records': total_agent_records, 'with_adjusted': agent_with_adjusted, 'coverage_pct': coverage_pct }) # Sort by coverage percentage (descending) agent_stats.sort(key=lambda x: x['coverage_pct'], reverse=True) # Write agent statistics for agent in agent_stats: f.write(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n") f.write(f" Records: {agent['total_records']}\n") f.write(f" With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n") # If agent has adjusted data, show date range agent_data = global_df[global_df['agent_id'] == agent['agent_id']] agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()] if not agent_adjusted.empty: first = agent_adjusted['timestamp'].min() last = agent_adjusted['timestamp'].max() f.write(f" First adjusted_apr: {first}\n") f.write(f" Last adjusted_apr: {last}\n") f.write("\n") # Check for May 10th cutoff issue f.write("===== MAY 10TH CUTOFF ANALYSIS =====\n\n") may_10_2025 = datetime(2025, 5, 10) before_cutoff = global_df[global_df['timestamp'] < may_10_2025] after_cutoff = global_df[global_df['timestamp'] >= may_10_2025] # Calculate coverage before and after before_total = len(before_cutoff) before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum() before_pct = (before_with_adjusted / before_total) * 100 if before_total > 0 else 0 after_total = len(after_cutoff) after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum() after_pct = (after_with_adjusted / after_total) * 100 if after_total > 0 else 0 f.write(f"Before May 10th, 2025:\n") f.write(f" Records: {before_total}\n") f.write(f" With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n") f.write(f"After May 10th, 2025:\n") f.write(f" Records: {after_total}\n") f.write(f" With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n") # Check for agents that had data 
before but not after if before_total > 0 and after_total > 0: agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) missing_after = agents_before - agents_after new_after = agents_after - agents_before if missing_after: f.write(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n") # For each missing agent, show the last date with adjusted_apr for agent_id in missing_after: agent_data = before_cutoff[(before_cutoff['agent_id'] == agent_id) & (before_cutoff['adjusted_apr'].notna())] if not agent_data.empty: last_date = agent_data['timestamp'].max() agent_name = agent_data['agent_name'].iloc[0] f.write(f" {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n") if new_after: f.write(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n") logger.info(f"Adjusted APR report generated: {report_path}") return report_path
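

# --------------------------------------------------------------------------
# Illustrative sketch (not the code used above): the date-conditional outlier
# filter applied to APR data in create_combined_time_series_graph(), where
# values outside +/-500% are dropped only for points before June 22, 2025.
# The function name and defaults below are hypothetical; it relies on the
# module-level pandas and datetime imports.
def _example_filter_apr_outliers(apr_df, cutoff=datetime(2025, 6, 22), limit=500.0):
    before = apr_df[apr_df['timestamp'] < cutoff]
    after = apr_df[apr_df['timestamp'] >= cutoff]
    # Apply the +/-limit percent filter only to the pre-cutoff slice
    before = before[before['apr'].between(-limit, limit)]
    return pd.concat([before, after], ignore_index=True)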
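

# --------------------------------------------------------------------------
# Illustrative sketch: the daily-median pipeline used by
# create_combined_time_series_graph(). The real helpers (aggregate_daily_data,
# calculate_daily_medians, calculate_moving_average_medians) are defined
# elsewhere in this codebase; this hypothetical single-function version only
# shows the intended three steps, assuming a DataFrame with 'agent_id',
# 'timestamp' and a numeric value column.
def _example_daily_median_pipeline(df, value_col='apr', window_days=7):
    """Daily per-agent mean -> daily cross-agent median -> rolling mean of medians."""
    data = df[['agent_id', 'timestamp', value_col]].dropna(subset=[value_col]).copy()
    # Normalize timestamps to midnight so each agent contributes one value per day
    data['timestamp'] = pd.to_datetime(data['timestamp']).dt.normalize()
    # Step 1: aggregate per agent per day (mean of that agent's readings that day)
    daily_agent = data.groupby(['agent_id', 'timestamp'], as_index=False)[value_col].mean()
    # Step 2: median across agents for each day
    daily_median = daily_agent.groupby('timestamp', as_index=False)[value_col].median()
    # Step 3: trailing moving average of the daily medians over `window_days`
    daily_median = daily_median.sort_values('timestamp').set_index('timestamp')
    daily_median['moving_avg'] = (
        daily_median[value_col].rolling(f'{window_days}D', min_periods=1).mean()
    )
    return daily_median.reset_index()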
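

# --------------------------------------------------------------------------
# Illustrative sketch: building the hover strings for the median lines. The
# loop above reloads the CSV on every row to look up active-agent counts; a
# sketch like this precomputes the daily counts once, under the assumption
# that `daily_agent` holds one row per agent per normalized day. All names
# here are hypothetical, not part of the existing code.
def _example_hover_text(daily_medians_ma, daily_agent, label='Median APR (7d window)'):
    # Number of distinct agents reporting on each day
    counts = daily_agent.groupby('timestamp')['agent_id'].nunique()
    return [
        f"Date: {row['timestamp']:%Y-%m-%d}<br>"
        f"{label}: {row['moving_avg']:.2f}%<br>"
        f"Active agents: {int(counts.get(row['timestamp'], 0))}"
        for _, row in daily_medians_ma.iterrows()
    ]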
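

# --------------------------------------------------------------------------
# Illustrative sketch: the fallback APR graph above computes a 6-hour windowed
# average and a cumulative ("infinite window") average with an explicit Python
# loop; pandas can express a comparable computation directly. This assumes
# `apr_data` has 'timestamp' and 'apr' columns and is only a sketch, not the
# code path used by the fallback.
def _example_windowed_and_cumulative_avg(apr_data, hours=6):
    series = (
        apr_data.sort_values('timestamp')
        .set_index('timestamp')['apr']
    )
    return pd.DataFrame({
        'moving_avg': series.rolling(f'{hours}h', min_periods=1).mean(),
        'infinite_avg': series.expanding(min_periods=1).mean(),
    }).reset_index()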
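

# --------------------------------------------------------------------------
# Illustrative sketch: how the dashboard toggles flip Plotly traces on and off.
# update_apr_graph() / update_roi_graph() above match traces by their exact
# legend name, so the keys passed here must match the names used when the
# traces were added (e.g. 'Median APR (7d window)'). This helper is a
# hypothetical generalization, not part of the existing code.
def _example_set_trace_visibility(fig, name_to_visible):
    for trace in fig.data:
        if trace.name in name_to_visible:
            trace.visible = name_to_visible[trace.name]
    return fig

# Example usage (assumed trace names):
# _example_set_trace_visibility(fig, {'Median APR (7d window)': True,
#                                     'Median Adjusted APR (7d window)': False})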
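

# --------------------------------------------------------------------------
# Illustrative sketch: the per-agent coverage numbers that
# generate_adjusted_apr_report() writes out, computed with a single groupby
# instead of a per-agent loop. It assumes a frame with 'agent_id',
# 'agent_name' and 'adjusted_apr' columns (as global_df has here); the output
# column names are hypothetical.
def _example_adjusted_apr_coverage(df):
    grouped = df.groupby(['agent_id', 'agent_name'])['adjusted_apr']
    coverage = grouped.agg(total_records='size', with_adjusted='count').reset_index()
    coverage['coverage_pct'] = 100.0 * coverage['with_adjusted'] / coverage['total_records']
    return coverage.sort_values('coverage_pct', ascending=False)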