Optimus-Agent-Performance

Running

App Files Community

gauravlochab committed on Jul 10

Commit

75be6c1

1 Parent(s): 6c05858

chore: fix counts for daily active agents

Browse files

Files changed (1) hide show

app.py +162 -239

app.py CHANGED Viewed

@@ -33,12 +33,20 @@ logging.basicConfig(
     level=logging.INFO,  # Use INFO level instead of DEBUG to reduce verbosity
     format="%(asctime)s - %(levelname)s - %(message)s",
     handlers=[
-        logging.FileHandler("app_debug.log"),  # Log to file for persistence
         logging.StreamHandler()  # Also log to console
-    ]
 )
 logger = logging.getLogger(__name__)
 # Reduce third-party library logging
 logging.getLogger("urllib3").setLevel(logging.WARNING)
 logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -759,11 +767,11 @@ def log_adjusted_apr_availability(df):
                     logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
 def generate_apr_visualizations():
-    """Generate APR visualizations with CSV-first approach for Hugging Face Space deployment"""
     global global_df
-    # CSV-FIRST APPROACH: Try to load from CSV first
-    logger.info("Attempting to load APR data from CSV files...")
     df, csv_file = load_apr_data_from_csv()
     if not df.empty:
@@ -775,136 +783,70 @@ def generate_apr_visualizations():
         combined_fig = create_combined_time_series_graph(df)
         return combined_fig, csv_file
-    # FALLBACK: If CSV not available, try API
-    logger.info("CSV data not available, falling back to API...")
-    try:
-        df, _ = fetch_apr_data_from_db()
-        # If we got no data at all, return placeholder figures
-        if df.empty:
-            logger.info("No APR data available from API either. Using fallback visualization.")
-            # Create empty visualizations with a message using Plotly
-            fig = go.Figure()
-            fig.add_annotation(
-                x=0.5, y=0.5,
-                text="No APR data available",
-                font=dict(size=20),
-                showarrow=False
-            )
-            fig.update_layout(
-                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
-                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
-            )
-            # Save as static file for reference
-            fig.write_html("optimus_apr_combined_graph.html")
-            fig.write_image("optimus_apr_combined_graph.png")
-            csv_file = None
-            return fig, csv_file
-        # Apply preprocessing to fix APR and ROI values
-        logger.info("Applying preprocessing to fix APR and ROI values...")
-        df = fix_apr_and_roi(df)  # Apply preprocessing
-        global_df = df
-        # IMPORTANT: Also fix the ROI DataFrame with corrected values
-        logger.info("Extracting corrected ROI values from fixed APR data...")
-        if not df.empty and 'roi' in df.columns:
-            # Create corrected ROI DataFrame from the fixed APR data
-            corrected_roi_data = []
-            for idx, row in df.iterrows():
-                if not row['is_dummy'] and pd.notna(row['roi']):
-                    roi_entry = {
-                        "roi": row["roi"],  # This is now the corrected ROI value
-                        "timestamp": row["timestamp"],
-                        "agent_id": row["agent_id"],
-                        "agent_name": row["agent_name"],
-                        "is_dummy": False,
-                        "metric_type": "ROI"
-                    }
-                    corrected_roi_data.append(roi_entry)
-            # Replace the original ROI DataFrame with corrected values
-            if corrected_roi_data:
-                corrected_roi_df = pd.DataFrame(corrected_roi_data)
-                # Combine with dummy ROI data if it exists
-                if global_roi_df is not None and not global_roi_df.empty:
-                    dummy_roi_data = global_roi_df[global_roi_df['is_dummy'] == True]
-                    if not dummy_roi_data.empty:
-                        global_roi_df = pd.concat([corrected_roi_df, dummy_roi_data], ignore_index=True)
-                    else:
-                        global_roi_df = corrected_roi_df
-                else:
-                    global_roi_df = corrected_roi_df
-                logger.info(f"Updated ROI DataFrame with {len(corrected_roi_data)} corrected ROI values")
-            else:
-                logger.warning("No corrected ROI values found to update ROI DataFrame")
-        # Save preprocessed data to CSV before creating visualizations
-        logger.info("Saving preprocessed APR data to CSV...")
-        csv_file = save_to_csv(df)
-        # Create visualizations using the saved CSV data
-        logger.info("Creating APR visualizations from preprocessed data...")
-        combined_fig = create_combined_time_series_graph(df)
-        return combined_fig, csv_file
-    except Exception as e:
-        logger.error(f"Error fetching APR data from API: {e}")
-        # Return error visualization
-        fig = go.Figure()
-        fig.add_annotation(
-            x=0.5, y=0.5,
-            text=f"Error loading data: {str(e)}",
-            font=dict(size=16, color="red"),
-            showarrow=False
-        )
-        fig.update_layout(
-            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
-            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
-        )
-        return fig, None
 def generate_roi_visualizations():
-    """Generate ROI visualizations with CSV-first approach for Hugging Face Space deployment"""
     global global_roi_df
-    # FIXED APPROACH: Use corrected ROI data from APR CSV instead of problematic ROI CSV
-    logger.info("Loading corrected ROI data from APR CSV files...")
     df_apr, csv_file = load_apr_data_from_csv()
     if not df_apr.empty and 'roi' in df_apr.columns:
-        # Extract ROI data from the APR CSV (which contains corrected values)
-        roi_data = []
-        for idx, row in df_apr.iterrows():
-            if pd.notna(row['roi']):
-                roi_entry = {
-                    "roi": row["roi"],  # Use corrected ROI from APR data
-                    "timestamp": row["timestamp"],
-                    "agent_id": row["agent_id"],
-                    "agent_name": row["agent_name"],
-                    "is_dummy": row["is_dummy"],
-                    "metric_type": "ROI",
-                    "apr": row.get("apr"),
-                    "adjusted_apr": row.get("adjusted_apr")
-                }
-                roi_data.append(roi_entry)
-        if roi_data:
-            df_roi = pd.DataFrame(roi_data)
-            logger.info(f"Successfully extracted {len(df_roi)} corrected ROI records from APR CSV")
             global_roi_df = df_roi
-            # Create visualizations using corrected ROI data
-            logger.info("Creating ROI visualizations from corrected APR CSV data...")
             combined_fig = create_combined_roi_time_series_graph(df_roi)
-            return combined_fig, "optimus_apr_values.csv"  # Source is APR CSV
         else:
-            logger.warning("No ROI data found in APR CSV")
     else:
         logger.warning("APR CSV not available or missing ROI column")
@@ -933,68 +875,16 @@ def generate_roi_visualizations():
                 yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
             )
-            # Save as static file for reference
-            fig.write_html("optimus_roi_graph.html")
-            fig.write_image("optimus_roi_graph.png")
-            csv_file = None
-            return fig, csv_file
         # Set global_roi_df for access by other functions
         global_roi_df = df_roi
-        # IMPORTANT: Apply ROI corrections if we have fresh API data
-        logger.info("Checking if ROI data needs correction...")
-        if not df_roi.empty:
-            # Check if this ROI data contains uncorrected values (from API)
-            uncorrected_roi = df_roi[df_roi['is_dummy'] == False]
-            if not uncorrected_roi.empty:
-                logger.info("ROI data contains uncorrected values, applying corrections...")
-                # We need to get the corrected APR data to extract corrected ROI values
-                if global_df is not None and not global_df.empty:
-                    # Extract corrected ROI values from the fixed APR data
-                    corrected_roi_data = []
-                    for idx, row in global_df.iterrows():
-                        if not row['is_dummy'] and pd.notna(row['roi']):
-                            roi_entry = {
-                                "roi": row["roi"],  # This is the corrected ROI value
-                                "timestamp": row["timestamp"],
-                                "agent_id": row["agent_id"],
-                                "agent_name": row["agent_name"],
-                                "is_dummy": False,
-                                "metric_type": "ROI"
-                            }
-                            corrected_roi_data.append(roi_entry)
-                    if corrected_roi_data:
-                        corrected_roi_df = pd.DataFrame(corrected_roi_data)
-                        # Combine with dummy ROI data if it exists
-                        dummy_roi_data = df_roi[df_roi['is_dummy'] == True]
-                        if not dummy_roi_data.empty:
-                            df_roi = pd.concat([corrected_roi_df, dummy_roi_data], ignore_index=True)
-                        else:
-                            df_roi = corrected_roi_df
-                        global_roi_df = df_roi
-                        logger.info(f"Updated ROI DataFrame with {len(corrected_roi_data)} corrected ROI values")
-                    else:
-                        logger.warning("No corrected ROI values found in APR data")
-                else:
-                    logger.warning("No corrected APR data available to extract ROI values from")
-            else:
-                logger.info("ROI data contains only dummy values, no correction needed")
-        # Save preprocessed ROI data to CSV before creating visualizations
-        logger.info("Saving preprocessed ROI data to CSV...")
-        csv_file = save_roi_to_csv(df_roi)
-        # Create visualizations using the saved CSV data
-        logger.info("Creating ROI visualizations from preprocessed data...")
         combined_fig = create_combined_roi_time_series_graph(df_roi)
-        return combined_fig, csv_file
     except Exception as e:
         logger.error(f"Error fetching ROI data from API: {e}")
@@ -1030,6 +920,13 @@ def aggregate_daily_data(df, metric_column):
     df = df.copy()
     df['date'] = df['timestamp'].dt.date
     # NEW: Add detailed logging to verify median calculation
     logger.info(f"=== MEDIAN CALCULATION DEBUG for {metric_column} ===")
@@ -1197,39 +1094,8 @@ def create_combined_roi_time_series_graph(df):
     for agent_id, data in agent_runtimes.items():
         logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: {data['last_report']}")
-    # IMPORTANT: Clean and convert ROI data to ensure consistency
-    logger.info("Cleaning ROI data before conversion...")
-    def clean_roi_value(value):
-        """Clean and convert ROI value to float"""
-        if pd.isna(value):
-            return None
-        # If it's already a number, return it
-        if isinstance(value, (int, float)):
-            return float(value)
-        # If it's a string, try to extract numeric value
-        if isinstance(value, str):
-            # Remove any non-numeric characters except decimal point and minus sign
-            import re
-            # Look for patterns like "value': 16.007665648354" and extract the number
-            match = re.search(r'[\d\.-]+', value)
-            if match:
-                try:
-                    return float(match.group())
-                except ValueError:
-                    logger.warning(f"Could not convert ROI value to float: {value}")
-                    return None
-            else:
-                logger.warning(f"No numeric value found in ROI string: {value}")
-                return None
-        logger.warning(f"Unexpected ROI value type: {type(value)} - {value}")
-        return None
-    # Apply cleaning function to ROI column
-    df['roi'] = df['roi'].apply(clean_roi_value)
     # Remove rows with invalid ROI values
     initial_count = len(df)
@@ -1240,10 +1106,9 @@ def create_combined_roi_time_series_graph(df):
     if removed_count > 0:
         logger.warning(f"Removed {removed_count} rows with invalid ROI values")
-    # Ensure ROI is float after cleaning
     df['roi'] = df['roi'].astype(float)
-    # ROI values are already in percentage format from initial_value_fixer.py
-    df['metric_type'] = df['metric_type'].astype(str)  # Ensure metric_type is string
     # Get min and max time for shapes
     min_time = df['timestamp'].min()
@@ -1406,6 +1271,12 @@ def create_combined_roi_time_series_graph(df):
             # Calculate number of active agents on this date
             active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
             hover_data_roi.append(
                 f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
             )
@@ -1858,22 +1729,22 @@ def create_combined_time_series_graph(df):
     min_time = df['timestamp'].min()
     max_time = df['timestamp'].max()
-    # Add shape for positive APR region (above zero)
     fig.add_shape(
         type="rect",
         fillcolor="rgba(230, 243, 255, 0.3)",
         line=dict(width=0),
-        y0=0, y1=apr_data['apr'].max() * 1.1 if not apr_data.empty else 10,  # Dynamic positive value based on data
         x0=min_time, x1=max_time,
         layer="below"
     )
-    # Add shape for negative APR region (below zero)
     fig.add_shape(
         type="rect",
         fillcolor="rgba(255, 230, 230, 0.3)",
         line=dict(width=0),
-        y0=apr_data['apr'].min() * 1.1 if not apr_data.empty else -10, y1=0,  # Dynamic negative value based on data
         x0=min_time, x1=max_time,
         layer="below"
     )
@@ -1888,7 +1759,22 @@ def create_combined_time_series_graph(df):
     # MODIFIED: Calculate average APR values across all agents for each timestamp
     # Filter for APR data only
     apr_data = df[df['metric_type'] == 'APR'].copy()
     # Filter APR outliers (±200% range)
     before_outlier_filter = len(apr_data)
@@ -2046,14 +1932,39 @@ def create_combined_time_series_graph(df):
         y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
         # Create hover template for the APR moving average line
         hover_data_apr = []
         for idx, row in daily_medians_with_ma.iterrows():
             timestamp = row['timestamp']
             # Format timestamp to show only date for daily data
             formatted_timestamp = timestamp.strftime('%Y-%m-%d')
-            # Calculate number of active agents on this date
-            active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
             hover_data_apr.append(
                 f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
@@ -2079,14 +1990,38 @@ def create_combined_time_series_graph(df):
             y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
             # Create hover template for the adjusted APR moving average line
             hover_data_adj = []
             for idx, row in daily_medians_adjusted_with_ma.iterrows():
                 timestamp = row['timestamp']
                 # Format timestamp to show only date for daily data
                 formatted_timestamp = timestamp.strftime('%Y-%m-%d')
-                # Calculate number of active agents on this date
-                active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
                 hover_data_adj.append(
                     f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
@@ -2141,25 +2076,13 @@ def create_combined_time_series_graph(df):
         hovermode="closest"
     )
-    # Add annotations for y-axis regions
-    fig.add_annotation(
-        x=-0.08,  # Position further from the y-axis to avoid overlapping with tick labels
-        y=-25,    # Middle of the negative region
-        xref="paper",
-        yref="y",
-        text="Percent drawdown [%]",
-        showarrow=False,
-        font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
-        textangle=-90,  # Rotate text to be vertical
-        align="center"
-    )
     fig.add_annotation(
         x=-0.08,  # Position further from the y-axis to avoid overlapping with tick labels
-        y=50,     # Middle of the positive region
         xref="paper",
         yref="y",
-        text="Agent APR [%]",
         showarrow=False,
         font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
         textangle=-90,  # Rotate text to be vertical

     level=logging.INFO,  # Use INFO level instead of DEBUG to reduce verbosity
     format="%(asctime)s - %(levelname)s - %(message)s",
     handlers=[
+        logging.FileHandler("app_debug.log", mode='a'),  # Append mode for persistence
         logging.StreamHandler()  # Also log to console
+    ],
+    force=True  # Force reconfiguration of logging
 )
 logger = logging.getLogger(__name__)
+# Ensure the logger level is set correctly
+logger.setLevel(logging.INFO)
+# Test logging to verify it's working
+logger.info("=== LOGGING SYSTEM INITIALIZED ===")
+logger.info("Debug logs will be written to app_debug.log")
 # Reduce third-party library logging
 logging.getLogger("urllib3").setLevel(logging.WARNING)
 logging.getLogger("httpx").setLevel(logging.WARNING)
                     logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
 def generate_apr_visualizations():
+    """Generate APR visualizations using CSV data only for consistency with ROI graph"""
     global global_df
+    # CONSISTENCY FIX: Always use CSV data to match ROI graph behavior
+    logger.info("Loading APR data from CSV files for consistency with ROI graph...")
     df, csv_file = load_apr_data_from_csv()
     if not df.empty:
         combined_fig = create_combined_time_series_graph(df)
         return combined_fig, csv_file
+    # FALLBACK: If CSV not available, return error message
+    logger.error("CSV data not available and API fallback disabled for consistency")
+    # Create empty visualization with a message using Plotly
+    fig = go.Figure()
+    fig.add_annotation(
+        x=0.5, y=0.5,
+        text="No APR data available - CSV file missing",
+        font=dict(size=20),
+        showarrow=False
+    )
+    fig.update_layout(
+        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
+    )
+    return fig, None
 def generate_roi_visualizations():
+    """Generate ROI visualizations directly from optimus_apr_values.csv"""
     global global_roi_df
+    # SIMPLIFIED APPROACH: Load ROI data directly from APR CSV
+    logger.info("Loading ROI data directly from optimus_apr_values.csv...")
     df_apr, csv_file = load_apr_data_from_csv()
     if not df_apr.empty and 'roi' in df_apr.columns:
+        # CONSISTENCY FIX: Apply same filtering as APR graph
+        logger.info("=== ROI GRAPH DATA FILTERING DEBUG ===")
+        logger.info(f"Initial APR data loaded: {len(df_apr)} records")
+        logger.info(f"Unique agents in initial data: {df_apr['agent_id'].nunique()}")
+        logger.info(f"Agent IDs in initial data: {sorted(df_apr['agent_id'].unique().tolist())}")
+        # Check metric_type distribution
+        if 'metric_type' in df_apr.columns:
+            metric_counts = df_apr['metric_type'].value_counts()
+            logger.info(f"Metric type distribution: {metric_counts.to_dict()}")
+        else:
+            logger.warning("No 'metric_type' column found in APR data")
+        # First filter by metric_type == 'APR' to match APR graph logic
+        df_apr_filtered = df_apr[df_apr['metric_type'] == 'APR'].copy()
+        logger.info(f"After metric_type == 'APR' filter: {len(df_apr_filtered)} records")
+        logger.info(f"Unique agents after APR filter: {df_apr_filtered['agent_id'].nunique()}")
+        logger.info(f"Agent IDs after APR filter: {sorted(df_apr_filtered['agent_id'].unique().tolist())}")
+        # Then filter for rows with valid ROI values
+        df_roi = df_apr_filtered[df_apr_filtered['roi'].notna()].copy()
+        logger.info(f"After ROI filter: {len(df_roi)} records")
+        logger.info(f"Unique agents after ROI filter: {df_roi['agent_id'].nunique()}")
+        logger.info(f"Agent IDs after ROI filter: {sorted(df_roi['agent_id'].unique().tolist())}")
+        if not df_roi.empty:
+            # Add metric_type column for consistency
+            df_roi['metric_type'] = 'ROI'
+            logger.info(f"Successfully loaded {len(df_roi)} ROI records from APR CSV")
             global_roi_df = df_roi
+            # Create visualizations using ROI data from APR CSV
+            logger.info("Creating ROI visualizations from APR CSV data...")
             combined_fig = create_combined_roi_time_series_graph(df_roi)
+            return combined_fig, csv_file
         else:
+            logger.warning("No valid ROI data found in APR CSV")
     else:
         logger.warning("APR CSV not available or missing ROI column")
                 yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
             )
+            return fig, None
         # Set global_roi_df for access by other functions
         global_roi_df = df_roi
+        # Create visualizations using API data
+        logger.info("Creating ROI visualizations from API data...")
         combined_fig = create_combined_roi_time_series_graph(df_roi)
+        return combined_fig, None
     except Exception as e:
         logger.error(f"Error fetching ROI data from API: {e}")
     df = df.copy()
     df['date'] = df['timestamp'].dt.date
+    # DEBUG: Log July 8th data specifically
+    july_8_data = df[df['date'] == pd.to_datetime('2025-07-08').date()]
+    if not july_8_data.empty:
+        july_8_agents = july_8_data['agent_id'].unique()
+        logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agents before aggregation: {len(july_8_agents)}")
+        logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agent IDs: {sorted(july_8_agents.tolist())}")
     # NEW: Add detailed logging to verify median calculation
     logger.info(f"=== MEDIAN CALCULATION DEBUG for {metric_column} ===")
     for agent_id, data in agent_runtimes.items():
         logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: {data['last_report']}")
+    # SIMPLIFIED: ROI data is already clean from CSV, just ensure proper data types
+    logger.info("Processing ROI data from CSV...")
     # Remove rows with invalid ROI values
     initial_count = len(df)
     if removed_count > 0:
         logger.warning(f"Removed {removed_count} rows with invalid ROI values")
+    # Ensure proper data types
     df['roi'] = df['roi'].astype(float)
+    df['metric_type'] = df['metric_type'].astype(str)
     # Get min and max time for shapes
     min_time = df['timestamp'].min()
             # Calculate number of active agents on this date
             active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
+            # DEBUG: Log agent counts for July 8th specifically
+            if formatted_timestamp == '2025-07-08':
+                agents_on_date = daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()
+                logger.info(f"ROI GRAPH - July 8th active agents: {active_agents}")
+                logger.info(f"ROI GRAPH - July 8th agent IDs: {sorted(agents_on_date.tolist())}")
             hover_data_roi.append(
                 f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
             )
     min_time = df['timestamp'].min()
     max_time = df['timestamp'].max()
+    # Add shape for positive APR region (above zero) - use reasonable fixed range
     fig.add_shape(
         type="rect",
         fillcolor="rgba(230, 243, 255, 0.3)",
         line=dict(width=0),
+        y0=0, y1=200,  # Fixed positive range to avoid extreme outliers affecting the view
         x0=min_time, x1=max_time,
         layer="below"
     )
+    # Add shape for negative APR region (below zero) - use reasonable fixed range
     fig.add_shape(
         type="rect",
         fillcolor="rgba(255, 230, 230, 0.3)",
         line=dict(width=0),
+        y0=-200, y1=0,  # Fixed negative range to avoid extreme outliers affecting the view
         x0=min_time, x1=max_time,
         layer="below"
     )
     # MODIFIED: Calculate average APR values across all agents for each timestamp
     # Filter for APR data only
+    logger.info("=== APR GRAPH DATA FILTERING DEBUG ===")
+    logger.info(f"Initial APR data loaded: {len(df)} records")
+    logger.info(f"Unique agents in initial data: {df['agent_id'].nunique()}")
+    logger.info(f"Agent IDs in initial data: {sorted(df['agent_id'].unique().tolist())}")
+    # Check metric_type distribution
+    if 'metric_type' in df.columns:
+        metric_counts = df['metric_type'].value_counts()
+        logger.info(f"Metric type distribution: {metric_counts.to_dict()}")
+    else:
+        logger.warning("No 'metric_type' column found in APR data")
     apr_data = df[df['metric_type'] == 'APR'].copy()
+    logger.info(f"After metric_type == 'APR' filter: {len(apr_data)} records")
+    logger.info(f"Unique agents after APR filter: {apr_data['agent_id'].nunique()}")
+    logger.info(f"Agent IDs after APR filter: {sorted(apr_data['agent_id'].unique().tolist())}")
     # Filter APR outliers (±200% range)
     before_outlier_filter = len(apr_data)
         y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
         # Create hover template for the APR moving average line
+        # CONSISTENCY FIX: Use ROI daily agent data for active agent counts
         hover_data_apr = []
         for idx, row in daily_medians_with_ma.iterrows():
             timestamp = row['timestamp']
             # Format timestamp to show only date for daily data
             formatted_timestamp = timestamp.strftime('%Y-%m-%d')
+            # FIXED: Use ROI data to get consistent active agent counts
+            # Load ROI data to get the correct agent counts
+            try:
+                df_roi_for_counts, _ = load_apr_data_from_csv()
+                if not df_roi_for_counts.empty and 'roi' in df_roi_for_counts.columns:
+                    # Filter for ROI data and same date
+                    df_roi_filtered = df_roi_for_counts[
+                        (df_roi_for_counts['metric_type'] == 'APR') &
+                        (df_roi_for_counts['roi'].notna())
+                    ].copy()
+                    # Aggregate daily for ROI data
+                    roi_daily_agent_data = aggregate_daily_data(df_roi_filtered, 'roi')
+                    # Get active agents from ROI data for this date
+                    active_agents = len(roi_daily_agent_data[roi_daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
+                else:
+                    # Fallback to APR data if ROI not available
+                    active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
+            except:
+                # Fallback to APR data if there's any error
+                active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
+            # DEBUG: Log agent counts for July 8th specifically
+            if formatted_timestamp == '2025-07-08':
+                logger.info(f"APR GRAPH - July 8th active agents (using ROI logic): {active_agents}")
             hover_data_apr.append(
                 f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
             y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
             # Create hover template for the adjusted APR moving average line
+            # CONSISTENCY FIX: Use ROI daily agent data for active agent counts (same as regular APR)
             hover_data_adj = []
             for idx, row in daily_medians_adjusted_with_ma.iterrows():
                 timestamp = row['timestamp']
                 # Format timestamp to show only date for daily data
                 formatted_timestamp = timestamp.strftime('%Y-%m-%d')
+                # FIXED: Use ROI data to get consistent active agent counts (same logic as APR)
+                try:
+                    df_roi_for_counts, _ = load_apr_data_from_csv()
+                    if not df_roi_for_counts.empty and 'roi' in df_roi_for_counts.columns:
+                        # Filter for ROI data and same date
+                        df_roi_filtered = df_roi_for_counts[
+                            (df_roi_for_counts['metric_type'] == 'APR') &
+                            (df_roi_for_counts['roi'].notna())
+                        ].copy()
+                        # Aggregate daily for ROI data
+                        roi_daily_agent_data = aggregate_daily_data(df_roi_filtered, 'roi')
+                        # Get active agents from ROI data for this date
+                        active_agents = len(roi_daily_agent_data[roi_daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
+                    else:
+                        # Fallback to adjusted APR data if ROI not available
+                        active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
+                except:
+                    # Fallback to adjusted APR data if there's any error
+                    active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
+                # DEBUG: Log agent counts for July 8th specifically
+                if formatted_timestamp == '2025-07-08':
+                    logger.info(f"ADJUSTED APR GRAPH - July 8th active agents (using ROI logic): {active_agents}")
                 hover_data_adj.append(
                     f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
         hovermode="closest"
     )
+    # Add single annotation for y-axis with proper spacing
     fig.add_annotation(
         x=-0.08,  # Position further from the y-axis to avoid overlapping with tick labels
+        y=0,      # Center of the y-axis
         xref="paper",
         yref="y",
+        text="Percent drawdown (%) Agent APR (%)",
         showarrow=False,
         font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
         textangle=-90,  # Rotate text to be vertical