import requests import pandas as pd import gradio as gr import plotly.graph_objects as go import plotly.express as px from plotly.subplots import make_subplots from datetime import datetime, timedelta import json # Commenting out blockchain-related imports that cause loading issues # from web3 import Web3 import os import numpy as np import matplotlib.pyplot as plt import matplotlib.dates as mdates import random import logging from typing import List, Dict, Any, Optional # Comment out the import for now and replace with dummy functions # from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations # APR visualization functions integrated directly from fetch_and_preprocess_data import generate_continuous_random_data from initial_value_fixer import fix_apr_and_roi from load_from_csv import ( load_apr_data_from_csv, load_roi_data_from_csv, load_statistics_from_csv, check_csv_data_availability, get_data_freshness_info ) # Set up logging with appropriate verbosity logging.basicConfig( level=logging.INFO, # Use INFO level instead of DEBUG to reduce verbosity format="%(asctime)s - %(levelname)s - %(message)s", handlers=[ logging.FileHandler("app_debug.log", mode='a'), # Append mode for persistence logging.StreamHandler() # Also log to console ], force=True # Force reconfiguration of logging ) logger = logging.getLogger(__name__) # Ensure the logger level is set correctly logger.setLevel(logging.INFO) # Test logging to verify it's working logger.info("=== LOGGING SYSTEM INITIALIZED ===") logger.info("Debug logs will be written to app_debug.log") # Reduce third-party library logging logging.getLogger("urllib3").setLevel(logging.WARNING) logging.getLogger("httpx").setLevel(logging.WARNING) logging.getLogger("matplotlib").setLevel(logging.WARNING) # Log the startup information logger.info("============= APPLICATION STARTING =============") logger.info(f"Running from directory: {os.getcwd()}") # Global variables to store the data for reuse global_df = None global_roi_df = None global_dummy_apr_df = None # Store dummy APR data separately global_dummy_roi_df = None # Store dummy ROI data separately # Configuration API_BASE_URL = "https://afmdb.autonolas.tech" logger.info(f"Using API endpoint: {API_BASE_URL}") def get_agent_type_by_name(type_name: str) -> Dict[str, Any]: """Get agent type by name""" url = f"{API_BASE_URL}/api/agent-types/name/{type_name}" logger.debug(f"Calling API: {url}") try: response = requests.get(url) logger.debug(f"Response status: {response.status_code}") if response.status_code == 404: logger.error(f"Agent type '{type_name}' not found") return None response.raise_for_status() result = response.json() logger.debug(f"Agent type response: {result}") return result except Exception as e: logger.error(f"Error in get_agent_type_by_name: {e}") return None def get_attribute_definition_by_name(attr_name: str) -> Dict[str, Any]: """Get attribute definition by name""" url = f"{API_BASE_URL}/api/attributes/name/{attr_name}" logger.debug(f"Calling API: {url}") try: response = requests.get(url) logger.debug(f"Response status: {response.status_code}") if response.status_code == 404: logger.error(f"Attribute definition '{attr_name}' not found") return None response.raise_for_status() result = response.json() logger.debug(f"Attribute definition response: {result}") return result except Exception as e: logger.error(f"Error in get_attribute_definition_by_name: {e}") return None def get_agents_by_type(type_id: int) -> List[Dict[str, Any]]: """Get all agents of a 
specific type""" url = f"{API_BASE_URL}/api/agent-types/{type_id}/agents/" logger.debug(f"Calling API: {url}") try: response = requests.get(url) logger.debug(f"Response status: {response.status_code}") if response.status_code == 404: logger.error(f"No agents found for type ID {type_id}") return [] response.raise_for_status() result = response.json() logger.debug(f"Agents count: {len(result)}") logger.debug(f"First few agents: {result[:2] if result else []}") return result except Exception as e: logger.error(f"Error in get_agents_by_type: {e}") return [] def get_attribute_values_by_type_and_attr(agents: List[Dict[str, Any]], attr_def_id: int) -> List[Dict[str, Any]]: """Get all attribute values for a specific attribute definition across all agents of a given list""" all_attributes = [] logger.debug(f"Getting attributes for {len(agents)} agents with attr_def_id: {attr_def_id}") # For each agent, get their attributes and filter for the one we want for agent in agents: agent_id = agent["agent_id"] # Call the /api/agents/{agent_id}/attributes/ endpoint url = f"{API_BASE_URL}/api/agents/{agent_id}/attributes/" logger.debug(f"Calling API for agent {agent_id}: {url}") try: response = requests.get(url, params={"limit": 1000}) if response.status_code == 404: logger.error(f"No attributes found for agent ID {agent_id}") continue response.raise_for_status() agent_attrs = response.json() logger.debug(f"Agent {agent_id} has {len(agent_attrs)} attributes") # Filter for the specific attribute definition ID filtered_attrs = [attr for attr in agent_attrs if attr.get("attr_def_id") == attr_def_id] logger.debug(f"Agent {agent_id} has {len(filtered_attrs)} APR attributes") if filtered_attrs: logger.debug(f"Sample attribute for agent {agent_id}: {filtered_attrs[0]}") all_attributes.extend(filtered_attrs) except requests.exceptions.RequestException as e: logger.error(f"Error fetching attributes for agent ID {agent_id}: {e}") logger.info(f"Total APR attributes found across all agents: {len(all_attributes)}") return all_attributes def get_agent_name(agent_id: int, agents: List[Dict[str, Any]]) -> str: """Get agent name from agent ID""" for agent in agents: if agent["agent_id"] == agent_id: return agent["agent_name"] return "Unknown" def extract_apr_value(attr: Dict[str, Any]) -> Dict[str, Any]: """Extract APR value, adjusted APR value, ROI value, and timestamp from JSON value""" try: agent_id = attr.get("agent_id", "unknown") logger.debug(f"Extracting APR value for agent {agent_id}") # The APR value is stored in the json_value field if attr["json_value"] is None: logger.debug(f"Agent {agent_id}: json_value is None") return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": agent_id, "is_dummy": False} # If json_value is a string, parse it if isinstance(attr["json_value"], str): logger.debug(f"Agent {agent_id}: json_value is string, parsing") json_data = json.loads(attr["json_value"]) else: json_data = attr["json_value"] apr = json_data.get("apr") adjusted_apr = json_data.get("adjusted_apr") # Extract adjusted_apr if present timestamp = json_data.get("timestamp") address = json_data.get("portfolio_snapshot", {}).get("portfolio", {}).get("address") # Extract ROI (f_i_ratio) from calculation_metrics if it exists roi = None if "calculation_metrics" in json_data and json_data["calculation_metrics"] is not None: roi = json_data["calculation_metrics"].get("f_i_ratio") # Filter ROI values to -10 to 10 range if roi is not None and (roi < -10 or roi > 10): roi = None # Exclude ROI values outside 
the range logger.debug(f"Agent {agent_id}: Raw APR value: {apr}, adjusted APR value: {adjusted_apr}, ROI value: {roi}, timestamp: {timestamp}") # Convert timestamp to datetime if it exists timestamp_dt = None if timestamp: timestamp_dt = datetime.fromtimestamp(timestamp) result = json_data.copy() # Copy the original JSON data for logging result.update({ "apr": apr, "adjusted_apr": adjusted_apr, "roi": roi, "timestamp": timestamp_dt, "agent_id": agent_id, "is_dummy": False, "address": address }) logger.debug(f"Agent {agent_id}: Extracted result: {result}") return result except (json.JSONDecodeError, KeyError, TypeError) as e: logger.error(f"Error parsing JSON value: {e} for agent_id: {attr.get('agent_id')}") logger.error(f"Problematic json_value: {attr.get('json_value')}") return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": attr.get('agent_id'), "is_dummy": False, "address": None} def fetch_apr_data_from_db(): """ Fetch APR data from database using the API. """ global global_df global global_roi_df logger.info("==== Starting APR data fetch ====") try: # Step 1: Find the Optimus agent type logger.info("Finding Optimus agent type") optimus_type = get_agent_type_by_name("Optimus") if not optimus_type: logger.error("Optimus agent type not found, using placeholder data") global_df = pd.DataFrame([]) return global_df type_id = optimus_type["type_id"] logger.info(f"Found Optimus agent type with ID: {type_id}") # Step 2: Find the APR attribute definition logger.info("Finding APR attribute definition") apr_attr_def = get_attribute_definition_by_name("APR") if not apr_attr_def: logger.error("APR attribute definition not found, using placeholder data") global_df = pd.DataFrame([]) return global_df attr_def_id = apr_attr_def["attr_def_id"] logger.info(f"Found APR attribute definition with ID: {attr_def_id}") # Step 3: Get all agents of type Optimus logger.info(f"Getting all agents of type Optimus (type_id: {type_id})") optimus_agents = get_agents_by_type(type_id) if not optimus_agents: logger.error("No agents of type 'Optimus' found") global_df = pd.DataFrame([]) return global_df logger.info(f"Found {len(optimus_agents)} Optimus agents") logger.debug(f"Optimus agents: {[{'agent_id': a['agent_id'], 'agent_name': a['agent_name']} for a in optimus_agents]}") # Step 4: Fetch all APR values for Optimus agents logger.info(f"Fetching APR values for all Optimus agents (attr_def_id: {attr_def_id})") apr_attributes = get_attribute_values_by_type_and_attr(optimus_agents, attr_def_id) if not apr_attributes: logger.error("No APR values found for 'Optimus' agents") global_df = pd.DataFrame([]) return global_df logger.info(f"Found {len(apr_attributes)} APR attributes total") # Step 5: Extract APR and ROI data logger.info("Extracting APR and ROI data from attributes") apr_data_list = [] roi_data_list = [] for attr in apr_attributes: data = extract_apr_value(attr) if data["timestamp"] is not None: # Get agent name agent_name = get_agent_name(attr["agent_id"], optimus_agents) # Add agent name to the data data["agent_name"] = agent_name # Add is_dummy flag (all real data) data["is_dummy"] = False # Process APR data if data["apr"] is not None: # Include all APR values (including negative ones) EXCEPT zero and -100 if data["apr"] != 0 and data["apr"] != -100: apr_entry = data.copy() apr_entry["metric_type"] = "APR" logger.debug(f"Agent {agent_name} ({attr['agent_id']}): APR value: {data['apr']}") # Add to the APR data list apr_data_list.append(apr_entry) else: # Log that we're skipping 
zero or -100 values logger.debug(f"Skipping APR value for agent {agent_name} ({attr['agent_id']}): {data['apr']} (zero or -100)") # Process ROI data if data["roi"] is not None: # Include all ROI values roi_entry = { "roi": data["roi"], "timestamp": data["timestamp"], "agent_id": data["agent_id"], "agent_name": agent_name, "is_dummy": False, "metric_type": "ROI" } logger.debug(f"Agent {agent_name} ({attr['agent_id']}): ROI value: {data['roi']}") # Add to the ROI data list roi_data_list.append(roi_entry) logger.info(f"Extracted {len(apr_data_list)} valid APR data points and {len(roi_data_list)} valid ROI data points") # Added debug for adjusted APR data after May 10th may_10_2025 = datetime(2025, 5, 10) after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025] with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None] logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}") logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}") # Log detailed information about when data began first_adjusted = None if with_adjusted_after_may_10: first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp']) logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})") # Check all data for first adjusted_apr all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None] if all_with_adjusted: first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp']) logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})") last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp']) logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})") # Calculate overall coverage adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100 logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)") # Log per-agent adjusted APR statistics agent_stats = {} for record in apr_data_list: agent_id = record['agent_id'] has_adjusted = record['adjusted_apr'] is not None if agent_id not in agent_stats: agent_stats[agent_id] = {'total': 0, 'adjusted': 0} agent_stats[agent_id]['total'] += 1 if has_adjusted: agent_stats[agent_id]['adjusted'] += 1 # Log stats for agents with meaningful data for agent_id, stats in agent_stats.items(): if stats['total'] > 0: coverage = (stats['adjusted'] / stats['total']) * 100 if coverage > 0: # Only log agents that have at least some adjusted data logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({stats['adjusted']}/{stats['total']} records)") # Check for gaps in adjusted APR data for agent_id in agent_stats: # Get all records for this agent agent_records = [r for r in apr_data_list if r['agent_id'] == agent_id] # Sort by timestamp agent_records.sort(key=lambda x: x['timestamp']) # Find where adjusted APR starts and if there are gaps has_adjusted = False gap_count = 0 streak_length = 0 for record in agent_records: if record['adjusted_apr'] is not None: if not has_adjusted: has_adjusted = True logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}") streak_length += 1 elif has_adjusted: # We had adjusted data but now it's missing gap_count += 1 if streak_length > 0: logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records") streak_length = 0 
if gap_count > 0: logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data") elif has_adjusted: logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps") # Provide summary statistics agents_with_data = sum(1 for stats in agent_stats.values() if stats['adjusted'] > 0) agents_with_gaps = sum(1 for agent_id in agent_stats if any(apr_data_list[i]['agent_id'] == agent_id and apr_data_list[i]['adjusted_apr'] is not None and i+1 < len(apr_data_list) and apr_data_list[i+1]['agent_id'] == agent_id and apr_data_list[i+1]['adjusted_apr'] is None for i in range(len(apr_data_list)-1))) logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(agent_stats)} agents have adjusted APR data") if agents_with_gaps > 0: logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data") logger.warning("These gaps may cause discontinuities in the adjusted APR graph") else: logger.info("No gaps detected in adjusted APR data - graph should be continuous") if len(with_adjusted_after_may_10) == 0 and len(after_may_10) > 0: logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data") # Log agent IDs with missing adjusted_apr after May 10th agents_after_may_10 = set(d['agent_id'] for d in after_may_10) logger.info(f"Agents with data after May 10th: {agents_after_may_10}") # Check these same agents before May 10th before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025] agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None} # Agents that had adjusted_apr before but not after missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10) if missing_adjusted: logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}") # Find the last valid adjusted_apr date for these agents for agent_id in missing_adjusted: agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None] if agent_data: last_entry = max(agent_data, key=lambda d: d['timestamp']) logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}") # Look at the first entry after the cutoff without adjusted_apr agent_after = [d for d in after_may_10 if d['agent_id'] == agent_id] if agent_after: first_after = min(agent_after, key=lambda d: d['timestamp']) logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr") # If the agent data has the 'adjusted_apr_key' field, log that info if 'adjusted_apr_key' in first_after: logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}") # Add debug logic to check for any adjusted_apr after May 10th and which agents have it elif len(with_adjusted_after_may_10) > 0: logger.info("Found adjusted_apr values after May 10th, 2025") # Group by agent and log agent_counts = {} for item in with_adjusted_after_may_10: agent_id = item['agent_id'] if agent_id in agent_counts: agent_counts[agent_id] += 1 else: agent_counts[agent_id] = 1 logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}") # Log adjusted_apr keys used keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item} if keys_used: logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}") # Convert to DataFrames if not apr_data_list: logger.error("No valid APR data extracted") global_df = pd.DataFrame([]) 
else: # Convert list of dictionaries to DataFrame for APR global_df = pd.DataFrame(apr_data_list) if not roi_data_list: logger.error("No valid ROI data extracted") global_roi_df = pd.DataFrame([]) else: # Convert list of dictionaries to DataFrame for ROI global_roi_df = pd.DataFrame(roi_data_list) # Handle dummy data generation global global_dummy_apr_df global global_dummy_roi_df logger.info("Handling dummy data...") # Generate dummy APR data only if needed if not global_df.empty: # Check if we already have dummy data if global_dummy_apr_df is None: # First time - generate all dummy data logger.info("Generating initial dummy APR data...") global_dummy_apr_df = generate_continuous_random_data(global_df) # Only keep APR data if not global_dummy_apr_df.empty: global_dummy_apr_df = global_dummy_apr_df[global_dummy_apr_df['metric_type'] == 'APR'] logger.info(f"Generated {len(global_dummy_apr_df)} initial dummy APR data points") else: # We already have dummy data - check if we need to generate more # Find the latest timestamp in the real data latest_real_timestamp = global_df['timestamp'].max() # Find the latest timestamp in the dummy data latest_dummy_timestamp = global_dummy_apr_df['timestamp'].max() if not global_dummy_apr_df.empty else None # If the real data has newer timestamps, generate more dummy data if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp: logger.info("Generating additional dummy APR data for new timestamps...") # Create a temporary dataframe with only the latest real data temp_df = global_df[global_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_df # Generate dummy data for the new timestamps new_dummy_data = generate_continuous_random_data(temp_df) # Only keep APR data if not new_dummy_data.empty: new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'APR'] logger.info(f"Generated {len(new_dummy_data)} additional dummy APR data points") # Append the new dummy data to the existing dummy data global_dummy_apr_df = pd.concat([global_dummy_apr_df, new_dummy_data], ignore_index=True) else: logger.info("No new timestamps in real data, using existing dummy APR data") # Combine real and dummy APR data if not global_dummy_apr_df.empty: apr_dummy_count = len(global_dummy_apr_df) global_df = pd.concat([global_df, global_dummy_apr_df], ignore_index=True) logger.info(f"Added {apr_dummy_count} dummy APR data points to the dataset") # Generate dummy ROI data only if needed if not global_roi_df.empty: # Check if we already have dummy data if global_dummy_roi_df is None: # First time - generate all dummy data logger.info("Generating initial dummy ROI data...") global_dummy_roi_df = generate_continuous_random_data(global_roi_df) # Only keep ROI data if not global_dummy_roi_df.empty: global_dummy_roi_df = global_dummy_roi_df[global_dummy_roi_df['metric_type'] == 'ROI'] logger.info(f"Generated {len(global_dummy_roi_df)} initial dummy ROI data points") else: # We already have dummy data - check if we need to generate more # Find the latest timestamp in the real data latest_real_timestamp = global_roi_df['timestamp'].max() # Find the latest timestamp in the dummy data latest_dummy_timestamp = global_dummy_roi_df['timestamp'].max() if not global_dummy_roi_df.empty else None # If the real data has newer timestamps, generate more dummy data if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp: logger.info("Generating additional dummy ROI data for new timestamps...") # Create a temporary 
dataframe with only the latest real data temp_df = global_roi_df[global_roi_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_roi_df # Generate dummy data for the new timestamps new_dummy_data = generate_continuous_random_data(temp_df) # Only keep ROI data if not new_dummy_data.empty: new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'ROI'] logger.info(f"Generated {len(new_dummy_data)} additional dummy ROI data points") # Append the new dummy data to the existing dummy data global_dummy_roi_df = pd.concat([global_dummy_roi_df, new_dummy_data], ignore_index=True) else: logger.info("No new timestamps in real data, using existing dummy ROI data") # Combine real and dummy ROI data if not global_dummy_roi_df.empty: roi_dummy_count = len(global_dummy_roi_df) global_roi_df = pd.concat([global_roi_df, global_dummy_roi_df], ignore_index=True) logger.info(f"Added {roi_dummy_count} dummy ROI data points to the dataset") # Log the resulting dataframe logger.info(f"Created DataFrame with {len(global_df)} rows (including dummy data)") logger.info(f"DataFrame columns: {global_df.columns.tolist()}") logger.info(f"APR statistics: min={global_df['apr'].min()}, max={global_df['apr'].max()}, mean={global_df['apr'].mean()}") # Log adjusted APR statistics if available if 'adjusted_apr' in global_df.columns and global_df['adjusted_apr'].notna().any(): logger.info(f"Adjusted APR statistics: min={global_df['adjusted_apr'].min()}, max={global_df['adjusted_apr'].max()}, mean={global_df['adjusted_apr'].mean()}") logger.info(f"Number of records with adjusted_apr: {global_df['adjusted_apr'].notna().sum()} out of {len(global_df)}") # Log the difference between APR and adjusted APR valid_rows = global_df[global_df['adjusted_apr'].notna()] if not valid_rows.empty: avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean() max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max() min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min() logger.info(f"APR vs. adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}") # All values are APR type (excluding zero and -100 values) logger.info("All values are APR type (excluding zero and -100 values)") logger.info(f"Agents count: {global_df['agent_name'].value_counts().to_dict()}") # Log the entire dataframe for debugging logger.debug("Final DataFrame contents:") for idx, row in global_df.iterrows(): logger.debug(f"Row {idx}: {row.to_dict()}") # Add this at the end, right before returning logger.info("Analyzing adjusted_apr data availability...") log_adjusted_apr_availability(global_df) return global_df, global_roi_df except requests.exceptions.RequestException as e: logger.error(f"API request error: {e}") global_df = pd.DataFrame([]) global_roi_df = pd.DataFrame([]) return global_df, global_roi_df except Exception as e: logger.error(f"Error fetching APR data: {e}") logger.exception("Exception traceback:") global_df = pd.DataFrame([]) global_roi_df = pd.DataFrame([]) return global_df, global_roi_df def log_adjusted_apr_availability(df): """ Analyzes and logs detailed information about adjusted_apr data availability. 
Args: df: DataFrame containing the APR data with adjusted_apr column """ if df.empty or 'adjusted_apr' not in df.columns: logger.warning("No adjusted_apr data available for analysis") return # Get only rows with valid adjusted_apr values has_adjusted = df[df['adjusted_apr'].notna()] if has_adjusted.empty: logger.warning("No valid adjusted_apr values found in the dataset") return # 1. When did adjusted_apr data start? first_adjusted = has_adjusted['timestamp'].min() last_adjusted = has_adjusted['timestamp'].max() logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}") logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}") logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days") # Calculate coverage percentage total_records = len(df) records_with_adjusted = len(has_adjusted) coverage_pct = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0 logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)") # 2. How many agents are providing adjusted_apr? agents_with_adjusted = has_adjusted['agent_id'].unique() logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr") logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}") # 3. May 10th cutoff analysis may_10_2025 = datetime(2025, 5, 10) before_cutoff = df[df['timestamp'] < may_10_2025] after_cutoff = df[df['timestamp'] >= may_10_2025] if not before_cutoff.empty and not after_cutoff.empty: before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum() before_pct = (before_with_adjusted / len(before_cutoff)) * 100 after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum() after_pct = (after_with_adjusted / len(after_cutoff)) * 100 logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)") logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)") # Check which agents had data before and after agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) missing_after = agents_before - agents_after if missing_after: logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}") new_after = agents_after - agents_before if new_after: logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}") # 4. 
Find date ranges for missing adjusted_apr # Group by agent to analyze per-agent data availability logger.info("=== DETAILED AGENT ANALYSIS ===") for agent_id in df['agent_id'].unique(): agent_data = df[df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}" # Get the valid adjusted_apr values for this agent agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()] if agent_adjusted.empty: logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available") continue # Get the date range for this agent's data agent_start = agent_data['timestamp'].min() agent_end = agent_data['timestamp'].max() # Get the date range for adjusted_apr data adjusted_start = agent_adjusted['timestamp'].min() adjusted_end = agent_adjusted['timestamp'].max() total_agent_records = len(agent_data) agent_with_adjusted = len(agent_adjusted) coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0 logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)") logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}") logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}") # Calculate if this agent had data before/after May 10th if not before_cutoff.empty and not after_cutoff.empty: agent_before = before_cutoff[before_cutoff['agent_id'] == agent_id] agent_after = after_cutoff[after_cutoff['agent_id'] == agent_id] has_before = not agent_before.empty and agent_before['adjusted_apr'].notna().any() has_after = not agent_after.empty and agent_after['adjusted_apr'].notna().any() if has_before and not has_after: last_date = agent_before[agent_before['adjusted_apr'].notna()]['timestamp'].max() logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. Last data point: {last_date}") elif not has_before and has_after: first_date = agent_after[agent_after['adjusted_apr'].notna()]['timestamp'].min() logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. 
First data point: {first_date}") # Check for gaps in adjusted_apr (periods of 24+ hours without data) if len(agent_adjusted) < 2: continue # Sort by timestamp sorted_data = agent_adjusted.sort_values('timestamp') # Calculate time differences between consecutive data points time_diffs = sorted_data['timestamp'].diff() # Find gaps larger than 24 hours gaps = sorted_data[time_diffs > pd.Timedelta(hours=24)] if not gaps.empty: logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {len(gaps)} gaps in adjusted_apr data") # Log the gaps for i, row in gaps.iterrows(): # Find the previous timestamp before the gap prev_idx = sorted_data.index.get_loc(i) - 1 prev_time = sorted_data.iloc[prev_idx]['timestamp'] if prev_idx >= 0 else None if prev_time: gap_start = prev_time gap_end = row['timestamp'] gap_duration = gap_end - gap_start logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)") def generate_apr_visualizations(): """Generate APR visualizations using CSV data only for consistency with ROI graph""" global global_df # CONSISTENCY FIX: Always use CSV data to match ROI graph behavior logger.info("Loading APR data from CSV files for consistency with ROI graph...") df, csv_file = load_apr_data_from_csv() if not df.empty: logger.info(f"Successfully loaded APR data from CSV: {len(df)} records") global_df = df # Create visualizations using CSV data logger.info("Creating APR visualizations from CSV data...") combined_fig = create_combined_time_series_graph(df) return combined_fig, csv_file # FALLBACK: If CSV not available, return error message logger.error("CSV data not available and API fallback disabled for consistency") # Create empty visualization with a message using Plotly fig = go.Figure() fig.add_annotation( x=0.5, y=0.5, text="No APR data available - CSV file missing", font=dict(size=20), showarrow=False ) fig.update_layout( xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) ) return fig, None def generate_roi_visualizations(): """Generate ROI visualizations directly from optimus_apr_values.csv""" global global_roi_df # SIMPLIFIED APPROACH: Load ROI data directly from APR CSV logger.info("Loading ROI data directly from optimus_apr_values.csv...") df_apr, csv_file = load_apr_data_from_csv() if not df_apr.empty and 'roi' in df_apr.columns: # CONSISTENCY FIX: Apply same filtering as APR graph logger.info("=== ROI GRAPH DATA FILTERING DEBUG ===") logger.info(f"Initial APR data loaded: {len(df_apr)} records") logger.info(f"Unique agents in initial data: {df_apr['agent_id'].nunique()}") logger.info(f"Agent IDs in initial data: {sorted(df_apr['agent_id'].unique().tolist())}") # Check metric_type distribution if 'metric_type' in df_apr.columns: metric_counts = df_apr['metric_type'].value_counts() logger.info(f"Metric type distribution: {metric_counts.to_dict()}") else: logger.warning("No 'metric_type' column found in APR data") # First filter by metric_type == 'APR' to match APR graph logic df_apr_filtered = df_apr[df_apr['metric_type'] == 'APR'].copy() logger.info(f"After metric_type == 'APR' filter: {len(df_apr_filtered)} records") logger.info(f"Unique agents after APR filter: {df_apr_filtered['agent_id'].nunique()}") logger.info(f"Agent IDs after APR filter: {sorted(df_apr_filtered['agent_id'].unique().tolist())}") # Then filter for rows with valid ROI values df_roi = 
df_apr_filtered[df_apr_filtered['roi'].notna()].copy() logger.info(f"After ROI filter: {len(df_roi)} records") logger.info(f"Unique agents after ROI filter: {df_roi['agent_id'].nunique()}") logger.info(f"Agent IDs after ROI filter: {sorted(df_roi['agent_id'].unique().tolist())}") if not df_roi.empty: # Add metric_type column for consistency df_roi['metric_type'] = 'ROI' logger.info(f"Successfully loaded {len(df_roi)} ROI records from APR CSV") global_roi_df = df_roi # Create visualizations using ROI data from APR CSV logger.info("Creating ROI visualizations from APR CSV data...") combined_fig = create_combined_roi_time_series_graph(df_roi) return combined_fig, csv_file else: logger.warning("No valid ROI data found in APR CSV") else: logger.warning("APR CSV not available or missing ROI column") # FALLBACK: If CSV not available, try API logger.info("CSV data not available, falling back to API...") try: # Fetch data from database if not already fetched if global_roi_df is None or global_roi_df.empty: _, df_roi = fetch_apr_data_from_db() else: df_roi = global_roi_df # If we got no data at all, return placeholder figures if df_roi.empty: logger.info("No ROI data available from API either. Using fallback visualization.") # Create empty visualizations with a message using Plotly fig = go.Figure() fig.add_annotation( x=0.5, y=0.5, text="No ROI data available", font=dict(size=20), showarrow=False ) fig.update_layout( xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) ) return fig, None # Set global_roi_df for access by other functions global_roi_df = df_roi # Create visualizations using API data logger.info("Creating ROI visualizations from API data...") combined_fig = create_combined_roi_time_series_graph(df_roi) return combined_fig, None except Exception as e: logger.error(f"Error fetching ROI data from API: {e}") # Return error visualization fig = go.Figure() fig.add_annotation( x=0.5, y=0.5, text=f"Error loading data: {str(e)}", font=dict(size=16, color="red"), showarrow=False ) fig.update_layout( xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False) ) return fig, None def aggregate_daily_data(df, metric_column): """ Aggregate data by date and agent, taking the median of values within each day. 
    Args:
        df: DataFrame with timestamp, agent_id, and metric data
        metric_column: Name of the metric column ('apr' or 'roi')

    Returns:
        DataFrame with daily aggregated data per agent
    """
    if df.empty:
        return df

    # Convert timestamp to date only (ignore time)
    df = df.copy()
    df['date'] = df['timestamp'].dt.date

    # DEBUG: Log July 8th data specifically
    july_8_data = df[df['date'] == pd.to_datetime('2025-07-08').date()]
    if not july_8_data.empty:
        july_8_agents = july_8_data['agent_id'].unique()
        logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agents before aggregation: {len(july_8_agents)}")
        logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agent IDs: {sorted(july_8_agents.tolist())}")

    # NEW: Add detailed logging to verify median calculation
    logger.info(f"=== MEDIAN CALCULATION DEBUG for {metric_column} ===")

    # Find days with multiple data points per agent to show the difference
    sample_groups = df.groupby(['date', 'agent_id']).size()
    multi_point_days = sample_groups[sample_groups > 1].head(10)  # Show up to 10 examples
    logger.info(f"Found {len(multi_point_days)} agent-days with multiple data points (showing up to 10):")

    mean_median_differences = []
    for (date, agent_id), count in multi_point_days.items():
        day_data = df[(df['date'] == date) & (df['agent_id'] == agent_id)]
        values = day_data[metric_column].tolist()
        calculated_mean = day_data[metric_column].mean()
        calculated_median = day_data[metric_column].median()
        agent_name = day_data['agent_name'].iloc[0] if not day_data.empty else f"Agent {agent_id}"
        difference = abs(calculated_mean - calculated_median)
        mean_median_differences.append(difference)
        logger.info(f"  {agent_name} on {date}: {count} values = {values}")
        logger.info(f"  MEAN: {calculated_mean:.4f}, MEDIAN: {calculated_median:.4f}, DIFF: {difference:.4f}")

    # Summary statistics
    if mean_median_differences:
        avg_difference = sum(mean_median_differences) / len(mean_median_differences)
        max_difference = max(mean_median_differences)
        logger.info(f"Mean vs Median differences - Avg: {avg_difference:.4f}, Max: {max_difference:.4f}")
    else:
        logger.info("No days found with multiple data points per agent")

    # Show total distribution of data points per day
    single_point_days = len(sample_groups[sample_groups == 1])
    multi_point_days_count = len(sample_groups[sample_groups > 1])
    logger.info(f"Data distribution: {single_point_days} agent-days with 1 point, {multi_point_days_count} agent-days with multiple points")

    # Group by date and agent, calculate the median for each day
    # (median, to match the docstring and the MEDIAN logging above)
    daily_agent_data = df.groupby(['date', 'agent_id']).agg({
        metric_column: 'median',
        'agent_name': 'first',
        'is_dummy': 'first',
        'metric_type': 'first'
    }).reset_index()

    # Convert date back to datetime for plotting
    daily_agent_data['timestamp'] = pd.to_datetime(daily_agent_data['date'])

    # Log a few sample median values from the result
    logger.info("Sample calculated median values:")
    for i, row in daily_agent_data.head(5).iterrows():
        logger.info(f"  {row['agent_name']} on {row['date']}: median {metric_column} = {row[metric_column]:.4f}")

    logger.info(f"Aggregated {len(df)} data points into {len(daily_agent_data)} daily values for {metric_column} using MEDIAN")
    return daily_agent_data


def calculate_daily_medians(daily_agent_data, metric_column):
    """
    Calculate daily medians across all agents for each date.
Args: daily_agent_data: DataFrame with daily aggregated data per agent metric_column: Name of the metric column ('apr' or 'roi') Returns: DataFrame with daily median values """ if daily_agent_data.empty: return daily_agent_data # For each date, calculate median across all agents (excluding missing data) daily_medians = daily_agent_data.groupby('date').agg({ metric_column: 'median' }).reset_index() # Convert date back to datetime for plotting daily_medians['timestamp'] = pd.to_datetime(daily_medians['date']) logger.info(f"Calculated {len(daily_medians)} daily median values for {metric_column}") return daily_medians def calculate_moving_average_medians(daily_medians, metric_column, window_days=7): """ Calculate moving average of daily medians using a specified time window. Args: daily_medians: DataFrame with daily median values metric_column: Name of the metric column ('apr' or 'roi') window_days: Number of days for the moving average window Returns: DataFrame with moving average values added """ if daily_medians.empty: return daily_medians # Sort by timestamp daily_medians = daily_medians.sort_values('timestamp').copy() # Initialize moving average column daily_medians['moving_avg'] = None # Define the time window time_window = pd.Timedelta(days=window_days) logger.info(f"Calculating {window_days}-day moving average of daily medians for {metric_column}") # Calculate moving averages for each timestamp for i, row in daily_medians.iterrows(): current_time = row['timestamp'] window_start = current_time - time_window # Get all median values within the time window window_data = daily_medians[ (daily_medians['timestamp'] >= window_start) & (daily_medians['timestamp'] <= current_time) ] # Calculate the average of medians for the time window if not window_data.empty: daily_medians.at[i, 'moving_avg'] = window_data[metric_column].mean() else: # If no data points in the window, use the current value daily_medians.at[i, 'moving_avg'] = row[metric_column] logger.info(f"Calculated {window_days}-day moving averages with {len(daily_medians)} points") return daily_medians def create_combined_roi_time_series_graph(df): """Create a time series graph showing daily median ROI values with 7-day moving average""" if len(df) == 0: logger.error("No data to plot combined ROI graph") fig = go.Figure() fig.add_annotation( text="No ROI data available", x=0.5, y=0.5, showarrow=False, font=dict(size=20) ) return fig # Calculate runtime for each agent from their actual first data point logger.info(f"Calculating runtime for each agent from their actual start date") agent_runtimes = {} for agent_id in df['agent_id'].unique(): agent_data = df[df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] first_report = agent_data['timestamp'].min() # Agent's actual start date last_report = agent_data['timestamp'].max() # Agent's last report runtime_days = (last_report - first_report).total_seconds() / (24 * 3600) # Convert to days agent_runtimes[agent_id] = { 'agent_name': agent_name, 'first_report': first_report, 'last_report': last_report, 'runtime_days': runtime_days } # Calculate average runtime avg_runtime = sum(data['runtime_days'] for data in agent_runtimes.values()) / len(agent_runtimes) if agent_runtimes else 0 logger.info(f"Average agent runtime from fixed start date: {avg_runtime:.2f} days") # Log individual agent runtimes for debugging for agent_id, data in agent_runtimes.items(): logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: 
{data['last_report']}")

    # SIMPLIFIED: ROI data is already clean from CSV, just ensure proper data types
    logger.info("Processing ROI data from CSV...")

    # Remove rows with invalid ROI values
    initial_count = len(df)
    df = df[df['roi'].notna()]
    final_count = len(df)
    removed_count = initial_count - final_count
    if removed_count > 0:
        logger.warning(f"Removed {removed_count} rows with invalid ROI values")

    # Ensure proper data types
    df['roi'] = df['roi'].astype(float)
    df['metric_type'] = df['metric_type'].astype(str)

    # Get min and max time for shapes
    min_time = df['timestamp'].min()
    max_time = df['timestamp'].max()

    # Use the actual start date from the data instead of a fixed date
    x_start_date = min_time

    # CRITICAL: Log the exact dataframe we're using for plotting to help debug
    logger.info(f"ROI Graph data - shape: {df.shape}, columns: {df.columns}")
    logger.info(f"ROI Graph data - unique agents: {df['agent_name'].unique().tolist()}")
    logger.info(f"ROI Graph data - min ROI: {df['roi'].min()}, max ROI: {df['roi'].max()}")

    # Export full dataframe to CSV for debugging
    debug_csv = "debug_roi_data.csv"
    df.to_csv(debug_csv)
    logger.info(f"Exported ROI graph data to {debug_csv} for debugging")

    # Create Plotly figure in a clean state
    fig = go.Figure()

    # Add background shapes for positive and negative regions
    # Shape for positive ROI region (above zero) - use a reasonable fixed range
    fig.add_shape(
        type="rect",
        fillcolor="rgba(230, 243, 255, 0.3)",
        line=dict(width=0),
        y0=0, y1=10,  # Fixed positive range to avoid extreme outliers affecting the view
        x0=min_time, x1=max_time,
        layer="below"
    )

    # Shape for negative ROI region (below zero) - use a reasonable fixed range
    fig.add_shape(
        type="rect",
        fillcolor="rgba(255, 230, 230, 0.3)",
        line=dict(width=0),
        y0=-10, y1=0,  # Fixed negative range to avoid extreme outliers affecting the view
        x0=min_time, x1=max_time,
        layer="below"
    )

    # Add zero line
    fig.add_shape(
        type="line",
        line=dict(dash="solid", width=1.5, color="black"),
        y0=0, y1=0,
        x0=min_time, x1=max_time
    )

    # Filter ROI outliers for better visualization (±200% range)
    before_outlier_filter = len(df)
    df = df[(df['roi'] <= 200) & (df['roi'] >= -200)]
    after_outlier_filter = len(df)
    excluded_by_outlier = before_outlier_filter - after_outlier_filter
    logger.info(f"ROI outlier filtering: {before_outlier_filter} -> {after_outlier_filter} data points ({excluded_by_outlier} excluded)")

    # IMPORTANT: Filter data by hardcoded date range (June 6 to July 21, 2025)
    min_date = datetime(2025, 6, 6)
    max_date = datetime(2025, 7, 21, 23, 59, 59)  # Include all of July 21st
    logger.info(f"Filtering ROI data to date range: {min_date} to {max_date}")

    # Count data points before filtering
    before_filter_count = len(df)

    # Apply date filter
    df = df[(df['timestamp'] >= min_date) & (df['timestamp'] <= max_date)]

    # Count data points after filtering
    after_filter_count = len(df)
    excluded_by_date = before_filter_count - after_filter_count
    logger.info(f"ROI Date filtering: {before_filter_count} -> {after_filter_count} data points ({excluded_by_date} excluded)")

    # NEW APPROACH: Daily aggregation and median calculation
    # Step 1: Aggregate data daily per agent (median of values within each day)
    daily_agent_data = aggregate_daily_data(df, 'roi')

    # Step 2: Calculate daily medians across all agents
    daily_medians = calculate_daily_medians(daily_agent_data, 'roi')

    # Step 3: Calculate 7-day moving average of daily medians
    daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)

    logger.info(f"NEW APPROACH: Processed {len(df)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")

    # Find the last date where we have valid moving average data
    last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None

    # If we don't have any valid moving average data, use the max time from the original data
    last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()

    logger.info(f"Last valid moving average date: {last_valid_ma_date}")
    logger.info(f"Using last valid date for graph: {last_valid_date}")

    # Plot individual agent daily data points with agent names in hover, but limit display for scalability
    if not daily_agent_data.empty:
        # Group by agent to use different colors for each agent
        unique_agents = daily_agent_data['agent_name'].unique()
        colors = px.colors.qualitative.Plotly[:len(unique_agents)]

        # Create a color map for agents
        color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}

        # Calculate the total number of data points per agent to determine which are most active
        agent_counts = daily_agent_data['agent_name'].value_counts()

        # Determine how many agents to show individually (limit to top 5 most active)
        MAX_VISIBLE_AGENTS = 5
        top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
        logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")

        # Add daily aggregated data points for each agent, but only make top agents visible by default
        for agent_name in unique_agents:
            agent_data = daily_agent_data[daily_agent_data['agent_name'] == agent_name]

            # Explicitly convert to Python lists
            x_values = agent_data['timestamp'].tolist()
            y_values = agent_data['roi'].tolist()

            # Change default visibility to False to hide all agent data points
            is_visible = False

            # Add data points as markers for ROI
            fig.add_trace(
                go.Scatter(
                    x=x_values,
                    y=y_values,
                    mode='markers',  # Only markers for original data
                    marker=dict(
                        color=color_map[agent_name],
                        symbol='circle',
                        size=10,
                        line=dict(width=1, color='black')
                    ),
                    name=f'Agent: {agent_name} (Daily ROI)',
                    hovertemplate='Time: %{x}<br>Daily ROI: %{y:.2f}%<br>Agent: ' + agent_name + '<extra></extra>',
                    visible=is_visible  # All agents hidden by default
                )
            )
            logger.info(f"Added daily ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")

    # Add ROI 7-day moving average of daily medians as a smooth line
    x_values_ma = daily_medians_with_ma['timestamp'].tolist()
    y_values_ma = daily_medians_with_ma['moving_avg'].tolist()

    # Create hover template for the ROI moving average line
    hover_data_roi = []
    for idx, row in daily_medians_with_ma.iterrows():
        timestamp = row['timestamp']
        # Format timestamp to show only date for daily data
        formatted_timestamp = timestamp.strftime('%Y-%m-%d')

        # Calculate number of active agents on this date
        active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())

        # DEBUG: Log agent counts for July 8th specifically
        if formatted_timestamp == '2025-07-08':
            agents_on_date = daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()
            logger.info(f"ROI GRAPH - July 8th active agents: {active_agents}")
            logger.info(f"ROI GRAPH - July 8th agent IDs: {sorted(agents_on_date.tolist())}")

        hover_data_roi.append(
            f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
        )

    fig.add_trace(
        go.Scatter(
            x=x_values_ma,
            y=y_values_ma,
            mode='lines',  # Only lines for moving average
            line=dict(color='blue', width=3, shape='spline', smoothing=1.3),  # Smooth curved line like APR
            name='Median ROI (7d window)',
            hovertext=hover_data_roi,
            hoverinfo='text',
            visible=True  # Visible by default
        )
    )
    logger.info(f"Added 7-day moving average of daily median ROI trace with {len(x_values_ma)} points")

    # Update layout with average runtime information in the title
    fig.update_layout(
        title=dict(
            text=f"Optimus Agents ROI (over avg. {avg_runtime:.1f} days runtime)",
            font=dict(
                family="Arial, sans-serif",
                size=22,
                color="black",
                weight="bold"
            )
        ),
        xaxis_title=None,  # Remove x-axis title to use annotation instead
        yaxis_title=None,  # Remove the y-axis title as we'll use annotations instead
        template="plotly_white",
        height=600,  # Reduced height for better fit on smaller screens
        autosize=True,  # Enable auto-sizing for responsiveness
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            groupclick="toggleitem"
        ),
        margin=dict(r=30, l=120, t=40, b=50),  # Increased bottom margin for x-axis title
        hovermode="closest"
    )

    # Add single annotation for y-axis
    fig.add_annotation(
        x=-0.08,  # Position further from the y-axis to avoid overlapping with tick labels
        y=0,  # Center of the y-axis
        xref="paper",
        yref="y",
        text="ROI [%]",
        showarrow=False,
        font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
        textangle=-90,  # Rotate text to be vertical
        align="center"
    )

    # Update layout for legend
    fig.update_layout(
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            groupclick="toggleitem",
            font=dict(
                family="Arial, sans-serif",
                size=14,  # Adjusted font size
                color="black",
                weight="bold"
            )
        )
    )

    # Update y-axis with clipping at -5
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        range=[-5, 10],  # Clip bottom at -5, reasonable top at 10
        tickformat=".1f",  # Format tick labels with 1 decimal place for better precision
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
        title=None  # Remove the built-in axis title since we're using annotations
    )

    # Update x-axis with better formatting and hardcoded date range (June 6 to July 21)
    min_date = datetime(2025, 6, 6)   # Hardcoded start date: June 6, 2025
    max_date = datetime(2025, 7, 21)  # Hardcoded end date: July 21, 2025

    logger.info(f"ROI Graph - Hardcoded date range: min_date = {min_date}, max_date = {max_date}")

    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(0,0,0,0.1)',
        # Set hardcoded range from June 6 to July 21, 2025
        range=[min_date, max_date],
        autorange=False,  # Explicitly disable autoscale
        tickformat="%b %d",  # Simplified date format without time
        tickangle=-30,  # Angle the labels for better readability
        tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
        title=None  # Remove built-in title to use annotation instead
    )

    try:
        # Save the figure
        graph_file = "optimus_roi_graph.html"
        fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)

        # Also save as image for compatibility
        img_file = "optimus_roi_graph.png"
        try:
            fig.write_image(img_file)
            logger.info(f"ROI graph saved to {graph_file} and {img_file}")
        except Exception as e:
            logger.error(f"Error saving ROI image: {e}")
            logger.info(f"ROI graph saved to {graph_file} only")

        # Return the figure object for direct use in Gradio
        return fig
    except Exception as e:
        # If the complex graph approach fails, create a simpler one
        logger.error(f"Error creating advanced ROI graph: {e}")
        logger.info("Falling back to simpler ROI graph")

        # Create a simpler graph as fallback
        simple_fig = go.Figure()

        # Add zero line
        simple_fig.add_shape(
            type="line",
            line=dict(dash="solid", width=1.5, color="black"),
            y0=0, y1=0,
            x0=min_time, x1=max_time
        )

        # Add background shapes with fixed reasonable ranges
        simple_fig.add_shape(
            type="rect",
            fillcolor="rgba(230, 243, 255, 0.3)",
            line=dict(width=0),
            y0=0, y1=10,  # Fixed positive range to avoid extreme outliers affecting the view
            x0=min_time, x1=max_time,
            layer="below"
        )
        simple_fig.add_shape(
            type="rect",
            fillcolor="rgba(255, 230, 230, 0.3)",
            line=dict(width=0),
            y0=-10, y1=0,  # Fixed negative range to avoid extreme outliers affecting the view
            x0=min_time, x1=max_time,
            layer="below"
        )

        # Simply plot the median ROI moving average
        if not daily_medians_with_ma.empty:
            # Add moving average as a line
            simple_fig.add_trace(
                go.Scatter(
                    x=daily_medians_with_ma['timestamp'],
                    y=daily_medians_with_ma['moving_avg'],
                    mode='lines',
                    name='Median ROI (7d window)',
                    line=dict(width=2, color='blue')  # Thinner line
                )
            )

        # Simplified layout with adjusted y-axis range
        simple_fig.update_layout(
            title=dict(
                text="Optimus Agents ROI",
                font=dict(
                    family="Arial, sans-serif",
                    size=22,
                    color="black",
                    weight="bold"
                )
            ),
            xaxis_title=None,
            yaxis_title=None,
            template="plotly_white",
            height=600,
            autosize=True,
            margin=dict(r=30, l=120, t=40, b=50)
        )

        # Update y-axis with fixed range for ROI (-10 to 10)
        simple_fig.update_yaxes(
            showgrid=True,
            gridwidth=1,
            gridcolor='rgba(0,0,0,0.1)',
            range=[-10, 10],  # Set fixed range from -10 to 10
            tickformat=".2f",
            tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
            title=None  # Remove the built-in axis title since we're using annotations
        )

        # Update x-axis with better formatting and autoscaling
        simple_fig.update_xaxes(
            showgrid=True,
            gridwidth=1,
            gridcolor='rgba(0,0,0,0.1)',
            autorange=True,  # Enable autoscaling
            tickformat="%b %d",
            tickangle=-30,
            tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
        )

        # Save the figure
        graph_file = "optimus_roi_graph.html"
        simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)

        # Return the simple figure
        return simple_fig


def save_roi_to_csv(df):
    """Save the ROI data DataFrame to a CSV file and return the file path"""
    if df.empty:
        logger.error("No ROI data to save to CSV")
        return None

    # Define the CSV file path
    csv_file = "optimus_roi_values.csv"

    # Save to CSV
    df.to_csv(csv_file, index=False)
    logger.info(f"ROI data saved to {csv_file}")

    return csv_file


def create_time_series_graph_per_agent(df):
    """Create a time series graph for each agent using Plotly"""
    # Get unique agents
    unique_agents = df['agent_id'].unique()

    if len(unique_agents) == 0:
        logger.error("No agent data to plot")
        fig = go.Figure()
        fig.add_annotation(
            text="No agent data available",
            x=0.5, y=0.5,
            showarrow=False, font=dict(size=20)
        )
        return fig

    # Create a subplot figure for each agent
    fig = make_subplots(rows=len(unique_agents), cols=1,
                        subplot_titles=[f"Agent: {df[df['agent_id'] == agent_id]['agent_name'].iloc[0]}" for agent_id in unique_agents],
                        vertical_spacing=0.1)

    # Plot data for each agent
    for i, agent_id in enumerate(unique_agents):
        agent_data = df[df['agent_id'] == agent_id].copy()
        agent_name = agent_data['agent_name'].iloc[0]
        row = i + 1
color="black"), y0=0, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(), row=row, col=1 ) # Add background colors with dynamic values fig.add_shape( type="rect", fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0), y0=0, y1=agent_data['apr'].max() * 1.1 if not agent_data.empty else 10, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(), row=row, col=1, layer="below" ) fig.add_shape( type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0), y0=agent_data['apr'].min() * 1.1 if not agent_data.empty else -10, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(), row=row, col=1, layer="below" ) # Create separate dataframes for different data types apr_data = agent_data[agent_data['metric_type'] == 'APR'] perf_data = agent_data[agent_data['metric_type'] == 'Performance'] # Sort all data by timestamp for the line plots combined_agent_data = agent_data.sort_values('timestamp') # Add main line connecting all points fig.add_trace( go.Scatter( x=combined_agent_data['timestamp'], y=combined_agent_data['apr'], mode='lines', line=dict(color='purple', width=2), name=f'{agent_name}', legendgroup=agent_name, showlegend=(i == 0), # Only show in legend once hovertemplate='Time: %{x}
Value: %{y:.2f}' ), row=row, col=1 ) # Add scatter points for APR values if not apr_data.empty: fig.add_trace( go.Scatter( x=apr_data['timestamp'], y=apr_data['apr'], mode='markers', marker=dict(color='blue', size=10, symbol='circle'), name='APR', legendgroup='APR', showlegend=(i == 0), hovertemplate='Time: %{x}<br>APR: %{y:.2f}' ), row=row, col=1 ) # Add scatter points for Performance values if not perf_data.empty: fig.add_trace( go.Scatter( x=perf_data['timestamp'], y=perf_data['apr'], mode='markers', marker=dict(color='red', size=10, symbol='square'), name='Performance', legendgroup='Performance', showlegend=(i == 0), hovertemplate='Time: %{x}
Performance: %{y:.2f}' ), row=row, col=1 ) # Update axes fig.update_xaxes(title_text="Time", row=row, col=1) fig.update_yaxes(title_text="Value", row=row, col=1, gridcolor='rgba(0,0,0,0.1)') # Update layout fig.update_layout( height=400 * len(unique_agents), width=1000, title_text="APR and Performance Values per Agent", template="plotly_white", legend=dict( orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1 ), margin=dict(r=20, l=20, t=30, b=20), hovermode="closest" ) # Save the figure (still useful for reference) graph_file = "optimus_apr_per_agent_graph.html" fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) # Also save as image for compatibility img_file = "optimus_apr_per_agent_graph.png" fig.write_image(img_file) logger.info(f"Per-agent graph saved to {graph_file} and {img_file}") # Return the figure object for direct use in Gradio return fig def write_debug_info(df, fig): """Minimal debug info function""" try: # Just log minimal information logger.debug(f"Graph created with {len(df)} data points and {len(fig.data)} traces") return True except Exception as e: logger.error(f"Error writing debug info: {e}") return False def create_combined_time_series_graph(df): """Create a time series graph showing average APR values across all agents""" if len(df) == 0: logger.error("No data to plot combined graph") fig = go.Figure() fig.add_annotation( text="No data available", x=0.5, y=0.5, showarrow=False, font=dict(size=20) ) return fig # IMPORTANT: Force data types to ensure consistency df['apr'] = df['apr'].astype(float) # Ensure APR is float df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string # Get min and max time for shapes min_time = df['timestamp'].min() max_time = df['timestamp'].max() # Use the actual start date from the data instead of a fixed date x_start_date = min_time # CRITICAL: Log the exact dataframe we're using for plotting to help debug logger.info(f"Graph data - shape: {df.shape}, columns: {df.columns}") logger.info(f"Graph data - unique agents: {df['agent_name'].unique().tolist()}") logger.info("Graph data - all positive APR values only") logger.info(f"Graph data - min APR: {df['apr'].min()}, max APR: {df['apr'].max()}") # Export full dataframe to CSV for debugging debug_csv = "debug_graph_data.csv" df.to_csv(debug_csv) logger.info(f"Exported graph data to {debug_csv} for debugging") # Write detailed data report with open("debug_graph_data_report.txt", "w") as f: f.write("==== GRAPH DATA REPORT ====\n\n") f.write(f"Total data points: {len(df)}\n") f.write(f"Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}\n\n") # Output per-agent details unique_agents = df['agent_id'].unique() f.write(f"Number of agents: {len(unique_agents)}\n\n") for agent_id in unique_agents: agent_data = df[df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] f.write(f"== Agent: {agent_name} (ID: {agent_id}) ==\n") f.write(f" Total data points: {len(agent_data)}\n") apr_data = agent_data[agent_data['metric_type'] == 'APR'] f.write(f" APR data points: {len(apr_data)}\n") if not apr_data.empty: f.write(f" APR values: {apr_data['apr'].tolist()}\n") f.write(f" APR timestamps: {[ts.strftime('%Y-%m-%d %H:%M:%S') if ts is not None else 'None' for ts in apr_data['timestamp']]}\n") f.write("\n") logger.info("Generated detailed graph data report") # ENSURE THERE ARE NO CONFLICTING AXES OR TRACES # Create Plotly figure in a clean state fig = go.Figure() # Enable autoscaling instead of fixed ranges logger.info("Using 
autoscaling for axes ranges") # Add background shapes for APR and Performance regions min_time = df['timestamp'].min() max_time = df['timestamp'].max() # Add shape for positive APR region (above zero) - use reasonable fixed range fig.add_shape( type="rect", fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0), y0=0, y1=200, # Fixed positive range to avoid extreme outliers affecting the view x0=min_time, x1=max_time, layer="below" ) # Add shape for negative APR region (below zero) - use reasonable fixed range fig.add_shape( type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0), y0=-200, y1=0, # Fixed negative range to avoid extreme outliers affecting the view x0=min_time, x1=max_time, layer="below" ) # Add zero line fig.add_shape( type="line", line=dict(dash="solid", width=1.5, color="black"), y0=0, y1=0, x0=min_time, x1=max_time ) # MODIFIED: Calculate average APR values across all agents for each timestamp # Filter for APR data only logger.info("=== APR GRAPH DATA FILTERING DEBUG ===") logger.info(f"Initial APR data loaded: {len(df)} records") logger.info(f"Unique agents in initial data: {df['agent_id'].nunique()}") logger.info(f"Agent IDs in initial data: {sorted(df['agent_id'].unique().tolist())}") # Check metric_type distribution if 'metric_type' in df.columns: metric_counts = df['metric_type'].value_counts() logger.info(f"Metric type distribution: {metric_counts.to_dict()}") else: logger.warning("No 'metric_type' column found in APR data") apr_data = df[df['metric_type'] == 'APR'].copy() logger.info(f"After metric_type == 'APR' filter: {len(apr_data)} records") logger.info(f"Unique agents after APR filter: {apr_data['agent_id'].nunique()}") logger.info(f"Agent IDs after APR filter: {sorted(apr_data['agent_id'].unique().tolist())}") # Date-based APR percentage filtering: ±500% filter until June 22, 2025, then no filter cutoff_date = datetime(2025, 6, 22) before_cutoff = apr_data[apr_data['timestamp'] < cutoff_date] after_cutoff = apr_data[apr_data['timestamp'] >= cutoff_date] # Apply ±500% filter to data before June 22, 2025 before_outlier_filter = len(before_cutoff) before_cutoff_filtered = before_cutoff[(before_cutoff['apr'] <= 500) & (before_cutoff['apr'] >= -500)] after_outlier_filter = len(before_cutoff_filtered) excluded_by_outlier = before_outlier_filter - after_outlier_filter logger.info(f"APR filtering before June 22, 2025: {before_outlier_filter} -> {after_outlier_filter} data points ({excluded_by_outlier} excluded by ±500% filter)") # No filtering for data after June 22, 2025 logger.info(f"APR filtering after June 22, 2025: {len(after_cutoff)} data points (no percentage filter applied)") # Combine filtered before data with unfiltered after data apr_data = pd.concat([before_cutoff_filtered, after_cutoff], ignore_index=True) logger.info(f"Total APR data after date-based filtering: {len(apr_data)} data points") # IMPORTANT: Filter data by hardcoded date range (June 6 to July 8, 2025) min_date = datetime(2025, 6, 6) max_date = datetime(2025, 7, 21, 23, 59, 59) # Include all of July 8th logger.info(f"Filtering APR data to date range: {min_date} to {max_date}") # Count data points before filtering before_filter_count = len(apr_data) # Apply date filter apr_data = apr_data[(apr_data['timestamp'] >= min_date) & (apr_data['timestamp'] <= max_date)] # Count data points after filtering after_filter_count = len(apr_data) excluded_by_date = before_filter_count - after_filter_count logger.info(f"Date filtering: {before_filter_count} -> {after_filter_count} data points 
({excluded_by_date} excluded)") # NEW APPROACH: Daily aggregation and median calculation for APR # Step 1: Aggregate data daily per agent (mean of values within each day) daily_agent_data = aggregate_daily_data(apr_data, 'apr') # Step 2: Calculate daily medians across all agents daily_medians = calculate_daily_medians(daily_agent_data, 'apr') # Step 3: Calculate 7-day moving average of daily medians daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'apr', window_days=7) # Also handle adjusted APR if it exists daily_medians_adjusted = None daily_medians_adjusted_with_ma = None if 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any(): # Create a separate dataset for adjusted APR apr_data_with_adjusted = apr_data[apr_data['adjusted_apr'].notna()].copy() if not apr_data_with_adjusted.empty: # Step 1: Aggregate adjusted APR data daily per agent daily_agent_data_adjusted = aggregate_daily_data(apr_data_with_adjusted, 'adjusted_apr') # Step 2: Calculate daily medians for adjusted APR daily_medians_adjusted = calculate_daily_medians(daily_agent_data_adjusted, 'adjusted_apr') # Step 3: Calculate 7-day moving average of daily medians for adjusted APR daily_medians_adjusted_with_ma = calculate_moving_average_medians(daily_medians_adjusted, 'adjusted_apr', window_days=7) logger.info(f"NEW APPROACH APR: Processed {len(apr_data)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians") if daily_medians_adjusted is not None: logger.info(f"NEW APPROACH Adjusted APR: Processed adjusted APR data → {len(daily_medians_adjusted)} daily medians") # This old moving average calculation is no longer needed with the new daily median approach # Find the last date where we have valid moving average data last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None # Find the last date where we have valid adjusted moving average data last_valid_adj_ma_date = None if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty: last_valid_adj_ma_date = daily_medians_adjusted_with_ma[daily_medians_adjusted_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_adjusted_with_ma['moving_avg'].dropna().empty else None # Determine the last valid date for either moving average last_valid_date = last_valid_ma_date if last_valid_adj_ma_date is not None: last_valid_date = max(last_valid_date, last_valid_adj_ma_date) if last_valid_date is not None else last_valid_adj_ma_date # If we don't have any valid moving average data, use the max time from the original data if last_valid_date is None: last_valid_date = df['timestamp'].max() logger.info(f"Last valid moving average date: {last_valid_ma_date}") logger.info(f"Last valid adjusted moving average date: {last_valid_adj_ma_date}") logger.info(f"Using last valid date for graph: {last_valid_date}") # Plot individual agent data points with agent names in hover, but limit display for scalability if not apr_data.empty: # Group by agent to use different colors for each agent unique_agents = apr_data['agent_name'].unique() colors = px.colors.qualitative.Plotly[:len(unique_agents)] # Create a color map for agents color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)} # Calculate the total number of data points per agent to determine which are most active agent_counts = apr_data['agent_name'].value_counts() # Determine how 
many agents to show individually (limit to top 5 most active) MAX_VISIBLE_AGENTS = 5 top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist() logger.info(f"Identified top {len(top_agents)} most active agents out of {len(unique_agents)} total agents (all agent traces hidden by default)") # Add data points for each agent; every per-agent trace is hidden by default and can be toggled via the legend for agent_name in unique_agents: agent_data = apr_data[apr_data['agent_name'] == agent_name] # Explicitly convert to Python lists x_values = agent_data['timestamp'].tolist() y_values = agent_data['apr'].tolist() # Default visibility is False so all agent data points start hidden is_visible = False # Add data points as markers for APR fig.add_trace( go.Scatter( x=x_values, y=y_values, mode='markers', # Only markers for original data marker=dict( color=color_map[agent_name], symbol='circle', size=10, line=dict(width=1, color='black') ), name=f'Agent: {agent_name} (APR)', hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '', visible=is_visible # All agents hidden by default ) ) logger.info(f"Added APR data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})") # Add data points for adjusted APR if it exists if 'adjusted_apr' in agent_data.columns and agent_data['adjusted_apr'].notna().any(): x_values_adj = agent_data['timestamp'].tolist() y_values_adj = agent_data['adjusted_apr'].tolist() fig.add_trace( go.Scatter( x=x_values_adj, y=y_values_adj, mode='markers', # Only markers for original data marker=dict( color=color_map[agent_name], symbol='diamond', # Different symbol for adjusted APR size=10, line=dict(width=1, color='black') ), name=f'Agent: {agent_name} (Adjusted APR)', hovertemplate='Time: %{x}
Adjusted APR: %{y:.2f}
Agent: ' + agent_name + '', visible=is_visible # All agents hidden by default ) ) logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})") # Add APR 7-day moving average of daily medians as a smooth line x_values_ma = daily_medians_with_ma['timestamp'].tolist() y_values_ma = daily_medians_with_ma['moving_avg'].tolist() # Create hover template for the APR moving average line # CONSISTENCY FIX: Use ROI daily agent data for active agent counts hover_data_apr = [] for idx, row in daily_medians_with_ma.iterrows(): timestamp = row['timestamp'] # Format timestamp to show only date for daily data formatted_timestamp = timestamp.strftime('%Y-%m-%d') # FIXED: Use ROI data to get consistent active agent counts # Load ROI data to get the correct agent counts try: df_roi_for_counts, _ = load_apr_data_from_csv() if not df_roi_for_counts.empty and 'roi' in df_roi_for_counts.columns: # Filter for ROI data and same date df_roi_filtered = df_roi_for_counts[ (df_roi_for_counts['metric_type'] == 'APR') & (df_roi_for_counts['roi'].notna()) ].copy() # Aggregate daily for ROI data roi_daily_agent_data = aggregate_daily_data(df_roi_filtered, 'roi') # Get active agents from ROI data for this date active_agents = len(roi_daily_agent_data[roi_daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()) else: # Fallback to APR data if ROI not available active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()) except: # Fallback to APR data if there's any error active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()) # DEBUG: Log agent counts for July 8th specifically if formatted_timestamp == '2025-07-08': logger.info(f"APR GRAPH - July 8th active agents (using ROI logic): {active_agents}") hover_data_apr.append( f"Date: {formatted_timestamp}
Median APR (7d window): {row['moving_avg']:.2f}%
Active agents: {active_agents}" ) fig.add_trace( go.Scatter( x=x_values_ma, y=y_values_ma, mode='lines', # Only lines for moving average line=dict(color='red', width=3, shape='spline', smoothing=1.3), # Smooth curved line name='Median APR (7d window)', hovertext=hover_data_apr, hoverinfo='text', visible=True # Visible by default ) ) logger.info(f"Added 7-day moving average of daily median APR trace with {len(x_values_ma)} points") # Add adjusted APR 7-day moving average line if it exists if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty: x_values_adj_ma = daily_medians_adjusted_with_ma['timestamp'].tolist() y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist() # Create hover template for the adjusted APR moving average line # CONSISTENCY FIX: Use ROI daily agent data for active agent counts (same as regular APR) hover_data_adj = [] for idx, row in daily_medians_adjusted_with_ma.iterrows(): timestamp = row['timestamp'] # Format timestamp to show only date for daily data formatted_timestamp = timestamp.strftime('%Y-%m-%d') # FIXED: Use ROI data to get consistent active agent counts (same logic as APR) try: df_roi_for_counts, _ = load_apr_data_from_csv() if not df_roi_for_counts.empty and 'roi' in df_roi_for_counts.columns: # Filter for ROI data and same date df_roi_filtered = df_roi_for_counts[ (df_roi_for_counts['metric_type'] == 'APR') & (df_roi_for_counts['roi'].notna()) ].copy() # Aggregate daily for ROI data roi_daily_agent_data = aggregate_daily_data(df_roi_filtered, 'roi') # Get active agents from ROI data for this date active_agents = len(roi_daily_agent_data[roi_daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()) else: # Fallback to adjusted APR data if ROI not available active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0 except: # Fallback to adjusted APR data if there's any error active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0 # DEBUG: Log agent counts for July 8th specifically if formatted_timestamp == '2025-07-08': logger.info(f"ADJUSTED APR GRAPH - July 8th active agents (using ROI logic): {active_agents}") hover_data_adj.append( f"Date: {formatted_timestamp}
Median Adjusted APR (7d window): {row['moving_avg']:.2f}%
Active agents: {active_agents}" ) fig.add_trace( go.Scatter( x=x_values_adj_ma, y=y_values_adj_ma, mode='lines', # Only lines for moving average line=dict(color='green', width=3, shape='spline', smoothing=1.3), # Smooth curved line name='Median Adjusted APR (7d window)', hovertext=hover_data_adj, hoverinfo='text', visible=True # Visible by default ) ) logger.info(f"Added 7-day moving average of daily median Adjusted APR trace with {len(x_values_adj_ma)} points") else: logger.warning("No adjusted APR moving average data available to plot") # Removed cumulative APR as requested logger.info("Cumulative APR graph line has been removed as requested") # Update layout - use simple boolean values everywhere # Make chart responsive instead of fixed width fig.update_layout( title=dict( text="Optimus Agents", font=dict( family="Arial, sans-serif", size=22, color="black", weight="bold" ) ), xaxis_title=None, # Remove x-axis title to use annotation instead yaxis_title=None, # Remove the y-axis title as we'll use annotations instead template="plotly_white", height=600, # Reduced height for better fit on smaller screens # Removed fixed width to enable responsiveness autosize=True, # Enable auto-sizing for responsiveness legend=dict( orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, groupclick="toggleitem" ), margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title hovermode="closest" ) # Add two separate annotations for y-axis titles # First annotation for "Percent drawdown (%)" fig.add_annotation( x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels y=-25, # Position in the negative region xref="paper", yref="y", text="Percent drawdown (%)", showarrow=False, font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size textangle=-90, # Rotate text to be vertical align="center" ) # Second annotation for "Agent APR (%)" fig.add_annotation( x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels y=75, # Position in the positive region xref="paper", yref="y", text="Agent APR (%)", showarrow=False, font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size textangle=-90, # Rotate text to be vertical align="center" ) # Remove x-axis title annotation # fig.add_annotation( # x=0.5, # Center of the x-axis # y=-0.15, # Below the x-axis # xref="paper", # yref="paper", # text="Date", # showarrow=False, # font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size # align="center" # ) # Update layout for legend fig.update_layout( legend=dict( orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, groupclick="toggleitem", font=dict( family="Arial, sans-serif", size=14, # Adjusted font size color="black", weight="bold" ) ) ) # Update y-axis with clipping at -50 fig.update_yaxes( showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.1)', range=[-50, 200], # Clip bottom at -50, reasonable top at 200 tickformat=".2f", # Format tick labels with 2 decimal places tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size title=None # Remove the built-in axis title since we're using annotations ) # Update x-axis with better formatting and hardcoded date range (June 6 to July 8) min_date = datetime(2025, 6, 6) # Hardcoded start date: June 6, 2025 max_date = datetime(2025, 7, 21) # Hardcoded end date: July 8, 2025 logger.info(f"APR Graph - Hardcoded date range: min_date 
= {min_date}, max_date = {max_date}") fig.update_xaxes( showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.1)', # Set hardcoded range from June 6 to June 18, 2025 range=[min_date, max_date], autorange=False, # Explicitly disable autoscale tickformat="%b %d", # Simplified date format without time tickangle=-30, # Angle the labels for better readability tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size title=None # Remove built-in title to use annotation instead ) # SIMPLIFIED APPROACH: Do a direct plot without markers for comparison # This creates a simple, reliable fallback plot if the advanced one fails try: # Write detailed debug information before saving the figure write_debug_info(df, fig) # Save the figure (still useful for reference) graph_file = "optimus_apr_combined_graph.html" fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False) # Also save as image for compatibility img_file = "optimus_apr_combined_graph.png" try: fig.write_image(img_file) logger.info(f"Combined graph saved to {graph_file} and {img_file}") except Exception as e: logger.error(f"Error saving image: {e}") logger.info(f"Combined graph saved to {graph_file} only") # Return the figure object for direct use in Gradio return fig except Exception as e: # If the complex graph approach fails, create a simpler one logger.error(f"Error creating advanced graph: {e}") logger.info("Falling back to Simpler graph") # Create a simpler graph as fallback simple_fig = go.Figure() # Add zero line simple_fig.add_shape( type="line", line=dict(dash="solid", width=1.5, color="black"), y0=0, y1=0, x0=min_time, x1=max_time ) # Define colors for the fallback graph fallback_colors = px.colors.qualitative.Plotly # Simply plot the average APR data with moving average if not avg_apr_data.empty: # Sort by timestamp avg_apr_data = avg_apr_data.sort_values('timestamp') # Calculate both moving averages for the fallback graph avg_apr_data_with_ma = avg_apr_data.copy() avg_apr_data_with_ma['moving_avg'] = None # 2-hour window avg_apr_data_with_ma['infinite_avg'] = None # Infinite window # Define the time window (6 hours) time_window = pd.Timedelta(hours=6) # Calculate the moving averages for each timestamp for i, row in avg_apr_data_with_ma.iterrows(): current_time = row['timestamp'] window_start = current_time - time_window # Get all data points within the 2-hour time window window_data = apr_data[ (apr_data['timestamp'] >= window_start) & (apr_data['timestamp'] <= current_time) ] # Get all data points up to the current timestamp (infinite window) infinite_window_data = apr_data[ apr_data['timestamp'] <= current_time ] # Calculate the average APR for the 2-hour time window if not window_data.empty: avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean() else: # If no data points in the window, use the current value avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr'] # Calculate the average APR for the infinite window if not infinite_window_data.empty: avg_apr_data_with_ma.at[i, 'infinite_avg'] = infinite_window_data['apr'].mean() else: avg_apr_data_with_ma.at[i, 'infinite_avg'] = row['apr'] # Add data points for each agent, but only make top agents visible by default unique_agents = apr_data['agent_name'].unique() colors = px.colors.qualitative.Plotly[:len(unique_agents)] color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)} # Calculate the total number of data points per agent agent_counts = apr_data['agent_name'].value_counts() # 
Determine how many agents to show individually (limit to top 5 most active) MAX_VISIBLE_AGENTS = 5 top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist() for agent_name in unique_agents: agent_data = apr_data[apr_data['agent_name'] == agent_name] # Determine if this agent should be visible by default is_visible = agent_name in top_agents # Add data points as markers simple_fig.add_trace( go.Scatter( x=agent_data['timestamp'], y=agent_data['apr'], mode='markers', name=f'Agent: {agent_name}', marker=dict( size=10, color=color_map[agent_name] ), hovertemplate='Time: %{x}
APR: %{y:.2f}
Agent: ' + agent_name + '', visible=is_visible # Only top agents visible by default ) ) # Add 2-hour moving average as a line simple_fig.add_trace( go.Scatter( x=avg_apr_data_with_ma['timestamp'], y=avg_apr_data_with_ma['moving_avg'], mode='lines', name='Average APR (6h window)', line=dict(width=2, color='red') # Thinner line ) ) # Add infinite window moving average as another line simple_fig.add_trace( go.Scatter( x=avg_apr_data_with_ma['timestamp'], y=avg_apr_data_with_ma['infinite_avg'], mode='lines', name='Cumulative Average APR (all data)', line=dict(width=4, color='green') # Thicker solid line ) ) # Simplified layout with fixed y-axis range (-10 to 10) and increased size simple_fig.update_layout( title=dict( text="Optimus Agents", font=dict( family="Arial, sans-serif", size=22, color="black", weight="bold" ) ), xaxis_title=None, # Remove x-axis title to use annotation instead yaxis_title=None, # Remove the y-axis title as we'll use annotations instead yaxis=dict( # Fixed range from -10 to 10 range=[-10, 10], # Set fixed range from -10 to 10 tickformat=".2f", # Format tick labels with 2 decimal places tickfont=dict(size=12) # Larger font for tick labels ), height=600, # Reduced height for better fit # Removed fixed width to enable responsiveness autosize=True, # Enable auto-sizing for responsiveness template="plotly_white", # Use a cleaner template margin=dict(r=30, l=120, t=40, b=50) # Increased bottom margin for x-axis title ) # Add annotations for y-axis regions in the fallback graph simple_fig.add_annotation( x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels y=-25, # Middle of the negative region xref="paper", yref="y", text="Percent drawdown [%]", showarrow=False, font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size textangle=-90, # Rotate text to be vertical align="center" ) simple_fig.add_annotation( x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels y=50, # Middle of the positive region xref="paper", yref="y", text="Agent APR [%]", showarrow=False, font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size textangle=-90, # Rotate text to be vertical align="center" ) # Remove x-axis title annotation # simple_fig.add_annotation( # x=0.5, # Center of the x-axis # y=-0.15, # Below the x-axis # xref="paper", # yref="paper", # text="Date", # showarrow=False, # font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size # align="center" # ) # Update legend font for fallback graph simple_fig.update_layout( legend=dict( font=dict( family="Arial, sans-serif", size=14, # Adjusted font size color="black", weight="bold" ) ) ) # Apply autoscaling to the x-axis for the fallback graph simple_fig.update_xaxes( autorange=True, # Enable autoscaling tickformat="%b %d", # Simplified date format without time tickangle=-30, tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size title=None # Remove built-in title to use annotation instead ) # Update y-axis tick font for fallback graph simple_fig.update_yaxes( tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold") # Adjusted font size ) # Add a note about hidden agents if there are more than MAX_VISIBLE_AGENTS if len(unique_agents) > MAX_VISIBLE_AGENTS: simple_fig.add_annotation( text=f"Note: Only showing top {MAX_VISIBLE_AGENTS} agents by default. 
Toggle others in legend.", xref="paper", yref="paper", x=0.5, y=1.05, showarrow=False, font=dict(size=12, color="gray"), align="center" ) # Return the simple figure return simple_fig def save_to_csv(df): """Save the APR data DataFrame to a CSV file and return the file path""" if df.empty: logger.error("No APR data to save to CSV") return None # Define the CSV file path csv_file = "optimus_apr_values.csv" # Save to CSV df.to_csv(csv_file, index=False) logger.info(f"APR data saved to {csv_file}") # Also generate a statistics CSV file stats_df = generate_statistics_from_data(df) stats_csv = "optimus_apr_statistics.csv" stats_df.to_csv(stats_csv, index=False) logger.info(f"Statistics saved to {stats_csv}") # Log detailed statistics about adjusted APR if 'adjusted_apr' in df.columns and df['adjusted_apr'].notna().any(): adjusted_stats = stats_df[stats_df['avg_adjusted_apr'].notna()] logger.info(f"Agents with adjusted APR data: {len(adjusted_stats)} out of {len(stats_df)}") for _, row in adjusted_stats.iterrows(): if row['agent_id'] != 'ALL': # Skip the overall stats row logger.info(f"Agent {row['agent_name']} adjusted APR stats: avg={row['avg_adjusted_apr']:.2f}, min={row['min_adjusted_apr']:.2f}, max={row['max_adjusted_apr']:.2f}") # Log overall adjusted APR stats overall_row = stats_df[stats_df['agent_id'] == 'ALL'] if not overall_row.empty and pd.notna(overall_row['avg_adjusted_apr'].iloc[0]): logger.info(f"Overall adjusted APR stats: avg={overall_row['avg_adjusted_apr'].iloc[0]:.2f}, min={overall_row['min_adjusted_apr'].iloc[0]:.2f}, max={overall_row['max_adjusted_apr'].iloc[0]:.2f}") return csv_file def generate_statistics_from_data(df): """Generate statistics from the APR data""" if df.empty: return pd.DataFrame() # Get unique agents unique_agents = df['agent_id'].unique() stats_list = [] # Generate per-agent statistics for agent_id in unique_agents: agent_data = df[df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] # APR statistics apr_data = agent_data[agent_data['metric_type'] == 'APR'] real_apr = apr_data[apr_data['is_dummy'] == False] # Performance statistics perf_data = agent_data[agent_data['metric_type'] == 'Performance'] real_perf = perf_data[perf_data['is_dummy'] == False] # Check if adjusted_apr exists and has non-null values has_adjusted_apr = 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any() stats = { 'agent_id': agent_id, 'agent_name': agent_name, 'total_points': len(agent_data), 'apr_points': len(apr_data), 'performance_points': len(perf_data), 'real_apr_points': len(real_apr), 'real_performance_points': len(real_perf), 'avg_apr': apr_data['apr'].mean() if not apr_data.empty else None, 'avg_performance': perf_data['apr'].mean() if not perf_data.empty else None, 'max_apr': apr_data['apr'].max() if not apr_data.empty else None, 'min_apr': apr_data['apr'].min() if not apr_data.empty else None, 'avg_adjusted_apr': apr_data['adjusted_apr'].mean() if has_adjusted_apr else None, 'max_adjusted_apr': apr_data['adjusted_apr'].max() if has_adjusted_apr else None, 'min_adjusted_apr': apr_data['adjusted_apr'].min() if has_adjusted_apr else None, 'latest_timestamp': agent_data['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not agent_data.empty else None } stats_list.append(stats) # Generate overall statistics apr_only = df[df['metric_type'] == 'APR'] perf_only = df[df['metric_type'] == 'Performance'] # Check if adjusted_apr exists and has non-null values for overall stats has_adjusted_apr_overall = 'adjusted_apr' in apr_only.columns 
and apr_only['adjusted_apr'].notna().any() overall_stats = { 'agent_id': 'ALL', 'agent_name': 'All Agents', 'total_points': len(df), 'apr_points': len(apr_only), 'performance_points': len(perf_only), 'real_apr_points': len(apr_only[apr_only['is_dummy'] == False]), 'real_performance_points': len(perf_only[perf_only['is_dummy'] == False]), 'avg_apr': apr_only['apr'].mean() if not apr_only.empty else None, 'avg_performance': perf_only['apr'].mean() if not perf_only.empty else None, 'max_apr': apr_only['apr'].max() if not apr_only.empty else None, 'min_apr': apr_only['apr'].min() if not apr_only.empty else None, 'avg_adjusted_apr': apr_only['adjusted_apr'].mean() if has_adjusted_apr_overall else None, 'max_adjusted_apr': apr_only['adjusted_apr'].max() if has_adjusted_apr_overall else None, 'min_adjusted_apr': apr_only['adjusted_apr'].min() if has_adjusted_apr_overall else None, 'latest_timestamp': df['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not df.empty else None } stats_list.append(overall_stats) return pd.DataFrame(stats_list) # Create dummy functions for the commented out imports def create_transcation_visualizations(): """Dummy implementation that returns a placeholder graph""" fig = go.Figure() fig.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) return fig def create_active_agents_visualizations(): """Dummy implementation that returns a placeholder graph""" fig = go.Figure() fig.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) return fig # Dummy blockchain functions to replace the commented ones def get_transfers(integrator: str, wallet: str) -> str: """Dummy function that returns an empty result""" return {"transfers": []} def fetch_and_aggregate_transactions(): """Dummy function that returns empty data""" return [], {} # Function to parse the transaction data and prepare it for visualization def process_transactions_and_agents(data): """Dummy function that returns empty dataframes""" df_transactions = pd.DataFrame() df_agents = pd.DataFrame(columns=['date', 'agent_count']) df_agents_weekly = pd.DataFrame() return df_transactions, df_agents, df_agents_weekly # Function to create visualizations based on the metrics def create_visualizations(): # Placeholder figures for testing fig_swaps_chain = go.Figure() fig_swaps_chain.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) fig_bridges_chain = go.Figure() fig_bridges_chain.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) fig_agents_registered = go.Figure() fig_agents_registered.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) fig_tvl = go.Figure() fig_tvl.add_annotation( text="Blockchain data loading disabled - placeholder visualization", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False, font=dict(size=20) ) return fig_swaps_chain, fig_bridges_chain, fig_agents_registered, fig_tvl # Modify dashboard function to make the plot container responsive def dashboard(): with gr.Blocks() as demo: gr.Markdown("# Average Optimus Agent Performance") # Create tabs 
for APR and ROI metrics with gr.Tabs(): # APR Metrics tab with gr.Tab("APR Metrics"): with gr.Column(): refresh_apr_btn = gr.Button("Refresh APR Data") # Create container for plotly figure with responsive sizing with gr.Column(): combined_apr_graph = gr.Plot(label="APR for All Agents", elem_id="responsive_apr_plot") # Create compact toggle controls at the bottom of the graph with gr.Row(visible=True): gr.Markdown("##### Toggle Graph Lines", elem_id="apr_toggle_title") with gr.Row(): with gr.Column(): with gr.Row(elem_id="apr_toggle_container"): with gr.Column(scale=1, min_width=150): apr_toggle = gr.Checkbox(label="APR Average", value=True, elem_id="apr_toggle") with gr.Column(scale=1, min_width=150): adjusted_apr_toggle = gr.Checkbox(label="ETH Adjusted APR Average", value=True, elem_id="adjusted_apr_toggle") # Add a text area for status messages apr_status_text = gr.Textbox(label="Status", value="Ready", interactive=False) # ROI Metrics tab with gr.Tab("ROI Metrics"): with gr.Column(): refresh_roi_btn = gr.Button("Refresh ROI Data") # Create container for plotly figure with responsive sizing with gr.Column(): combined_roi_graph = gr.Plot(label="ROI for All Agents", elem_id="responsive_roi_plot") # Create compact toggle controls at the bottom of the graph with gr.Row(visible=True): gr.Markdown("##### Toggle Graph Lines", elem_id="roi_toggle_title") with gr.Row(): with gr.Column(): with gr.Row(elem_id="roi_toggle_container"): with gr.Column(scale=1, min_width=150): roi_toggle = gr.Checkbox(label="ROI Average", value=True, elem_id="roi_toggle") # Add a text area for status messages roi_status_text = gr.Textbox(label="Status", value="Ready", interactive=False) # Add custom CSS for making the plots responsive gr.HTML(""" """) # Function to update the APR graph def update_apr_graph(show_apr_ma=True, show_adjusted_apr_ma=True): # Generate visualization and get figure object directly try: combined_fig, _ = generate_apr_visualizations() # Update visibility of traces based on toggle values for i, trace in enumerate(combined_fig.data): # Check if this is a moving average trace if trace.name == 'Median APR (7d window)': trace.visible = show_apr_ma elif trace.name == 'Average ETH Adjusted APR (3d window)': trace.visible = show_adjusted_apr_ma return combined_fig except Exception as e: logger.exception("Error generating APR visualization") # Create error figure error_fig = go.Figure() error_fig.add_annotation( text=f"Error: {str(e)}", x=0.5, y=0.5, showarrow=False, font=dict(size=15, color="red") ) return error_fig # Function to update the ROI graph def update_roi_graph(show_roi_ma=True): # Generate visualization and get figure object directly try: combined_fig, _ = generate_roi_visualizations() # Update visibility of traces based on toggle values for i, trace in enumerate(combined_fig.data): # Check if this is a moving average trace if trace.name == 'Median ROI (7d window)': trace.visible = show_roi_ma return combined_fig except Exception as e: logger.exception("Error generating ROI visualization") # Create error figure error_fig = go.Figure() error_fig.add_annotation( text=f"Error: {str(e)}", x=0.5, y=0.5, showarrow=False, font=dict(size=15, color="red") ) return error_fig # Initialize the APR graph on load with a placeholder apr_placeholder_fig = go.Figure() apr_placeholder_fig.add_annotation( text="Click 'Refresh APR Data' to load APR graph", x=0.5, y=0.5, showarrow=False, font=dict(size=15) ) combined_apr_graph.value = apr_placeholder_fig # Initialize the ROI graph on load with a placeholder 
roi_placeholder_fig = go.Figure() roi_placeholder_fig.add_annotation( text="Click 'Refresh ROI Data' to load ROI graph", x=0.5, y=0.5, showarrow=False, font=dict(size=15) ) combined_roi_graph.value = roi_placeholder_fig # Function to update the APR graph based on toggle states def update_apr_graph_with_toggles(apr_visible, adjusted_apr_visible): return update_apr_graph(apr_visible, adjusted_apr_visible) # Function to update the ROI graph based on toggle states def update_roi_graph_with_toggles(roi_visible): return update_roi_graph(roi_visible) # Function to refresh APR data def refresh_apr_data(): """Refresh APR data from the database and update the visualization""" try: # Fetch new APR data logger.info("Manually refreshing APR data...") fetch_apr_data_from_db() # Verify data was fetched successfully if global_df is None or len(global_df) == 0: logger.error("Failed to fetch APR data") return combined_apr_graph.value, "Error: Failed to fetch APR data. Check the logs for details." # Log info about fetched data with focus on adjusted_apr may_10_2025 = datetime(2025, 5, 10) if 'timestamp' in global_df and 'adjusted_apr' in global_df: after_may_10 = global_df[global_df['timestamp'] >= may_10_2025] with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()] logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}") logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}") # Generate new visualization logger.info("Generating new APR visualization...") new_graph = update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value) return new_graph, "APR data refreshed successfully" except Exception as e: logger.error(f"Error refreshing APR data: {e}") return combined_apr_graph.value, f"Error: {str(e)}" # Function to refresh ROI data def refresh_roi_data(): """Refresh ROI data from the database and update the visualization""" try: # Fetch new ROI data logger.info("Manually refreshing ROI data...") fetch_apr_data_from_db() # This also fetches ROI data # Verify data was fetched successfully if global_roi_df is None or len(global_roi_df) == 0: logger.error("Failed to fetch ROI data") return combined_roi_graph.value, "Error: Failed to fetch ROI data. Check the logs for details." # Generate new visualization logger.info("Generating new ROI visualization...") new_graph = update_roi_graph(roi_toggle.value) return new_graph, "ROI data refreshed successfully" except Exception as e: logger.error(f"Error refreshing ROI data: {e}") return combined_roi_graph.value, f"Error: {str(e)}" # Set up the button click event for APR refresh refresh_apr_btn.click( fn=refresh_apr_data, inputs=[], outputs=[combined_apr_graph, apr_status_text] ) # Set up the button click event for ROI refresh refresh_roi_btn.click( fn=refresh_roi_data, inputs=[], outputs=[combined_roi_graph, roi_status_text] ) # Set up the toggle switch events for APR apr_toggle.change( fn=update_apr_graph_with_toggles, inputs=[apr_toggle, adjusted_apr_toggle], outputs=[combined_apr_graph] ) adjusted_apr_toggle.change( fn=update_apr_graph_with_toggles, inputs=[apr_toggle, adjusted_apr_toggle], outputs=[combined_apr_graph] ) # Set up the toggle switch events for ROI roi_toggle.change( fn=update_roi_graph_with_toggles, inputs=[roi_toggle], outputs=[combined_roi_graph] ) return demo # Launch the dashboard if __name__ == "__main__": dashboard().launch() def generate_adjusted_apr_report(): """ Generate a detailed report about adjusted_apr data availability and save it to a file. 
Returns the path to the generated report file. """ global global_df if global_df is None or global_df.empty or 'adjusted_apr' not in global_df.columns: logger.warning("No adjusted_apr data available for report generation") return None # Create a report file report_path = "adjusted_apr_report.txt" with open(report_path, "w") as f: f.write("======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n") # Summary statistics total_records = len(global_df) records_with_adjusted = global_df['adjusted_apr'].notna().sum() pct_with_adjusted = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0 f.write(f"Total APR records: {total_records}\n") f.write(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n") # First and last data points if records_with_adjusted > 0: has_adjusted = global_df[global_df['adjusted_apr'].notna()] first_date = has_adjusted['timestamp'].min() last_date = has_adjusted['timestamp'].max() f.write(f"First adjusted_apr record: {first_date}\n") f.write(f"Last adjusted_apr record: {last_date}\n") f.write(f"Date range: {(last_date - first_date).days} days\n\n") # Agent statistics f.write("===== AGENT STATISTICS =====\n\n") # Group by agent agent_stats = [] for agent_id in global_df['agent_id'].unique(): agent_data = global_df[global_df['agent_id'] == agent_id] agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}" total_agent_records = len(agent_data) agent_with_adjusted = agent_data['adjusted_apr'].notna().sum() coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0 agent_stats.append({ 'agent_id': agent_id, 'agent_name': agent_name, 'total_records': total_agent_records, 'with_adjusted': agent_with_adjusted, 'coverage_pct': coverage_pct }) # Sort by coverage percentage (descending) agent_stats.sort(key=lambda x: x['coverage_pct'], reverse=True) # Write agent statistics for agent in agent_stats: f.write(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n") f.write(f" Records: {agent['total_records']}\n") f.write(f" With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n") # If agent has adjusted data, show date range agent_data = global_df[global_df['agent_id'] == agent['agent_id']] agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()] if not agent_adjusted.empty: first = agent_adjusted['timestamp'].min() last = agent_adjusted['timestamp'].max() f.write(f" First adjusted_apr: {first}\n") f.write(f" Last adjusted_apr: {last}\n") f.write("\n") # Check for May 10th cutoff issue f.write("===== MAY 10TH CUTOFF ANALYSIS =====\n\n") may_10_2025 = datetime(2025, 5, 10) before_cutoff = global_df[global_df['timestamp'] < may_10_2025] after_cutoff = global_df[global_df['timestamp'] >= may_10_2025] # Calculate coverage before and after before_total = len(before_cutoff) before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum() before_pct = (before_with_adjusted / before_total) * 100 if before_total > 0 else 0 after_total = len(after_cutoff) after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum() after_pct = (after_with_adjusted / after_total) * 100 if after_total > 0 else 0 f.write(f"Before May 10th, 2025:\n") f.write(f" Records: {before_total}\n") f.write(f" With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n") f.write(f"After May 10th, 2025:\n") f.write(f" Records: {after_total}\n") f.write(f" With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n") # Check for agents that had data 
before but not after if before_total > 0 and after_total > 0: agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique()) missing_after = agents_before - agents_after new_after = agents_after - agents_before if missing_after: f.write(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n") # For each missing agent, show the last date with adjusted_apr for agent_id in missing_after: agent_data = before_cutoff[(before_cutoff['agent_id'] == agent_id) & (before_cutoff['adjusted_apr'].notna())] if not agent_data.empty: last_date = agent_data['timestamp'].max() agent_name = agent_data['agent_name'].iloc[0] f.write(f" {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n") if new_after: f.write(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n") logger.info(f"Adjusted APR report generated: {report_path}") return report_path
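

# --------------------------------------------------------------------------
# Illustrative sketch (not the code used above): the date-conditional outlier
# filter applied to APR data in create_combined_time_series_graph(), where
# values outside +/-500% are dropped only for points before June 22, 2025.
# The function name and defaults below are hypothetical; it relies on the
# module-level pandas and datetime imports.
def _example_filter_apr_outliers(apr_df, cutoff=datetime(2025, 6, 22), limit=500.0):
    before = apr_df[apr_df['timestamp'] < cutoff]
    after = apr_df[apr_df['timestamp'] >= cutoff]
    # Apply the +/-limit percent filter only to the pre-cutoff slice
    before = before[before['apr'].between(-limit, limit)]
    return pd.concat([before, after], ignore_index=True)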
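

# --------------------------------------------------------------------------
# Illustrative sketch: the daily-median pipeline used by
# create_combined_time_series_graph(). The real helpers (aggregate_daily_data,
# calculate_daily_medians, calculate_moving_average_medians) are defined
# elsewhere in this codebase; this hypothetical single-function version only
# shows the intended three steps, assuming a DataFrame with 'agent_id',
# 'timestamp' and a numeric value column.
def _example_daily_median_pipeline(df, value_col='apr', window_days=7):
    """Daily per-agent mean -> daily cross-agent median -> rolling mean of medians."""
    data = df[['agent_id', 'timestamp', value_col]].dropna(subset=[value_col]).copy()
    # Normalize timestamps to midnight so each agent contributes one value per day
    data['timestamp'] = pd.to_datetime(data['timestamp']).dt.normalize()
    # Step 1: aggregate per agent per day (mean of that agent's readings that day)
    daily_agent = data.groupby(['agent_id', 'timestamp'], as_index=False)[value_col].mean()
    # Step 2: median across agents for each day
    daily_median = daily_agent.groupby('timestamp', as_index=False)[value_col].median()
    # Step 3: trailing moving average of the daily medians over `window_days`
    daily_median = daily_median.sort_values('timestamp').set_index('timestamp')
    daily_median['moving_avg'] = (
        daily_median[value_col].rolling(f'{window_days}D', min_periods=1).mean()
    )
    return daily_median.reset_index()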
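

# --------------------------------------------------------------------------
# Illustrative sketch: building the hover strings for the median lines. The
# loop above reloads the CSV on every row to look up active-agent counts; a
# sketch like this precomputes the daily counts once, under the assumption
# that `daily_agent` holds one row per agent per normalized day. All names
# here are hypothetical, not part of the existing code.
def _example_hover_text(daily_medians_ma, daily_agent, label='Median APR (7d window)'):
    # Number of distinct agents reporting on each day
    counts = daily_agent.groupby('timestamp')['agent_id'].nunique()
    return [
        f"Date: {row['timestamp']:%Y-%m-%d}<br>"
        f"{label}: {row['moving_avg']:.2f}%<br>"
        f"Active agents: {int(counts.get(row['timestamp'], 0))}"
        for _, row in daily_medians_ma.iterrows()
    ]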
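

# --------------------------------------------------------------------------
# Illustrative sketch: the fallback APR graph above computes a 6-hour windowed
# average and a cumulative ("infinite window") average with an explicit Python
# loop; pandas can express a comparable computation directly. This assumes
# `apr_data` has 'timestamp' and 'apr' columns and is only a sketch, not the
# code path used by the fallback.
def _example_windowed_and_cumulative_avg(apr_data, hours=6):
    series = (
        apr_data.sort_values('timestamp')
        .set_index('timestamp')['apr']
    )
    return pd.DataFrame({
        'moving_avg': series.rolling(f'{hours}h', min_periods=1).mean(),
        'infinite_avg': series.expanding(min_periods=1).mean(),
    }).reset_index()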
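

# --------------------------------------------------------------------------
# Illustrative sketch: how the dashboard toggles flip Plotly traces on and off.
# update_apr_graph() / update_roi_graph() above match traces by their exact
# legend name, so the keys passed here must match the names used when the
# traces were added (e.g. 'Median APR (7d window)'). This helper is a
# hypothetical generalization, not part of the existing code.
def _example_set_trace_visibility(fig, name_to_visible):
    for trace in fig.data:
        if trace.name in name_to_visible:
            trace.visible = name_to_visible[trace.name]
    return fig

# Example usage (assumed trace names):
# _example_set_trace_visibility(fig, {'Median APR (7d window)': True,
#                                     'Median Adjusted APR (7d window)': False})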
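

# --------------------------------------------------------------------------
# Illustrative sketch: the per-agent coverage numbers that
# generate_adjusted_apr_report() writes out, computed with a single groupby
# instead of a per-agent loop. It assumes a frame with 'agent_id',
# 'agent_name' and 'adjusted_apr' columns (as global_df has here); the output
# column names are hypothetical.
def _example_adjusted_apr_coverage(df):
    grouped = df.groupby(['agent_id', 'agent_name'])['adjusted_apr']
    coverage = grouped.agg(total_records='size', with_adjusted='count').reset_index()
    coverage['coverage_pct'] = 100.0 * coverage['with_adjusted'] / coverage['total_records']
    return coverage.sort_values('coverage_pct', ascending=False)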