import requests
import pandas as pd
import gradio as gr
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import json
# Commenting out blockchain-related imports that cause loading issues
# from web3 import Web3
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import random
import logging
from typing import List, Dict, Any, Optional
# Comment out the import for now and replace with dummy functions
# from app_trans_new import create_transcation_visualizations,create_active_agents_visualizations
# APR visualization functions integrated directly
from fetch_and_preprocess_data import generate_continuous_random_data
from initial_value_fixer import fix_apr_and_roi
from load_from_csv import (
load_apr_data_from_csv,
load_roi_data_from_csv,
load_statistics_from_csv,
check_csv_data_availability,
get_data_freshness_info
)
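# Note: based on how these loaders are called later in this module (see
# generate_apr_visualizations / generate_roi_visualizations below),
# load_apr_data_from_csv() and load_roi_data_from_csv() are each expected to
# return a (DataFrame, csv_path) tuple; the CSV is treated as the primary data
# source and the API is only used as a fallback when the DataFrame is empty.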
# Set up logging with appropriate verbosity
logging.basicConfig(
level=logging.INFO, # Use INFO level instead of DEBUG to reduce verbosity
format="%(asctime)s - %(levelname)s - %(message)s",
handlers=[
logging.FileHandler("app_debug.log"), # Log to file for persistence
logging.StreamHandler() # Also log to console
]
)
logger = logging.getLogger(__name__)
# Reduce third-party library logging
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("matplotlib").setLevel(logging.WARNING)
# Log the startup information
logger.info("============= APPLICATION STARTING =============")
logger.info(f"Running from directory: {os.getcwd()}")
# Global variables to store the data for reuse
global_df = None
global_roi_df = None
global_dummy_apr_df = None # Store dummy APR data separately
global_dummy_roi_df = None # Store dummy ROI data separately
# Configuration
API_BASE_URL = "https://afmdb.autonolas.tech"
logger.info(f"Using API endpoint: {API_BASE_URL}")
def get_agent_type_by_name(type_name: str) -> Dict[str, Any]:
"""Get agent type by name"""
url = f"{API_BASE_URL}/api/agent-types/name/{type_name}"
logger.debug(f"Calling API: {url}")
try:
response = requests.get(url)
logger.debug(f"Response status: {response.status_code}")
if response.status_code == 404:
logger.error(f"Agent type '{type_name}' not found")
return None
response.raise_for_status()
result = response.json()
logger.debug(f"Agent type response: {result}")
return result
except Exception as e:
logger.error(f"Error in get_agent_type_by_name: {e}")
return None
def get_attribute_definition_by_name(attr_name: str) -> Dict[str, Any]:
"""Get attribute definition by name"""
url = f"{API_BASE_URL}/api/attributes/name/{attr_name}"
logger.debug(f"Calling API: {url}")
try:
response = requests.get(url)
logger.debug(f"Response status: {response.status_code}")
if response.status_code == 404:
logger.error(f"Attribute definition '{attr_name}' not found")
return None
response.raise_for_status()
result = response.json()
logger.debug(f"Attribute definition response: {result}")
return result
except Exception as e:
logger.error(f"Error in get_attribute_definition_by_name: {e}")
return None
def get_agents_by_type(type_id: int) -> List[Dict[str, Any]]:
"""Get all agents of a specific type"""
url = f"{API_BASE_URL}/api/agent-types/{type_id}/agents/"
logger.debug(f"Calling API: {url}")
try:
response = requests.get(url)
logger.debug(f"Response status: {response.status_code}")
if response.status_code == 404:
logger.error(f"No agents found for type ID {type_id}")
return []
response.raise_for_status()
result = response.json()
logger.debug(f"Agents count: {len(result)}")
logger.debug(f"First few agents: {result[:2] if result else []}")
return result
except Exception as e:
logger.error(f"Error in get_agents_by_type: {e}")
return []
def get_attribute_values_by_type_and_attr(agents: List[Dict[str, Any]], attr_def_id: int) -> List[Dict[str, Any]]:
"""Get all attribute values for a specific attribute definition across all agents of a given list"""
all_attributes = []
logger.debug(f"Getting attributes for {len(agents)} agents with attr_def_id: {attr_def_id}")
# For each agent, get their attributes and filter for the one we want
for agent in agents:
agent_id = agent["agent_id"]
# Call the /api/agents/{agent_id}/attributes/ endpoint
url = f"{API_BASE_URL}/api/agents/{agent_id}/attributes/"
logger.debug(f"Calling API for agent {agent_id}: {url}")
try:
response = requests.get(url, params={"limit": 1000})
if response.status_code == 404:
logger.error(f"No attributes found for agent ID {agent_id}")
continue
response.raise_for_status()
agent_attrs = response.json()
logger.debug(f"Agent {agent_id} has {len(agent_attrs)} attributes")
# Filter for the specific attribute definition ID
filtered_attrs = [attr for attr in agent_attrs if attr.get("attr_def_id") == attr_def_id]
logger.debug(f"Agent {agent_id} has {len(filtered_attrs)} APR attributes")
if filtered_attrs:
logger.debug(f"Sample attribute for agent {agent_id}: {filtered_attrs[0]}")
all_attributes.extend(filtered_attrs)
except requests.exceptions.RequestException as e:
logger.error(f"Error fetching attributes for agent ID {agent_id}: {e}")
logger.info(f"Total APR attributes found across all agents: {len(all_attributes)}")
return all_attributes
def get_agent_name(agent_id: int, agents: List[Dict[str, Any]]) -> str:
"""Get agent name from agent ID"""
for agent in agents:
if agent["agent_id"] == agent_id:
return agent["agent_name"]
return "Unknown"
def extract_apr_value(attr: Dict[str, Any]) -> Dict[str, Any]:
"""Extract APR value, adjusted APR value, ROI value, and timestamp from JSON value"""
try:
agent_id = attr.get("agent_id", "unknown")
logger.debug(f"Extracting APR value for agent {agent_id}")
# The APR value is stored in the json_value field
if attr["json_value"] is None:
logger.debug(f"Agent {agent_id}: json_value is None")
return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": agent_id, "is_dummy": False}
# If json_value is a string, parse it
if isinstance(attr["json_value"], str):
logger.debug(f"Agent {agent_id}: json_value is string, parsing")
json_data = json.loads(attr["json_value"])
else:
json_data = attr["json_value"]
apr = json_data.get("apr")
adjusted_apr = json_data.get("adjusted_apr") # Extract adjusted_apr if present
timestamp = json_data.get("timestamp")
address = json_data.get("portfolio_snapshot", {}).get("portfolio", {}).get("address")
# Extract ROI (f_i_ratio) from calculation_metrics if it exists
roi = None
if "calculation_metrics" in json_data and json_data["calculation_metrics"] is not None:
roi = json_data["calculation_metrics"].get("f_i_ratio")
logger.debug(f"Agent {agent_id}: Raw APR value: {apr}, adjusted APR value: {adjusted_apr}, ROI value: {roi}, timestamp: {timestamp}")
# Convert timestamp to datetime if it exists
timestamp_dt = None
if timestamp:
timestamp_dt = datetime.fromtimestamp(timestamp)
result = json_data.copy() # Copy the original JSON data for logging
result.update({
"apr": apr,
"adjusted_apr": adjusted_apr,
"roi": roi,
"timestamp": timestamp_dt,
"agent_id": agent_id,
"is_dummy": False,
"address": address
})
logger.debug(f"Agent {agent_id}: Extracted result: {result}")
return result
except (json.JSONDecodeError, KeyError, TypeError) as e:
logger.error(f"Error parsing JSON value: {e} for agent_id: {attr.get('agent_id')}")
logger.error(f"Problematic json_value: {attr.get('json_value')}")
return {"apr": None, "adjusted_apr": None, "roi": None, "timestamp": None, "agent_id": attr.get('agent_id'), "is_dummy": False, "address": None}
def fetch_apr_data_from_db():
"""
Fetch APR data from database using the API.
"""
global global_df
global global_roi_df
logger.info("==== Starting APR data fetch ====")
try:
# Step 1: Find the Optimus agent type
logger.info("Finding Optimus agent type")
optimus_type = get_agent_type_by_name("Optimus")
        if not optimus_type:
            logger.error("Optimus agent type not found, using placeholder data")
            global_df = pd.DataFrame([])
            global_roi_df = pd.DataFrame([])
            return global_df, global_roi_df
type_id = optimus_type["type_id"]
logger.info(f"Found Optimus agent type with ID: {type_id}")
# Step 2: Find the APR attribute definition
logger.info("Finding APR attribute definition")
apr_attr_def = get_attribute_definition_by_name("APR")
        if not apr_attr_def:
            logger.error("APR attribute definition not found, using placeholder data")
            global_df = pd.DataFrame([])
            global_roi_df = pd.DataFrame([])
            return global_df, global_roi_df
attr_def_id = apr_attr_def["attr_def_id"]
logger.info(f"Found APR attribute definition with ID: {attr_def_id}")
# Step 3: Get all agents of type Optimus
logger.info(f"Getting all agents of type Optimus (type_id: {type_id})")
optimus_agents = get_agents_by_type(type_id)
        if not optimus_agents:
            logger.error("No agents of type 'Optimus' found")
            global_df = pd.DataFrame([])
            global_roi_df = pd.DataFrame([])
            return global_df, global_roi_df
logger.info(f"Found {len(optimus_agents)} Optimus agents")
logger.debug(f"Optimus agents: {[{'agent_id': a['agent_id'], 'agent_name': a['agent_name']} for a in optimus_agents]}")
# Step 4: Fetch all APR values for Optimus agents
logger.info(f"Fetching APR values for all Optimus agents (attr_def_id: {attr_def_id})")
apr_attributes = get_attribute_values_by_type_and_attr(optimus_agents, attr_def_id)
        if not apr_attributes:
            logger.error("No APR values found for 'Optimus' agents")
            global_df = pd.DataFrame([])
            global_roi_df = pd.DataFrame([])
            return global_df, global_roi_df
logger.info(f"Found {len(apr_attributes)} APR attributes total")
# Step 5: Extract APR and ROI data
logger.info("Extracting APR and ROI data from attributes")
apr_data_list = []
roi_data_list = []
for attr in apr_attributes:
data = extract_apr_value(attr)
if data["timestamp"] is not None:
# Get agent name
agent_name = get_agent_name(attr["agent_id"], optimus_agents)
# Add agent name to the data
data["agent_name"] = agent_name
# Add is_dummy flag (all real data)
data["is_dummy"] = False
# Process APR data
if data["apr"] is not None:
# Include all APR values (including negative ones) EXCEPT zero and -100
if data["apr"] != 0 and data["apr"] != -100:
apr_entry = data.copy()
apr_entry["metric_type"] = "APR"
logger.debug(f"Agent {agent_name} ({attr['agent_id']}): APR value: {data['apr']}")
# Add to the APR data list
apr_data_list.append(apr_entry)
else:
# Log that we're skipping zero or -100 values
logger.debug(f"Skipping APR value for agent {agent_name} ({attr['agent_id']}): {data['apr']} (zero or -100)")
# Process ROI data
if data["roi"] is not None:
# Include all ROI values except extreme outliers
if data["roi"] > -10 and data["roi"] < 10: # Filter extreme outliers
roi_entry = {
"roi": data["roi"],
"timestamp": data["timestamp"],
"agent_id": data["agent_id"],
"agent_name": agent_name,
"is_dummy": False,
"metric_type": "ROI"
}
logger.debug(f"Agent {agent_name} ({attr['agent_id']}): ROI value: {data['roi']}")
# Add to the ROI data list
roi_data_list.append(roi_entry)
else:
# Log that we're skipping extreme outlier values
logger.debug(f"Skipping ROI value for agent {agent_name} ({attr['agent_id']}): {data['roi']} (extreme outlier)")
logger.info(f"Extracted {len(apr_data_list)} valid APR data points and {len(roi_data_list)} valid ROI data points")
# Added debug for adjusted APR data after May 10th
may_10_2025 = datetime(2025, 5, 10)
after_may_10 = [d for d in apr_data_list if d['timestamp'] >= may_10_2025]
with_adjusted_after_may_10 = [d for d in after_may_10 if d['adjusted_apr'] is not None]
logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
# Log detailed information about when data began
first_adjusted = None
if with_adjusted_after_may_10:
first_adjusted_after = min(with_adjusted_after_may_10, key=lambda x: x['timestamp'])
logger.info(f"First adjusted_apr after May 10th: {first_adjusted_after['timestamp']} (Agent: {first_adjusted_after['agent_id']})")
# Check all data for first adjusted_apr
all_with_adjusted = [d for d in apr_data_list if d['adjusted_apr'] is not None]
if all_with_adjusted:
first_adjusted = min(all_with_adjusted, key=lambda x: x['timestamp'])
logger.info(f"First adjusted_apr ever: {first_adjusted['timestamp']} (Agent: {first_adjusted['agent_id']})")
last_adjusted = max(all_with_adjusted, key=lambda x: x['timestamp'])
logger.info(f"Last adjusted_apr ever: {last_adjusted['timestamp']} (Agent: {last_adjusted['agent_id']})")
# Calculate overall coverage
adjusted_ratio = len(all_with_adjusted) / len(apr_data_list) * 100
logger.info(f"Overall adjusted_apr coverage: {adjusted_ratio:.2f}% ({len(all_with_adjusted)}/{len(apr_data_list)} records)")
# Log per-agent adjusted APR statistics
agent_stats = {}
for record in apr_data_list:
agent_id = record['agent_id']
has_adjusted = record['adjusted_apr'] is not None
if agent_id not in agent_stats:
agent_stats[agent_id] = {'total': 0, 'adjusted': 0}
agent_stats[agent_id]['total'] += 1
if has_adjusted:
agent_stats[agent_id]['adjusted'] += 1
# Log stats for agents with meaningful data
for agent_id, stats in agent_stats.items():
if stats['total'] > 0:
coverage = (stats['adjusted'] / stats['total']) * 100
if coverage > 0: # Only log agents that have at least some adjusted data
logger.info(f"Agent {agent_id}: {coverage:.2f}% adjusted coverage ({stats['adjusted']}/{stats['total']} records)")
# Check for gaps in adjusted APR data
for agent_id in agent_stats:
# Get all records for this agent
agent_records = [r for r in apr_data_list if r['agent_id'] == agent_id]
# Sort by timestamp
agent_records.sort(key=lambda x: x['timestamp'])
# Find where adjusted APR starts and if there are gaps
has_adjusted = False
gap_count = 0
streak_length = 0
for record in agent_records:
if record['adjusted_apr'] is not None:
if not has_adjusted:
has_adjusted = True
logger.info(f"Agent {agent_id}: First adjusted APR at {record['timestamp']}")
streak_length += 1
elif has_adjusted:
# We had adjusted data but now it's missing
gap_count += 1
if streak_length > 0:
logger.warning(f"Agent {agent_id}: Gap in adjusted APR data after {streak_length} consecutive records")
streak_length = 0
if gap_count > 0:
logger.warning(f"Agent {agent_id}: Found {gap_count} gaps in adjusted APR data")
elif has_adjusted:
logger.info(f"Agent {agent_id}: Continuous adjusted APR data with no gaps")
# Provide summary statistics
agents_with_data = sum(1 for stats in agent_stats.values() if stats['adjusted'] > 0)
agents_with_gaps = sum(1 for agent_id in agent_stats if
any(apr_data_list[i]['agent_id'] == agent_id and apr_data_list[i]['adjusted_apr'] is not None and
i+1 < len(apr_data_list) and apr_data_list[i+1]['agent_id'] == agent_id and
apr_data_list[i+1]['adjusted_apr'] is None
for i in range(len(apr_data_list)-1)))
logger.info(f"ADJUSTED APR SUMMARY: {agents_with_data}/{len(agent_stats)} agents have adjusted APR data")
if agents_with_gaps > 0:
logger.warning(f"ATTENTION: {agents_with_gaps} agents have gaps in their adjusted APR data")
logger.warning("These gaps may cause discontinuities in the adjusted APR graph")
else:
logger.info("No gaps detected in adjusted APR data - graph should be continuous")
if len(with_adjusted_after_may_10) == 0 and len(after_may_10) > 0:
logger.warning("No adjusted_apr values found after May 10th, 2025 despite having APR data")
# Log agent IDs with missing adjusted_apr after May 10th
agents_after_may_10 = set(d['agent_id'] for d in after_may_10)
logger.info(f"Agents with data after May 10th: {agents_after_may_10}")
# Check these same agents before May 10th
before_may_10 = [d for d in apr_data_list if d['timestamp'] < may_10_2025]
agents_with_adjusted_before = {d['agent_id'] for d in before_may_10 if d['adjusted_apr'] is not None}
# Agents that had adjusted_apr before but not after
missing_adjusted = agents_with_adjusted_before.intersection(agents_after_may_10)
if missing_adjusted:
logger.warning(f"Agents that had adjusted_apr before May 10th but not after: {missing_adjusted}")
# Find the last valid adjusted_apr date for these agents
for agent_id in missing_adjusted:
agent_data = [d for d in before_may_10 if d['agent_id'] == agent_id and d['adjusted_apr'] is not None]
if agent_data:
last_entry = max(agent_data, key=lambda d: d['timestamp'])
logger.info(f"Agent {agent_id}: Last adjusted_apr on {last_entry['timestamp']} with value {last_entry['adjusted_apr']}")
# Look at the first entry after the cutoff without adjusted_apr
agent_after = [d for d in after_may_10 if d['agent_id'] == agent_id]
if agent_after:
first_after = min(agent_after, key=lambda d: d['timestamp'])
logger.info(f"Agent {agent_id}: First entry after cutoff on {first_after['timestamp']} missing adjusted_apr")
# If the agent data has the 'adjusted_apr_key' field, log that info
if 'adjusted_apr_key' in first_after:
logger.info(f"Agent {agent_id}: Key used for adjusted_apr: {first_after['adjusted_apr_key']}")
# Add debug logic to check for any adjusted_apr after May 10th and which agents have it
elif len(with_adjusted_after_may_10) > 0:
logger.info("Found adjusted_apr values after May 10th, 2025")
# Group by agent and log
agent_counts = {}
for item in with_adjusted_after_may_10:
agent_id = item['agent_id']
if agent_id in agent_counts:
agent_counts[agent_id] += 1
else:
agent_counts[agent_id] = 1
logger.info(f"Agents with adjusted_apr after May 10th: {agent_counts}")
# Log adjusted_apr keys used
keys_used = {item.get('adjusted_apr_key') for item in with_adjusted_after_may_10 if 'adjusted_apr_key' in item}
if keys_used:
logger.info(f"Keys used for adjusted_apr after May 10th: {keys_used}")
# Convert to DataFrames
if not apr_data_list:
logger.error("No valid APR data extracted")
global_df = pd.DataFrame([])
else:
# Convert list of dictionaries to DataFrame for APR
global_df = pd.DataFrame(apr_data_list)
if not roi_data_list:
logger.error("No valid ROI data extracted")
global_roi_df = pd.DataFrame([])
else:
# Convert list of dictionaries to DataFrame for ROI
global_roi_df = pd.DataFrame(roi_data_list)
# Handle dummy data generation
global global_dummy_apr_df
global global_dummy_roi_df
logger.info("Handling dummy data...")
# Generate dummy APR data only if needed
if not global_df.empty:
# Check if we already have dummy data
if global_dummy_apr_df is None:
# First time - generate all dummy data
logger.info("Generating initial dummy APR data...")
global_dummy_apr_df = generate_continuous_random_data(global_df)
# Only keep APR data
if not global_dummy_apr_df.empty:
global_dummy_apr_df = global_dummy_apr_df[global_dummy_apr_df['metric_type'] == 'APR']
logger.info(f"Generated {len(global_dummy_apr_df)} initial dummy APR data points")
else:
# We already have dummy data - check if we need to generate more
# Find the latest timestamp in the real data
latest_real_timestamp = global_df['timestamp'].max()
# Find the latest timestamp in the dummy data
latest_dummy_timestamp = global_dummy_apr_df['timestamp'].max() if not global_dummy_apr_df.empty else None
# If the real data has newer timestamps, generate more dummy data
if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp:
logger.info("Generating additional dummy APR data for new timestamps...")
# Create a temporary dataframe with only the latest real data
temp_df = global_df[global_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_df
# Generate dummy data for the new timestamps
new_dummy_data = generate_continuous_random_data(temp_df)
# Only keep APR data
if not new_dummy_data.empty:
new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'APR']
logger.info(f"Generated {len(new_dummy_data)} additional dummy APR data points")
# Append the new dummy data to the existing dummy data
global_dummy_apr_df = pd.concat([global_dummy_apr_df, new_dummy_data], ignore_index=True)
else:
logger.info("No new timestamps in real data, using existing dummy APR data")
# Combine real and dummy APR data
if not global_dummy_apr_df.empty:
apr_dummy_count = len(global_dummy_apr_df)
global_df = pd.concat([global_df, global_dummy_apr_df], ignore_index=True)
logger.info(f"Added {apr_dummy_count} dummy APR data points to the dataset")
# Generate dummy ROI data only if needed
if not global_roi_df.empty:
# Check if we already have dummy data
if global_dummy_roi_df is None:
# First time - generate all dummy data
logger.info("Generating initial dummy ROI data...")
global_dummy_roi_df = generate_continuous_random_data(global_roi_df)
# Only keep ROI data
if not global_dummy_roi_df.empty:
global_dummy_roi_df = global_dummy_roi_df[global_dummy_roi_df['metric_type'] == 'ROI']
logger.info(f"Generated {len(global_dummy_roi_df)} initial dummy ROI data points")
else:
# We already have dummy data - check if we need to generate more
# Find the latest timestamp in the real data
latest_real_timestamp = global_roi_df['timestamp'].max()
# Find the latest timestamp in the dummy data
latest_dummy_timestamp = global_dummy_roi_df['timestamp'].max() if not global_dummy_roi_df.empty else None
# If the real data has newer timestamps, generate more dummy data
if latest_dummy_timestamp is None or latest_real_timestamp > latest_dummy_timestamp:
logger.info("Generating additional dummy ROI data for new timestamps...")
# Create a temporary dataframe with only the latest real data
temp_df = global_roi_df[global_roi_df['timestamp'] > latest_dummy_timestamp] if latest_dummy_timestamp else global_roi_df
# Generate dummy data for the new timestamps
new_dummy_data = generate_continuous_random_data(temp_df)
# Only keep ROI data
if not new_dummy_data.empty:
new_dummy_data = new_dummy_data[new_dummy_data['metric_type'] == 'ROI']
logger.info(f"Generated {len(new_dummy_data)} additional dummy ROI data points")
# Append the new dummy data to the existing dummy data
global_dummy_roi_df = pd.concat([global_dummy_roi_df, new_dummy_data], ignore_index=True)
else:
logger.info("No new timestamps in real data, using existing dummy ROI data")
# Combine real and dummy ROI data
if not global_dummy_roi_df.empty:
roi_dummy_count = len(global_dummy_roi_df)
global_roi_df = pd.concat([global_roi_df, global_dummy_roi_df], ignore_index=True)
logger.info(f"Added {roi_dummy_count} dummy ROI data points to the dataset")
# Log the resulting dataframe
logger.info(f"Created DataFrame with {len(global_df)} rows (including dummy data)")
logger.info(f"DataFrame columns: {global_df.columns.tolist()}")
logger.info(f"APR statistics: min={global_df['apr'].min()}, max={global_df['apr'].max()}, mean={global_df['apr'].mean()}")
# Log adjusted APR statistics if available
if 'adjusted_apr' in global_df.columns and global_df['adjusted_apr'].notna().any():
logger.info(f"Adjusted APR statistics: min={global_df['adjusted_apr'].min()}, max={global_df['adjusted_apr'].max()}, mean={global_df['adjusted_apr'].mean()}")
logger.info(f"Number of records with adjusted_apr: {global_df['adjusted_apr'].notna().sum()} out of {len(global_df)}")
# Log the difference between APR and adjusted APR
valid_rows = global_df[global_df['adjusted_apr'].notna()]
if not valid_rows.empty:
avg_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).mean()
max_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).max()
min_diff = (valid_rows['apr'] - valid_rows['adjusted_apr']).min()
logger.info(f"APR vs. adjusted APR difference: avg={avg_diff:.2f}, min={min_diff:.2f}, max={max_diff:.2f}")
# All values are APR type (excluding zero and -100 values)
logger.info("All values are APR type (excluding zero and -100 values)")
logger.info(f"Agents count: {global_df['agent_name'].value_counts().to_dict()}")
# Log the entire dataframe for debugging
logger.debug("Final DataFrame contents:")
for idx, row in global_df.iterrows():
logger.debug(f"Row {idx}: {row.to_dict()}")
# Add this at the end, right before returning
logger.info("Analyzing adjusted_apr data availability...")
log_adjusted_apr_availability(global_df)
return global_df, global_roi_df
except requests.exceptions.RequestException as e:
logger.error(f"API request error: {e}")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
except Exception as e:
logger.error(f"Error fetching APR data: {e}")
logger.exception("Exception traceback:")
global_df = pd.DataFrame([])
global_roi_df = pd.DataFrame([])
return global_df, global_roi_df
def log_adjusted_apr_availability(df):
"""
Analyzes and logs detailed information about adjusted_apr data availability.
Args:
df: DataFrame containing the APR data with adjusted_apr column
"""
if df.empty or 'adjusted_apr' not in df.columns:
logger.warning("No adjusted_apr data available for analysis")
return
# Get only rows with valid adjusted_apr values
has_adjusted = df[df['adjusted_apr'].notna()]
if has_adjusted.empty:
logger.warning("No valid adjusted_apr values found in the dataset")
return
# 1. When did adjusted_apr data start?
first_adjusted = has_adjusted['timestamp'].min()
last_adjusted = has_adjusted['timestamp'].max()
logger.info(f"ADJUSTED APR SUMMARY: First data point: {first_adjusted}")
logger.info(f"ADJUSTED APR SUMMARY: Last data point: {last_adjusted}")
logger.info(f"ADJUSTED APR SUMMARY: Data spans {(last_adjusted - first_adjusted).days} days")
# Calculate coverage percentage
total_records = len(df)
records_with_adjusted = len(has_adjusted)
coverage_pct = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
logger.info(f"ADJUSTED APR SUMMARY: {records_with_adjusted} out of {total_records} records have adjusted_apr ({coverage_pct:.2f}%)")
# 2. How many agents are providing adjusted_apr?
agents_with_adjusted = has_adjusted['agent_id'].unique()
logger.info(f"ADJUSTED APR SUMMARY: {len(agents_with_adjusted)} agents providing adjusted_apr")
logger.info(f"ADJUSTED APR SUMMARY: Agents providing adjusted_apr: {list(agents_with_adjusted)}")
# 3. May 10th cutoff analysis
may_10_2025 = datetime(2025, 5, 10)
before_cutoff = df[df['timestamp'] < may_10_2025]
after_cutoff = df[df['timestamp'] >= may_10_2025]
if not before_cutoff.empty and not after_cutoff.empty:
before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
before_pct = (before_with_adjusted / len(before_cutoff)) * 100
after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
after_pct = (after_with_adjusted / len(after_cutoff)) * 100
logger.info(f"ADJUSTED APR SUMMARY: Before May 10th: {before_with_adjusted}/{len(before_cutoff)} records with adjusted_apr ({before_pct:.2f}%)")
logger.info(f"ADJUSTED APR SUMMARY: After May 10th: {after_with_adjusted}/{len(after_cutoff)} records with adjusted_apr ({after_pct:.2f}%)")
# Check which agents had data before and after
agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
missing_after = agents_before - agents_after
if missing_after:
logger.warning(f"ADJUSTED APR SUMMARY: {len(missing_after)} agents stopped providing adjusted_apr after May 10th: {list(missing_after)}")
new_after = agents_after - agents_before
if new_after:
logger.info(f"ADJUSTED APR SUMMARY: {len(new_after)} agents started providing adjusted_apr after May 10th: {list(new_after)}")
# 4. Find date ranges for missing adjusted_apr
# Group by agent to analyze per-agent data availability
logger.info("=== DETAILED AGENT ANALYSIS ===")
for agent_id in df['agent_id'].unique():
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
# Get the valid adjusted_apr values for this agent
agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
if agent_adjusted.empty:
logger.info(f"Agent {agent_name} (ID: {agent_id}): No adjusted_apr data available")
continue
# Get the date range for this agent's data
agent_start = agent_data['timestamp'].min()
agent_end = agent_data['timestamp'].max()
# Get the date range for adjusted_apr data
adjusted_start = agent_adjusted['timestamp'].min()
adjusted_end = agent_adjusted['timestamp'].max()
total_agent_records = len(agent_data)
agent_with_adjusted = len(agent_adjusted)
coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
logger.info(f"Agent {agent_name} (ID: {agent_id}): {agent_with_adjusted}/{total_agent_records} records with adjusted_apr ({coverage_pct:.2f}%)")
logger.info(f"Agent {agent_name} (ID: {agent_id}): APR data from {agent_start} to {agent_end}")
logger.info(f"Agent {agent_name} (ID: {agent_id}): Adjusted APR data from {adjusted_start} to {adjusted_end}")
# Calculate if this agent had data before/after May 10th
if not before_cutoff.empty and not after_cutoff.empty:
agent_before = before_cutoff[before_cutoff['agent_id'] == agent_id]
agent_after = after_cutoff[after_cutoff['agent_id'] == agent_id]
has_before = not agent_before.empty and agent_before['adjusted_apr'].notna().any()
has_after = not agent_after.empty and agent_after['adjusted_apr'].notna().any()
if has_before and not has_after:
last_date = agent_before[agent_before['adjusted_apr'].notna()]['timestamp'].max()
logger.warning(f"Agent {agent_name} (ID: {agent_id}): Stopped providing adjusted_apr after May 10th. Last data point: {last_date}")
elif not has_before and has_after:
first_date = agent_after[agent_after['adjusted_apr'].notna()]['timestamp'].min()
logger.info(f"Agent {agent_name} (ID: {agent_id}): Started providing adjusted_apr after May 10th. First data point: {first_date}")
# Check for gaps in adjusted_apr (periods of 24+ hours without data)
if len(agent_adjusted) < 2:
continue
# Sort by timestamp
sorted_data = agent_adjusted.sort_values('timestamp')
# Calculate time differences between consecutive data points
time_diffs = sorted_data['timestamp'].diff()
# Find gaps larger than 24 hours
gaps = sorted_data[time_diffs > pd.Timedelta(hours=24)]
if not gaps.empty:
logger.info(f"Agent {agent_name} (ID: {agent_id}): Found {len(gaps)} gaps in adjusted_apr data")
# Log the gaps
for i, row in gaps.iterrows():
# Find the previous timestamp before the gap
prev_idx = sorted_data.index.get_loc(i) - 1
prev_time = sorted_data.iloc[prev_idx]['timestamp'] if prev_idx >= 0 else None
if prev_time:
gap_start = prev_time
gap_end = row['timestamp']
gap_duration = gap_end - gap_start
logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
def generate_apr_visualizations():
"""Generate APR visualizations with CSV-first approach for Hugging Face Space deployment"""
global global_df
# CSV-FIRST APPROACH: Try to load from CSV first
logger.info("Attempting to load APR data from CSV files...")
df, csv_file = load_apr_data_from_csv()
if not df.empty:
logger.info(f"Successfully loaded APR data from CSV: {len(df)} records")
global_df = df
# Create visualizations using CSV data
logger.info("Creating APR visualizations from CSV data...")
combined_fig = create_combined_time_series_graph(df)
return combined_fig, csv_file
# FALLBACK: If CSV not available, try API
logger.info("CSV data not available, falling back to API...")
try:
df, _ = fetch_apr_data_from_db()
# If we got no data at all, return placeholder figures
if df.empty:
logger.info("No APR data available from API either. Using fallback visualization.")
# Create empty visualizations with a message using Plotly
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text="No APR data available",
font=dict(size=20),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
# Save as static file for reference
fig.write_html("optimus_apr_combined_graph.html")
fig.write_image("optimus_apr_combined_graph.png")
csv_file = None
return fig, csv_file
# Apply preprocessing to fix APR and ROI values
logger.info("Applying preprocessing to fix APR and ROI values...")
df = fix_apr_and_roi(df) # Apply preprocessing
global_df = df
# Save preprocessed data to CSV before creating visualizations
logger.info("Saving preprocessed APR data to CSV...")
csv_file = save_to_csv(df)
# Create visualizations using the saved CSV data
logger.info("Creating APR visualizations from preprocessed data...")
combined_fig = create_combined_time_series_graph(df)
return combined_fig, csv_file
except Exception as e:
logger.error(f"Error fetching APR data from API: {e}")
# Return error visualization
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text=f"Error loading data: {str(e)}",
font=dict(size=16, color="red"),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
return fig, None
def generate_roi_visualizations():
"""Generate ROI visualizations with CSV-first approach for Hugging Face Space deployment"""
global global_roi_df
# CSV-FIRST APPROACH: Try to load from CSV first
logger.info("Attempting to load ROI data from CSV files...")
df_roi, csv_file = load_roi_data_from_csv()
if not df_roi.empty:
logger.info(f"Successfully loaded ROI data from CSV: {len(df_roi)} records")
global_roi_df = df_roi
# Create visualizations using CSV data
logger.info("Creating ROI visualizations from CSV data...")
combined_fig = create_combined_roi_time_series_graph(df_roi)
return combined_fig, csv_file
# FALLBACK: If CSV not available, try API
logger.info("CSV data not available, falling back to API...")
try:
# Fetch data from database if not already fetched
if global_roi_df is None or global_roi_df.empty:
_, df_roi = fetch_apr_data_from_db()
else:
df_roi = global_roi_df
# If we got no data at all, return placeholder figures
if df_roi.empty:
logger.info("No ROI data available from API either. Using fallback visualization.")
# Create empty visualizations with a message using Plotly
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text="No ROI data available",
font=dict(size=20),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
# Save as static file for reference
fig.write_html("optimus_roi_graph.html")
fig.write_image("optimus_roi_graph.png")
csv_file = None
return fig, csv_file
# Set global_roi_df for access by other functions
global_roi_df = df_roi
# Save preprocessed ROI data to CSV before creating visualizations
logger.info("Saving preprocessed ROI data to CSV...")
csv_file = save_roi_to_csv(df_roi)
# Create visualizations using the saved CSV data
logger.info("Creating ROI visualizations from preprocessed data...")
combined_fig = create_combined_roi_time_series_graph(df_roi)
return combined_fig, csv_file
except Exception as e:
logger.error(f"Error fetching ROI data from API: {e}")
# Return error visualization
fig = go.Figure()
fig.add_annotation(
x=0.5, y=0.5,
text=f"Error loading data: {str(e)}",
font=dict(size=16, color="red"),
showarrow=False
)
fig.update_layout(
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
)
return fig, None
def aggregate_daily_data(df, metric_column):
"""
Aggregate data by date and agent, taking the mean of values within each day.
Args:
df: DataFrame with timestamp, agent_id, and metric data
metric_column: Name of the metric column ('apr' or 'roi')
Returns:
DataFrame with daily aggregated data per agent
"""
if df.empty:
return df
# Convert timestamp to date only (ignore time)
df = df.copy()
df['date'] = df['timestamp'].dt.date
# Group by date and agent, calculate mean for each day
daily_agent_data = df.groupby(['date', 'agent_id']).agg({
metric_column: 'mean',
'agent_name': 'first',
'is_dummy': 'first',
'metric_type': 'first'
}).reset_index()
# Convert date back to datetime for plotting
daily_agent_data['timestamp'] = pd.to_datetime(daily_agent_data['date'])
logger.info(f"Aggregated {len(df)} data points into {len(daily_agent_data)} daily values for {metric_column}")
return daily_agent_data
def calculate_daily_medians(daily_agent_data, metric_column):
"""
Calculate daily medians across all agents for each date.
Args:
daily_agent_data: DataFrame with daily aggregated data per agent
metric_column: Name of the metric column ('apr' or 'roi')
Returns:
DataFrame with daily median values
"""
if daily_agent_data.empty:
return daily_agent_data
# For each date, calculate median across all agents (excluding missing data)
daily_medians = daily_agent_data.groupby('date').agg({
metric_column: 'median'
}).reset_index()
# Convert date back to datetime for plotting
daily_medians['timestamp'] = pd.to_datetime(daily_medians['date'])
logger.info(f"Calculated {len(daily_medians)} daily median values for {metric_column}")
return daily_medians
def calculate_moving_average_medians(daily_medians, metric_column, window_days=7):
"""
Calculate moving average of daily medians using a specified time window.
Args:
daily_medians: DataFrame with daily median values
metric_column: Name of the metric column ('apr' or 'roi')
window_days: Number of days for the moving average window
Returns:
DataFrame with moving average values added
"""
if daily_medians.empty:
return daily_medians
# Sort by timestamp
daily_medians = daily_medians.sort_values('timestamp').copy()
# Initialize moving average column
daily_medians['moving_avg'] = None
# Define the time window
time_window = pd.Timedelta(days=window_days)
logger.info(f"Calculating {window_days}-day moving average of daily medians for {metric_column}")
# Calculate moving averages for each timestamp
for i, row in daily_medians.iterrows():
current_time = row['timestamp']
window_start = current_time - time_window
# Get all median values within the time window
window_data = daily_medians[
(daily_medians['timestamp'] >= window_start) &
(daily_medians['timestamp'] <= current_time)
]
# Calculate the average of medians for the time window
if not window_data.empty:
daily_medians.at[i, 'moving_avg'] = window_data[metric_column].mean()
else:
# If no data points in the window, use the current value
daily_medians.at[i, 'moving_avg'] = row[metric_column]
logger.info(f"Calculated {window_days}-day moving averages with {len(daily_medians)} points")
return daily_medians
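# Sketch of how the three helpers above are chained by the graph functions below
# (assumes a DataFrame with 'timestamp', 'agent_id', 'agent_name', 'is_dummy',
# 'metric_type' and the metric column; see create_combined_roi_time_series_graph):
#
#   daily_agent   = aggregate_daily_data(df, 'roi')                    # per-agent daily means
#   daily_medians = calculate_daily_medians(daily_agent, 'roi')        # cross-agent daily medians
#   smoothed      = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)
#   # smoothed['moving_avg'] is what gets plotted as the "Median ROI (7d window)" line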
def create_combined_roi_time_series_graph(df):
"""Create a time series graph showing daily median ROI values with 7-day moving average"""
if len(df) == 0:
logger.error("No data to plot combined ROI graph")
fig = go.Figure()
fig.add_annotation(
text="No ROI data available",
x=0.5, y=0.5,
showarrow=False, font=dict(size=20)
)
return fig
# Define fixed start date (May 15, 2025)
fixed_start_date = datetime(2025, 5, 15)
logger.info(f"Using fixed start date for ROI runtime calculation: {fixed_start_date}")
# Calculate runtime for each agent from fixed start date
agent_runtimes = {}
for agent_id in df['agent_id'].unique():
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0]
last_report = agent_data['timestamp'].max()
runtime_days = (last_report - fixed_start_date).total_seconds() / (24 * 3600) # Convert to days
agent_runtimes[agent_id] = {
'agent_name': agent_name,
'last_report': last_report,
'runtime_days': runtime_days
}
# Calculate average runtime
avg_runtime = sum(data['runtime_days'] for data in agent_runtimes.values()) / len(agent_runtimes) if agent_runtimes else 0
logger.info(f"Average agent runtime from fixed start date: {avg_runtime:.2f} days")
# Log individual agent runtimes for debugging
for agent_id, data in agent_runtimes.items():
logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: {data['last_report']}")
# IMPORTANT: Force data types to ensure consistency
df['roi'] = df['roi'].astype(float) # Ensure ROI is float
# Convert ROI values to percentages (multiply by 100)
df['roi'] = df['roi'] * 100
df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string
# Get min and max time for shapes
min_time = df['timestamp'].min()
max_time = df['timestamp'].max()
# Use the actual start date from the data instead of a fixed date
x_start_date = min_time
# CRITICAL: Log the exact dataframe we're using for plotting to help debug
logger.info(f"ROI Graph data - shape: {df.shape}, columns: {df.columns}")
logger.info(f"ROI Graph data - unique agents: {df['agent_name'].unique().tolist()}")
logger.info(f"ROI Graph data - min ROI: {df['roi'].min()}, max ROI: {df['roi'].max()}")
# Export full dataframe to CSV for debugging
debug_csv = "debug_roi_data.csv"
df.to_csv(debug_csv)
logger.info(f"Exported ROI graph data to {debug_csv} for debugging")
# Create Plotly figure in a clean state
fig = go.Figure()
# Get min and max time for shapes
min_time = df['timestamp'].min()
max_time = df['timestamp'].max()
# Add background shapes for positive and negative regions
# Add shape for positive ROI region (above zero)
fig.add_shape(
type="rect",
fillcolor="rgba(230, 243, 255, 0.3)",
line=dict(width=0),
y0=0, y1=df['roi'].max() * 1.1 if not df.empty else 10, # Dynamic positive value based on data
x0=min_time, x1=max_time,
layer="below"
)
# Add shape for negative ROI region (below zero)
fig.add_shape(
type="rect",
fillcolor="rgba(255, 230, 230, 0.3)",
line=dict(width=0),
y0=df['roi'].min() * 1.1 if not df.empty else -10, y1=0, # Dynamic negative value based on data
x0=min_time, x1=max_time,
layer="below"
)
# Add zero line
fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Filter out outliers (ROI values above 200% or below -200%)
outlier_data = df[(df['roi'] > 200) | (df['roi'] < -200)].copy()
df_filtered = df[(df['roi'] <= 200) & (df['roi'] >= -200)].copy()
# Log the outliers for better debugging
if len(outlier_data) > 0:
excluded_count = len(outlier_data)
logger.info(f"Excluded {excluded_count} data points with outlier ROI values (>200% or <-200%)")
# Group outliers by agent for detailed logging
outlier_agents = outlier_data.groupby('agent_name')
for agent_name, agent_outliers in outlier_agents:
logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
for idx, row in agent_outliers.iterrows():
logger.info(f" - ROI: {row['roi']}, timestamp: {row['timestamp']}")
# Use the filtered data for all subsequent operations
df = df_filtered
# NEW APPROACH: Daily aggregation and median calculation
# Step 1: Aggregate data daily per agent (mean of values within each day)
daily_agent_data = aggregate_daily_data(df, 'roi')
# Step 2: Calculate daily medians across all agents
daily_medians = calculate_daily_medians(daily_agent_data, 'roi')
# Step 3: Calculate 7-day moving average of daily medians
daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)
logger.info(f"NEW APPROACH: Processed {len(df)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
# Find the last date where we have valid moving average data
last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
# If we don't have any valid moving average data, use the max time from the original data
last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
logger.info(f"Using last valid date for graph: {last_valid_date}")
# Plot individual agent daily data points with agent names in hover, but limit display for scalability
if not daily_agent_data.empty:
# Group by agent to use different colors for each agent
unique_agents = daily_agent_data['agent_name'].unique()
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
# Create a color map for agents
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
# Calculate the total number of data points per agent to determine which are most active
agent_counts = daily_agent_data['agent_name'].value_counts()
# Determine how many agents to show individually (limit to top 5 most active)
MAX_VISIBLE_AGENTS = 5
top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
# Add daily aggregated data points for each agent, but only make top agents visible by default
for agent_name in unique_agents:
agent_data = daily_agent_data[daily_agent_data['agent_name'] == agent_name]
# Explicitly convert to Python lists
x_values = agent_data['timestamp'].tolist()
y_values = agent_data['roi'].tolist()
# Change default visibility to False to hide all agent data points
is_visible = False
# Add data points as markers for ROI
fig.add_trace(
go.Scatter(
x=x_values,
y=y_values,
mode='markers', # Only markers for original data
marker=dict(
color=color_map[agent_name],
symbol='circle',
size=10,
line=dict(width=1, color='black')
),
name=f'Agent: {agent_name} (Daily ROI)',
hovertemplate='Time: %{x}<br>Daily ROI: %{y:.2f}%<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # All agents hidden by default
)
)
logger.info(f"Added daily ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
# Add ROI 7-day moving average of daily medians as a smooth line
x_values_ma = daily_medians_with_ma['timestamp'].tolist()
y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
# Create hover template for the ROI moving average line
hover_data_roi = []
for idx, row in daily_medians_with_ma.iterrows():
timestamp = row['timestamp']
# Format timestamp to show only date for daily data
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
# Calculate number of active agents on this date
active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
hover_data_roi.append(
f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
)
fig.add_trace(
go.Scatter(
x=x_values_ma,
y=y_values_ma,
mode='lines', # Only lines for moving average
line=dict(color='blue', width=3, shape='spline', smoothing=1.3), # Smooth curved line like APR
name='Median ROI (7d window)',
hovertext=hover_data_roi,
hoverinfo='text',
visible=True # Visible by default
)
)
logger.info(f"Added 7-day moving average of daily median ROI trace with {len(x_values_ma)} points")
# Update layout with average runtime information in the title
fig.update_layout(
title=dict(
text=f"Optimus Agents ROI (over avg. {avg_runtime:.1f} days runtime)",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None, # Remove x-axis title to use annotation instead
yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
template="plotly_white",
height=600, # Reduced height for better fit on smaller screens
autosize=True, # Enable auto-sizing for responsiveness
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1,
groupclick="toggleitem"
),
margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title
hovermode="closest"
)
# Add single annotation for y-axis
fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=0, # Center of the y-axis
xref="paper",
yref="y",
text="ROI [%]",
showarrow=False,
font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
# Update layout for legend
fig.update_layout(
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1,
groupclick="toggleitem",
font=dict(
family="Arial, sans-serif",
size=14, # Adjusted font size
color="black",
weight="bold"
)
)
)
# Update y-axis with autoscaling for ROI
fig.update_yaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Use autoscaling instead of fixed range
autorange=True, # Enable autoscaling to fit the data
tickformat=".2f", # Format tick labels with 2 decimal places
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove the built-in axis title since we're using annotations
)
# Update x-axis with better formatting and autoscaling
fig.update_xaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Use autoscaling instead of fixed range
autorange=True, # Enable autoscaling
tickformat="%b %d", # Simplified date format without time
tickangle=-30, # Angle the labels for better readability
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove built-in title to use annotation instead
)
try:
# Save the figure
graph_file = "optimus_roi_graph.html"
fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Also save as image for compatibility
img_file = "optimus_roi_graph.png"
try:
fig.write_image(img_file)
logger.info(f"ROI graph saved to {graph_file} and {img_file}")
except Exception as e:
logger.error(f"Error saving ROI image: {e}")
logger.info(f"ROI graph saved to {graph_file} only")
# Return the figure object for direct use in Gradio
return fig
except Exception as e:
# If the complex graph approach fails, create a simpler one
logger.error(f"Error creating advanced ROI graph: {e}")
logger.info("Falling back to Simpler ROI graph")
# Create a simpler graph as fallback
simple_fig = go.Figure()
# Add zero line
simple_fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Add background shapes with dynamic values
simple_fig.add_shape(
type="rect",
fillcolor="rgba(230, 243, 255, 0.3)",
line=dict(width=0),
y0=0, y1=df['roi'].max() * 1.1 if not df.empty else 10, # Dynamic positive value based on data
x0=min_time, x1=max_time,
layer="below"
)
simple_fig.add_shape(
type="rect",
fillcolor="rgba(255, 230, 230, 0.3)",
line=dict(width=0),
y0=df['roi'].min() * 1.1 if not df.empty else -10, y1=0, # Dynamic negative value based on data
x0=min_time, x1=max_time,
layer="below"
)
        # Plot the 7-day moving average of daily median ROI (computed above, before the try block)
        if not daily_medians_with_ma.empty:
            # Add moving average as a line
            simple_fig.add_trace(
                go.Scatter(
                    x=daily_medians_with_ma['timestamp'],
                    y=daily_medians_with_ma['moving_avg'],
                    mode='lines',
                    name='Median ROI (7d window)',
                    line=dict(width=2, color='blue')  # Thinner line
                )
            )
# Simplified layout with adjusted y-axis range
simple_fig.update_layout(
title=dict(
text="Optimus Agents ROI",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None,
yaxis_title=None,
template="plotly_white",
height=600,
autosize=True,
margin=dict(r=30, l=120, t=40, b=50)
)
# Update y-axis with fixed range for ROI (-10 to 10)
simple_fig.update_yaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
range=[-10, 10], # Set fixed range from -10 to 10
tickformat=".2f",
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),
title=None # Remove the built-in axis title since we're using annotations
)
# Update x-axis with better formatting and autoscaling
simple_fig.update_xaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
autorange=True, # Enable autoscaling
tickformat="%b %d",
tickangle=-30,
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold")
)
# Save the figure
graph_file = "optimus_roi_graph.html"
simple_fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Return the simple figure
return simple_fig
def save_roi_to_csv(df):
"""Save the ROI data DataFrame to a CSV file and return the file path"""
if df.empty:
logger.error("No ROI data to save to CSV")
return None
# Define the CSV file path
csv_file = "optimus_roi_values.csv"
# Save to CSV
df.to_csv(csv_file, index=False)
logger.info(f"ROI data saved to {csv_file}")
return csv_file
def create_time_series_graph_per_agent(df):
"""Create a time series graph for each agent using Plotly"""
# Get unique agents
unique_agents = df['agent_id'].unique()
if len(unique_agents) == 0:
logger.error("No agent data to plot")
fig = go.Figure()
fig.add_annotation(
text="No agent data available",
x=0.5, y=0.5,
showarrow=False, font=dict(size=20)
)
return fig
# Create a subplot figure for each agent
fig = make_subplots(rows=len(unique_agents), cols=1,
subplot_titles=[f"Agent: {df[df['agent_id'] == agent_id]['agent_name'].iloc[0]}"
for agent_id in unique_agents],
vertical_spacing=0.1)
# Plot data for each agent
for i, agent_id in enumerate(unique_agents):
agent_data = df[df['agent_id'] == agent_id].copy()
agent_name = agent_data['agent_name'].iloc[0]
row = i + 1
# Add zero line to separate APR and Performance
fig.add_shape(
type="line", line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0, x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
row=row, col=1
)
# Add background colors with dynamic values
fig.add_shape(
type="rect", fillcolor="rgba(230, 243, 255, 0.3)", line=dict(width=0),
y0=0, y1=agent_data['apr'].max() * 1.1 if not agent_data.empty else 10,
x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
row=row, col=1, layer="below"
)
fig.add_shape(
type="rect", fillcolor="rgba(255, 230, 230, 0.3)", line=dict(width=0),
y0=agent_data['apr'].min() * 1.1 if not agent_data.empty else -10, y1=0,
x0=agent_data['timestamp'].min(), x1=agent_data['timestamp'].max(),
row=row, col=1, layer="below"
)
# Create separate dataframes for different data types
apr_data = agent_data[agent_data['metric_type'] == 'APR']
perf_data = agent_data[agent_data['metric_type'] == 'Performance']
# Sort all data by timestamp for the line plots
combined_agent_data = agent_data.sort_values('timestamp')
# Add main line connecting all points
fig.add_trace(
go.Scatter(
x=combined_agent_data['timestamp'],
y=combined_agent_data['apr'],
mode='lines',
line=dict(color='purple', width=2),
name=f'{agent_name}',
legendgroup=agent_name,
showlegend=(i == 0), # Only show in legend once
hovertemplate='Time: %{x}<br>Value: %{y:.2f}<extra></extra>'
),
row=row, col=1
)
# Add scatter points for APR values
if not apr_data.empty:
fig.add_trace(
go.Scatter(
x=apr_data['timestamp'],
y=apr_data['apr'],
mode='markers',
marker=dict(color='blue', size=10, symbol='circle'),
name='APR',
legendgroup='APR',
showlegend=(i == 0),
hovertemplate='Time: %{x}<br>APR: %{y:.2f}<extra></extra>'
),
row=row, col=1
)
# Add scatter points for Performance values
if not perf_data.empty:
fig.add_trace(
go.Scatter(
x=perf_data['timestamp'],
y=perf_data['apr'],
mode='markers',
marker=dict(color='red', size=10, symbol='square'),
name='Performance',
legendgroup='Performance',
showlegend=(i == 0),
hovertemplate='Time: %{x}<br>Performance: %{y:.2f}<extra></extra>'
),
row=row, col=1
)
# Update axes
fig.update_xaxes(title_text="Time", row=row, col=1)
fig.update_yaxes(title_text="Value", row=row, col=1, gridcolor='rgba(0,0,0,0.1)')
# Update layout
fig.update_layout(
height=400 * len(unique_agents),
width=1000,
title_text="APR and Performance Values per Agent",
template="plotly_white",
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1
),
margin=dict(r=20, l=20, t=30, b=20),
hovermode="closest"
)
# Save the figure (still useful for reference)
graph_file = "optimus_apr_per_agent_graph.html"
fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Also save as image for compatibility
img_file = "optimus_apr_per_agent_graph.png"
fig.write_image(img_file)
logger.info(f"Per-agent graph saved to {graph_file} and {img_file}")
# Return the figure object for direct use in Gradio
return fig
def write_debug_info(df, fig):
"""Minimal debug info function"""
try:
# Just log minimal information
logger.debug(f"Graph created with {len(df)} data points and {len(fig.data)} traces")
return True
except Exception as e:
logger.error(f"Error writing debug info: {e}")
return False
def create_combined_time_series_graph(df):
"""Create a time series graph showing average APR values across all agents"""
if len(df) == 0:
logger.error("No data to plot combined graph")
fig = go.Figure()
fig.add_annotation(
text="No data available",
x=0.5, y=0.5,
showarrow=False, font=dict(size=20)
)
return fig
# IMPORTANT: Force data types to ensure consistency
df['apr'] = df['apr'].astype(float) # Ensure APR is float
df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string
# Get min and max time for shapes
min_time = df['timestamp'].min()
max_time = df['timestamp'].max()
# Use the actual start date from the data instead of a fixed date
x_start_date = min_time
# CRITICAL: Log the exact dataframe we're using for plotting to help debug
logger.info(f"Graph data - shape: {df.shape}, columns: {df.columns}")
logger.info(f"Graph data - unique agents: {df['agent_name'].unique().tolist()}")
logger.info("Graph data - all positive APR values only")
logger.info(f"Graph data - min APR: {df['apr'].min()}, max APR: {df['apr'].max()}")
# Export full dataframe to CSV for debugging
debug_csv = "debug_graph_data.csv"
df.to_csv(debug_csv)
logger.info(f"Exported graph data to {debug_csv} for debugging")
# Write detailed data report
with open("debug_graph_data_report.txt", "w") as f:
f.write("==== GRAPH DATA REPORT ====\n\n")
f.write(f"Total data points: {len(df)}\n")
f.write(f"Timestamp range: {df['timestamp'].min()} to {df['timestamp'].max()}\n\n")
# Output per-agent details
unique_agents = df['agent_id'].unique()
f.write(f"Number of agents: {len(unique_agents)}\n\n")
for agent_id in unique_agents:
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0]
f.write(f"== Agent: {agent_name} (ID: {agent_id}) ==\n")
f.write(f" Total data points: {len(agent_data)}\n")
apr_data = agent_data[agent_data['metric_type'] == 'APR']
f.write(f" APR data points: {len(apr_data)}\n")
if not apr_data.empty:
f.write(f" APR values: {apr_data['apr'].tolist()}\n")
f.write(f" APR timestamps: {[ts.strftime('%Y-%m-%d %H:%M:%S') if ts is not None else 'None' for ts in apr_data['timestamp']]}\n")
f.write("\n")
logger.info("Generated detailed graph data report")
# ENSURE THERE ARE NO CONFLICTING AXES OR TRACES
# Create Plotly figure in a clean state
fig = go.Figure()
# Enable autoscaling instead of fixed ranges
logger.info("Using autoscaling for axes ranges")
# Add background shapes for APR and Performance regions
min_time = df['timestamp'].min()
max_time = df['timestamp'].max()
# Add shape for positive APR region (above zero)
# Use the full APR subset of df here; the filtered apr_data variable is only built further
# below, so relying on it at this point would reuse leftover data from the debug-report loop above
all_apr_values = df[df['metric_type'] == 'APR']['apr']
fig.add_shape(
type="rect",
fillcolor="rgba(230, 243, 255, 0.3)",
line=dict(width=0),
y0=0, y1=all_apr_values.max() * 1.1 if not all_apr_values.empty else 10, # Dynamic positive value based on data
x0=min_time, x1=max_time,
layer="below"
)
# Add shape for negative APR region (below zero)
fig.add_shape(
type="rect",
fillcolor="rgba(255, 230, 230, 0.3)",
line=dict(width=0),
y0=all_apr_values.min() * 1.1 if not all_apr_values.empty else -10, y1=0, # Dynamic negative value based on data
x0=min_time, x1=max_time,
layer="below"
)
# Add zero line
fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# MODIFIED: Calculate average APR values across all agents for each timestamp
# Filter for APR data only
apr_data = df[df['metric_type'] == 'APR'].copy()
# Filter out outliers (APR values above 200 or below -200)
outlier_data = apr_data[(apr_data['apr'] > 200) | (apr_data['apr'] < -200)].copy()
apr_data_filtered = apr_data[(apr_data['apr'] <= 200) & (apr_data['apr'] >= -200)].copy()
# Log the outliers for better debugging
if len(outlier_data) > 0:
excluded_count = len(outlier_data)
logger.info(f"Excluded {excluded_count} data points with outlier APR values (>200 or <-200)")
# Group outliers by agent for detailed logging
outlier_agents = outlier_data.groupby('agent_name')
for agent_name, agent_outliers in outlier_agents:
logger.info(f"Agent '{agent_name}' has {len(agent_outliers)} outlier values:")
for idx, row in agent_outliers.iterrows():
logger.info(f" - APR: {row['apr']}, timestamp: {row['timestamp']}")
# Use the filtered data for all subsequent operations
apr_data = apr_data_filtered
# NEW APPROACH: Daily aggregation and median calculation for APR
# Step 1: Aggregate data daily per agent (mean of values within each day)
daily_agent_data = aggregate_daily_data(apr_data, 'apr')
# Step 2: Calculate daily medians across all agents
daily_medians = calculate_daily_medians(daily_agent_data, 'apr')
# Step 3: Calculate 7-day moving average of daily medians
daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'apr', window_days=7)
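# The three helpers above (aggregate_daily_data, calculate_daily_medians,
# calculate_moving_average_medians) are defined elsewhere in this file. A rough pandas
# sketch of the pipeline they are expected to implement (the 'agent_id', 'timestamp' and
# 'moving_avg' column names are inferred from how the results are used below; the exact
# rolling statistic may differ):
#   daily = apr_data.assign(timestamp=apr_data['timestamp'].dt.floor('D')) \
#                   .groupby(['agent_id', 'timestamp'], as_index=False)['apr'].mean()
#   medians = daily.groupby('timestamp', as_index=False)['apr'].median()
#   medians['moving_avg'] = medians.rolling('7D', on='timestamp')['apr'].mean()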
# Also handle adjusted APR if it exists
daily_medians_adjusted = None
daily_medians_adjusted_with_ma = None
if 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any():
# Create a separate dataset for adjusted APR
apr_data_with_adjusted = apr_data[apr_data['adjusted_apr'].notna()].copy()
if not apr_data_with_adjusted.empty:
# Step 1: Aggregate adjusted APR data daily per agent
daily_agent_data_adjusted = aggregate_daily_data(apr_data_with_adjusted, 'adjusted_apr')
# Step 2: Calculate daily medians for adjusted APR
daily_medians_adjusted = calculate_daily_medians(daily_agent_data_adjusted, 'adjusted_apr')
# Step 3: Calculate 7-day moving average of daily medians for adjusted APR
daily_medians_adjusted_with_ma = calculate_moving_average_medians(daily_medians_adjusted, 'adjusted_apr', window_days=7)
logger.info(f"NEW APPROACH APR: Processed {len(apr_data)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
if daily_medians_adjusted is not None:
logger.info(f"NEW APPROACH Adjusted APR: Processed adjusted APR data → {len(daily_medians_adjusted)} daily medians")
# This old moving average calculation is no longer needed with the new daily median approach
# Find the last date where we have valid moving average data
last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
# Find the last date where we have valid adjusted moving average data
last_valid_adj_ma_date = None
if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
last_valid_adj_ma_date = daily_medians_adjusted_with_ma[daily_medians_adjusted_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_adjusted_with_ma['moving_avg'].dropna().empty else None
# Determine the last valid date for either moving average
last_valid_date = last_valid_ma_date
if last_valid_adj_ma_date is not None:
last_valid_date = max(last_valid_date, last_valid_adj_ma_date) if last_valid_date is not None else last_valid_adj_ma_date
# If we don't have any valid moving average data, use the max time from the original data
if last_valid_date is None:
last_valid_date = df['timestamp'].max()
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
logger.info(f"Last valid adjusted moving average date: {last_valid_adj_ma_date}")
logger.info(f"Using last valid date for graph: {last_valid_date}")
# Plot individual agent data points with agent names in hover, but limit display for scalability
if not apr_data.empty:
# Group by agent to use different colors for each agent
unique_agents = apr_data['agent_name'].unique()
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
# Create a color map for agents
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
# Calculate the total number of data points per agent to determine which are most active
agent_counts = apr_data['agent_name'].value_counts()
# Determine how many agents to show individually (limit to top 5 most active)
MAX_VISIBLE_AGENTS = 5
top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
# Add data points for each agent, but only make top agents visible by default
for agent_name in unique_agents:
agent_data = apr_data[apr_data['agent_name'] == agent_name]
# Explicitly convert to Python lists
x_values = agent_data['timestamp'].tolist()
y_values = agent_data['apr'].tolist()
# Change default visibility to False to hide all agent data points
is_visible = False
# Add data points as markers for APR
fig.add_trace(
go.Scatter(
x=x_values,
y=y_values,
mode='markers', # Only markers for original data
marker=dict(
color=color_map[agent_name],
symbol='circle',
size=10,
line=dict(width=1, color='black')
),
name=f'Agent: {agent_name} (APR)',
hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # All agents hidden by default
)
)
logger.info(f"Added APR data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
# Add data points for adjusted APR if it exists
if 'adjusted_apr' in agent_data.columns and agent_data['adjusted_apr'].notna().any():
x_values_adj = agent_data['timestamp'].tolist()
y_values_adj = agent_data['adjusted_apr'].tolist()
fig.add_trace(
go.Scatter(
x=x_values_adj,
y=y_values_adj,
mode='markers', # Only markers for original data
marker=dict(
color=color_map[agent_name],
symbol='diamond', # Different symbol for adjusted APR
size=10,
line=dict(width=1, color='black')
),
name=f'Agent: {agent_name} (Adjusted APR)',
hovertemplate='Time: %{x}<br>Adjusted APR: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # All agents hidden by default
)
)
logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
# Add APR 7-day moving average of daily medians as a smooth line
x_values_ma = daily_medians_with_ma['timestamp'].tolist()
y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
# Create hover template for the APR moving average line
hover_data_apr = []
for idx, row in daily_medians_with_ma.iterrows():
timestamp = row['timestamp']
# Format timestamp to show only date for daily data
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
# Calculate number of active agents on this date
active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
hover_data_apr.append(
f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
)
fig.add_trace(
go.Scatter(
x=x_values_ma,
y=y_values_ma,
mode='lines', # Only lines for moving average
line=dict(color='red', width=3, shape='spline', smoothing=1.3), # Smooth curved line
name='Median APR (7d window)',
hovertext=hover_data_apr,
hoverinfo='text',
visible=True # Visible by default
)
)
logger.info(f"Added 7-day moving average of daily median APR trace with {len(x_values_ma)} points")
# Add adjusted APR 7-day moving average line if it exists
if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
x_values_adj_ma = daily_medians_adjusted_with_ma['timestamp'].tolist()
y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
# Create hover template for the adjusted APR moving average line
hover_data_adj = []
for idx, row in daily_medians_adjusted_with_ma.iterrows():
timestamp = row['timestamp']
# Format timestamp to show only date for daily data
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
# Calculate number of active agents on this date
active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
hover_data_adj.append(
f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
)
fig.add_trace(
go.Scatter(
x=x_values_adj_ma,
y=y_values_adj_ma,
mode='lines', # Only lines for moving average
line=dict(color='green', width=3, shape='spline', smoothing=1.3), # Smooth curved line
name='Median Adjusted APR (7d window)',
hovertext=hover_data_adj,
hoverinfo='text',
visible=True # Visible by default
)
)
logger.info(f"Added 7-day moving average of daily median Adjusted APR trace with {len(x_values_adj_ma)} points")
else:
logger.warning("No adjusted APR moving average data available to plot")
# Removed cumulative APR as requested
logger.info("Cumulative APR graph line has been removed as requested")
# Update layout - use simple boolean values everywhere
# Make chart responsive instead of fixed width
fig.update_layout(
title=dict(
text="Optimus Agents",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None, # Remove x-axis title to use annotation instead
yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
template="plotly_white",
height=600, # Reduced height for better fit on smaller screens
# Removed fixed width to enable responsiveness
autosize=True, # Enable auto-sizing for responsiveness
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1,
groupclick="toggleitem"
),
margin=dict(r=30, l=120, t=40, b=50), # Increased bottom margin for x-axis title
hovermode="closest"
)
# Add annotations for y-axis regions
fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=-25, # Middle of the negative region
xref="paper",
yref="y",
text="Percent drawdown [%]",
showarrow=False,
font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=50, # Middle of the positive region
xref="paper",
yref="y",
text="Agent APR [%]",
showarrow=False,
font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
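# Note: both region labels above are positioned in data coordinates (yref="y") at fixed
# values (-25 and 50), so with an autoranged y-axis they only sit near the middle of each
# region when the data roughly spans that range; switching to yref="paper" with a 0-1
# fraction would decouple them from the data range if that ever becomes a problem.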
# Remove x-axis title annotation
# fig.add_annotation(
# x=0.5, # Center of the x-axis
# y=-0.15, # Below the x-axis
# xref="paper",
# yref="paper",
# text="Date",
# showarrow=False,
# font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
# align="center"
# )
# Update layout for legend
fig.update_layout(
legend=dict(
orientation="h",
yanchor="bottom",
y=1.02,
xanchor="right",
x=1,
groupclick="toggleitem",
font=dict(
family="Arial, sans-serif",
size=14, # Adjusted font size
color="black",
weight="bold"
)
)
)
# Update y-axis with autoscaling
fig.update_yaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Use autoscaling instead of fixed range
autorange=True, # Enable autoscaling to fit the data
tickformat=".2f", # Format tick labels with 2 decimal places
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove the built-in axis title since we're using annotations
)
# Update x-axis with better formatting and autoscaling
fig.update_xaxes(
showgrid=True,
gridwidth=1,
gridcolor='rgba(0,0,0,0.1)',
# Use autoscaling instead of fixed range
autorange=True, # Enable autoscaling
tickformat="%b %d", # Simplified date format without time
tickangle=-30, # Angle the labels for better readability
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove built-in title to use annotation instead
)
# SIMPLIFIED APPROACH: Do a direct plot without markers for comparison
# This creates a simple, reliable fallback plot if the advanced one fails
try:
# Write detailed debug information before saving the figure
write_debug_info(df, fig)
# Save the figure (still useful for reference)
graph_file = "optimus_apr_combined_graph.html"
fig.write_html(graph_file, include_plotlyjs='cdn', full_html=False)
# Also save as image for compatibility
img_file = "optimus_apr_combined_graph.png"
try:
fig.write_image(img_file)
logger.info(f"Combined graph saved to {graph_file} and {img_file}")
except Exception as e:
logger.error(f"Error saving image: {e}")
logger.info(f"Combined graph saved to {graph_file} only")
# Return the figure object for direct use in Gradio
return fig
except Exception as e:
# If the complex graph approach fails, create a simpler one
logger.error(f"Error creating advanced graph: {e}")
logger.info("Falling back to Simpler graph")
# Create a simpler graph as fallback
simple_fig = go.Figure()
# Add zero line
simple_fig.add_shape(
type="line",
line=dict(dash="solid", width=1.5, color="black"),
y0=0, y1=0,
x0=min_time, x1=max_time
)
# Define colors for the fallback graph
fallback_colors = px.colors.qualitative.Plotly
# Simply plot the average APR data with moving average
# avg_apr_data is not computed earlier in this function, so derive the per-timestamp
# average APR here from the (already outlier-filtered) apr_data
avg_apr_data = apr_data.groupby('timestamp', as_index=False)['apr'].mean() if not apr_data.empty else pd.DataFrame(columns=['timestamp', 'apr'])
if not avg_apr_data.empty:
# Sort by timestamp
avg_apr_data = avg_apr_data.sort_values('timestamp')
# Calculate both moving averages for the fallback graph
avg_apr_data_with_ma = avg_apr_data.copy()
avg_apr_data_with_ma['moving_avg'] = None # 6-hour window
avg_apr_data_with_ma['infinite_avg'] = None # Infinite window
# Define the time window (6 hours)
time_window = pd.Timedelta(hours=6)
# Calculate the moving averages for each timestamp
for i, row in avg_apr_data_with_ma.iterrows():
current_time = row['timestamp']
window_start = current_time - time_window
# Get all data points within the 6-hour time window
window_data = apr_data[
(apr_data['timestamp'] >= window_start) &
(apr_data['timestamp'] <= current_time)
]
# Get all data points up to the current timestamp (infinite window)
infinite_window_data = apr_data[
apr_data['timestamp'] <= current_time
]
# Calculate the average APR for the 6-hour time window
if not window_data.empty:
avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean()
else:
# If no data points in the window, use the current value
avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
# Calculate the average APR for the infinite window
if not infinite_window_data.empty:
avg_apr_data_with_ma.at[i, 'infinite_avg'] = infinite_window_data['apr'].mean()
else:
avg_apr_data_with_ma.at[i, 'infinite_avg'] = row['apr']
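# The loop above recomputes the trailing windows row by row with iterrows; a rough
# pandas-only sketch of the same statistics (it evaluates the windows at every raw
# apr_data row rather than only at the averaged timestamps, so results are close but
# not identical) would be:
#   s = apr_data.sort_values('timestamp').set_index('timestamp')['apr']
#   moving = s.rolling('6h').mean()      # 6-hour trailing mean
#   cumulative = s.expanding().mean()    # mean over all rows up to each point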
# Add data points for each agent, but only make top agents visible by default
unique_agents = apr_data['agent_name'].unique()
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
# Calculate the total number of data points per agent
agent_counts = apr_data['agent_name'].value_counts()
# Determine how many agents to show individually (limit to top 5 most active)
MAX_VISIBLE_AGENTS = 5
top_agents = agent_counts.nlargest(min(MAX_VISIBLE_AGENTS, len(agent_counts))).index.tolist()
for agent_name in unique_agents:
agent_data = apr_data[apr_data['agent_name'] == agent_name]
# Determine if this agent should be visible by default
is_visible = agent_name in top_agents
# Add data points as markers
simple_fig.add_trace(
go.Scatter(
x=agent_data['timestamp'],
y=agent_data['apr'],
mode='markers',
name=f'Agent: {agent_name}',
marker=dict(
size=10,
color=color_map[agent_name]
),
hovertemplate='Time: %{x}<br>APR: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
visible=is_visible # Only top agents visible by default
)
)
# Add 6-hour moving average as a line
simple_fig.add_trace(
go.Scatter(
x=avg_apr_data_with_ma['timestamp'],
y=avg_apr_data_with_ma['moving_avg'],
mode='lines',
name='Average APR (6h window)',
line=dict(width=2, color='red') # Thinner line
)
)
# Add infinite window moving average as another line
simple_fig.add_trace(
go.Scatter(
x=avg_apr_data_with_ma['timestamp'],
y=avg_apr_data_with_ma['infinite_avg'],
mode='lines',
name='Cumulative Average APR (all data)',
line=dict(width=4, color='green') # Thicker solid line
)
)
# Simplified layout with a fixed y-axis range (-10 to 10)
simple_fig.update_layout(
title=dict(
text="Optimus Agents",
font=dict(
family="Arial, sans-serif",
size=22,
color="black",
weight="bold"
)
),
xaxis_title=None, # Remove x-axis title to use annotation instead
yaxis_title=None, # Remove the y-axis title as we'll use annotations instead
yaxis=dict(
# Fixed range from -10 to 10
range=[-10, 10], # Set fixed range from -10 to 10
tickformat=".2f", # Format tick labels with 2 decimal places
tickfont=dict(size=12) # Larger font for tick labels
),
height=600, # Reduced height for better fit
# Removed fixed width to enable responsiveness
autosize=True, # Enable auto-sizing for responsiveness
template="plotly_white", # Use a cleaner template
margin=dict(r=30, l=120, t=40, b=50) # Increased bottom margin for x-axis title
)
# Add annotations for y-axis regions in the fallback graph
simple_fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=-5, # Middle of the negative region (the fallback y-axis is fixed to [-10, 10])
xref="paper",
yref="y",
text="Percent drawdown [%]",
showarrow=False,
font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
simple_fig.add_annotation(
x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
y=5, # Middle of the positive region
xref="paper",
yref="y",
text="Agent APR [%]",
showarrow=False,
font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
textangle=-90, # Rotate text to be vertical
align="center"
)
# Remove x-axis title annotation
# simple_fig.add_annotation(
# x=0.5, # Center of the x-axis
# y=-0.15, # Below the x-axis
# xref="paper",
# yref="paper",
# text="Date",
# showarrow=False,
# font=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
# align="center"
# )
# Update legend font for fallback graph
simple_fig.update_layout(
legend=dict(
font=dict(
family="Arial, sans-serif",
size=14, # Adjusted font size
color="black",
weight="bold"
)
)
)
# Apply autoscaling to the x-axis for the fallback graph
simple_fig.update_xaxes(
autorange=True, # Enable autoscaling
tickformat="%b %d", # Simplified date format without time
tickangle=-30,
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
title=None # Remove built-in title to use annotation instead
)
# Update y-axis tick font for fallback graph
simple_fig.update_yaxes(
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold") # Adjusted font size
)
# Add a note about hidden agents if there are more than MAX_VISIBLE_AGENTS
if len(unique_agents) > MAX_VISIBLE_AGENTS:
simple_fig.add_annotation(
text=f"Note: Only showing top {MAX_VISIBLE_AGENTS} agents by default. Toggle others in legend.",
xref="paper", yref="paper",
x=0.5, y=1.05,
showarrow=False,
font=dict(size=12, color="gray"),
align="center"
)
# Return the simple figure
return simple_fig
def save_to_csv(df):
"""Save the APR data DataFrame to a CSV file and return the file path"""
if df.empty:
logger.error("No APR data to save to CSV")
return None
# Define the CSV file path
csv_file = "optimus_apr_values.csv"
# Save to CSV
df.to_csv(csv_file, index=False)
logger.info(f"APR data saved to {csv_file}")
# Also generate a statistics CSV file
stats_df = generate_statistics_from_data(df)
stats_csv = "optimus_apr_statistics.csv"
stats_df.to_csv(stats_csv, index=False)
logger.info(f"Statistics saved to {stats_csv}")
# Log detailed statistics about adjusted APR
if 'adjusted_apr' in df.columns and df['adjusted_apr'].notna().any():
adjusted_stats = stats_df[stats_df['avg_adjusted_apr'].notna()]
logger.info(f"Agents with adjusted APR data: {len(adjusted_stats)} out of {len(stats_df)}")
for _, row in adjusted_stats.iterrows():
if row['agent_id'] != 'ALL': # Skip the overall stats row
logger.info(f"Agent {row['agent_name']} adjusted APR stats: avg={row['avg_adjusted_apr']:.2f}, min={row['min_adjusted_apr']:.2f}, max={row['max_adjusted_apr']:.2f}")
# Log overall adjusted APR stats
overall_row = stats_df[stats_df['agent_id'] == 'ALL']
if not overall_row.empty and pd.notna(overall_row['avg_adjusted_apr'].iloc[0]):
logger.info(f"Overall adjusted APR stats: avg={overall_row['avg_adjusted_apr'].iloc[0]:.2f}, min={overall_row['min_adjusted_apr'].iloc[0]:.2f}, max={overall_row['max_adjusted_apr'].iloc[0]:.2f}")
return csv_file
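# Example usage (assuming global_df has been populated, e.g. by fetch_apr_data_from_db):
#   csv_path = save_to_csv(global_df)
#   # writes optimus_apr_values.csv and optimus_apr_statistics.csv, returning the former's path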
def generate_statistics_from_data(df):
"""Generate statistics from the APR data"""
if df.empty:
return pd.DataFrame()
# Get unique agents
unique_agents = df['agent_id'].unique()
stats_list = []
# Generate per-agent statistics
for agent_id in unique_agents:
agent_data = df[df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0]
# APR statistics
apr_data = agent_data[agent_data['metric_type'] == 'APR']
real_apr = apr_data[apr_data['is_dummy'] == False]
# Performance statistics
perf_data = agent_data[agent_data['metric_type'] == 'Performance']
real_perf = perf_data[perf_data['is_dummy'] == False]
# Check if adjusted_apr exists and has non-null values
has_adjusted_apr = 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any()
stats = {
'agent_id': agent_id,
'agent_name': agent_name,
'total_points': len(agent_data),
'apr_points': len(apr_data),
'performance_points': len(perf_data),
'real_apr_points': len(real_apr),
'real_performance_points': len(real_perf),
'avg_apr': apr_data['apr'].mean() if not apr_data.empty else None,
'avg_performance': perf_data['apr'].mean() if not perf_data.empty else None,
'max_apr': apr_data['apr'].max() if not apr_data.empty else None,
'min_apr': apr_data['apr'].min() if not apr_data.empty else None,
'avg_adjusted_apr': apr_data['adjusted_apr'].mean() if has_adjusted_apr else None,
'max_adjusted_apr': apr_data['adjusted_apr'].max() if has_adjusted_apr else None,
'min_adjusted_apr': apr_data['adjusted_apr'].min() if has_adjusted_apr else None,
'latest_timestamp': agent_data['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not agent_data.empty else None
}
stats_list.append(stats)
# Generate overall statistics
apr_only = df[df['metric_type'] == 'APR']
perf_only = df[df['metric_type'] == 'Performance']
# Check if adjusted_apr exists and has non-null values for overall stats
has_adjusted_apr_overall = 'adjusted_apr' in apr_only.columns and apr_only['adjusted_apr'].notna().any()
overall_stats = {
'agent_id': 'ALL',
'agent_name': 'All Agents',
'total_points': len(df),
'apr_points': len(apr_only),
'performance_points': len(perf_only),
'real_apr_points': len(apr_only[apr_only['is_dummy'] == False]),
'real_performance_points': len(perf_only[perf_only['is_dummy'] == False]),
'avg_apr': apr_only['apr'].mean() if not apr_only.empty else None,
'avg_performance': perf_only['apr'].mean() if not perf_only.empty else None,
'max_apr': apr_only['apr'].max() if not apr_only.empty else None,
'min_apr': apr_only['apr'].min() if not apr_only.empty else None,
'avg_adjusted_apr': apr_only['adjusted_apr'].mean() if has_adjusted_apr_overall else None,
'max_adjusted_apr': apr_only['adjusted_apr'].max() if has_adjusted_apr_overall else None,
'min_adjusted_apr': apr_only['adjusted_apr'].min() if has_adjusted_apr_overall else None,
'latest_timestamp': df['timestamp'].max().strftime('%Y-%m-%d %H:%M:%S') if not df.empty else None
}
stats_list.append(overall_stats)
return pd.DataFrame(stats_list)
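# The returned frame holds one row per agent plus a final 'ALL' summary row, with the
# columns built above (total/apr/performance point counts, real_* counts, avg/max/min APR
# and adjusted APR, avg_performance, latest_timestamp). Example usage (assuming global_df
# is populated):
#   stats_df = generate_statistics_from_data(global_df)
#   overall = stats_df[stats_df['agent_id'] == 'ALL']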
# Create dummy functions for the commented out imports
def create_transcation_visualizations():
"""Dummy implementation that returns a placeholder graph"""
fig = go.Figure()
fig.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
return fig
def create_active_agents_visualizations():
"""Dummy implementation that returns a placeholder graph"""
fig = go.Figure()
fig.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
return fig
# Dummy blockchain functions to replace the commented ones
def get_transfers(integrator: str, wallet: str) -> Dict[str, Any]:
"""Dummy function that returns an empty result"""
return {"transfers": []}
def fetch_and_aggregate_transactions():
"""Dummy function that returns empty data"""
return [], {}
# Function to parse the transaction data and prepare it for visualization
def process_transactions_and_agents(data):
"""Dummy function that returns empty dataframes"""
df_transactions = pd.DataFrame()
df_agents = pd.DataFrame(columns=['date', 'agent_count'])
df_agents_weekly = pd.DataFrame()
return df_transactions, df_agents, df_agents_weekly
# Function to create visualizations based on the metrics
def create_visualizations():
# Placeholder figures for testing
fig_swaps_chain = go.Figure()
fig_swaps_chain.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
fig_bridges_chain = go.Figure()
fig_bridges_chain.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
fig_agents_registered = go.Figure()
fig_agents_registered.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
fig_tvl = go.Figure()
fig_tvl.add_annotation(
text="Blockchain data loading disabled - placeholder visualization",
x=0.5, y=0.5, xref="paper", yref="paper",
showarrow=False, font=dict(size=20)
)
return fig_swaps_chain, fig_bridges_chain, fig_agents_registered, fig_tvl
# Modify dashboard function to make the plot container responsive
def dashboard():
with gr.Blocks() as demo:
gr.Markdown("# Average Optimus Agent Performance")
# Create tabs for APR and ROI metrics
with gr.Tabs():
# APR Metrics tab
with gr.Tab("APR Metrics"):
with gr.Column():
refresh_apr_btn = gr.Button("Refresh APR Data")
# Create container for plotly figure with responsive sizing
with gr.Column():
combined_apr_graph = gr.Plot(label="APR for All Agents", elem_id="responsive_apr_plot")
# Create compact toggle controls at the bottom of the graph
with gr.Row(visible=True):
gr.Markdown("##### Toggle Graph Lines", elem_id="apr_toggle_title")
with gr.Row():
with gr.Column():
with gr.Row(elem_id="apr_toggle_container"):
with gr.Column(scale=1, min_width=150):
apr_toggle = gr.Checkbox(label="APR Average", value=True, elem_id="apr_toggle")
with gr.Column(scale=1, min_width=150):
adjusted_apr_toggle = gr.Checkbox(label="ETH Adjusted APR Average", value=True, elem_id="adjusted_apr_toggle")
# Add a text area for status messages
apr_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
# ROI Metrics tab
with gr.Tab("ROI Metrics"):
with gr.Column():
refresh_roi_btn = gr.Button("Refresh ROI Data")
# Create container for plotly figure with responsive sizing
with gr.Column():
combined_roi_graph = gr.Plot(label="ROI for All Agents", elem_id="responsive_roi_plot")
# Create compact toggle controls at the bottom of the graph
with gr.Row(visible=True):
gr.Markdown("##### Toggle Graph Lines", elem_id="roi_toggle_title")
with gr.Row():
with gr.Column():
with gr.Row(elem_id="roi_toggle_container"):
with gr.Column(scale=1, min_width=150):
roi_toggle = gr.Checkbox(label="ROI Average", value=True, elem_id="roi_toggle")
# Add a text area for status messages
roi_status_text = gr.Textbox(label="Status", value="Ready", interactive=False)
# Add custom CSS for making the plots responsive
gr.HTML("""
<style>
/* Make plots responsive */
#responsive_apr_plot, #responsive_roi_plot {
width: 100% !important;
max-width: 100% !important;
}
#responsive_apr_plot > div, #responsive_roi_plot > div {
width: 100% !important;
height: auto !important;
min-height: 500px !important;
}
/* Toggle checkbox styling */
#apr_toggle .gr-checkbox {
accent-color: #e74c3c !important;
}
#adjusted_apr_toggle .gr-checkbox {
accent-color: #2ecc71 !important;
}
#roi_toggle .gr-checkbox {
accent-color: #3498db !important;
}
/* Make the toggle section more compact */
#apr_toggle_title, #roi_toggle_title {
margin-bottom: 0;
margin-top: 10px;
}
#apr_toggle_container, #roi_toggle_container {
margin-top: 5px;
}
/* Style the checkbox labels */
.gr-form.gr-box {
border: none !important;
background: transparent !important;
}
/* Make checkboxes and labels appear on the same line */
.gr-checkbox-container {
display: flex !important;
align-items: center !important;
}
/* Add colored indicators */
#apr_toggle .gr-checkbox-label::before {
content: "●";
color: #e74c3c;
margin-right: 5px;
}
#adjusted_apr_toggle .gr-checkbox-label::before {
content: "●";
color: #2ecc71;
margin-right: 5px;
}
#roi_toggle .gr-checkbox-label::before {
content: "●";
color: #3498db;
margin-right: 5px;
}
</style>
""")
# Function to update the APR graph
def update_apr_graph(show_apr_ma=True, show_adjusted_apr_ma=True):
# Generate visualization and get figure object directly
try:
combined_fig, _ = generate_apr_visualizations()
# Update visibility of traces based on toggle values
for i, trace in enumerate(combined_fig.data):
# Check if this is a moving average trace
if trace.name == 'Median APR (7d window)':
trace.visible = show_apr_ma
elif trace.name == 'Median Adjusted APR (7d window)': # Must match the trace name used in create_combined_time_series_graph
trace.visible = show_adjusted_apr_ma
return combined_fig
except Exception as e:
logger.exception("Error generating APR visualization")
# Create error figure
error_fig = go.Figure()
error_fig.add_annotation(
text=f"Error: {str(e)}",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15, color="red")
)
return error_fig
# Function to update the ROI graph
def update_roi_graph(show_roi_ma=True):
# Generate visualization and get figure object directly
try:
combined_fig, _ = generate_roi_visualizations()
# Update visibility of traces based on toggle values
for i, trace in enumerate(combined_fig.data):
# Check if this is a moving average trace
if trace.name == 'Median ROI (7d window)':
trace.visible = show_roi_ma
return combined_fig
except Exception as e:
logger.exception("Error generating ROI visualization")
# Create error figure
error_fig = go.Figure()
error_fig.add_annotation(
text=f"Error: {str(e)}",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15, color="red")
)
return error_fig
# Initialize the APR graph on load with a placeholder
apr_placeholder_fig = go.Figure()
apr_placeholder_fig.add_annotation(
text="Click 'Refresh APR Data' to load APR graph",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15)
)
combined_apr_graph.value = apr_placeholder_fig
# Initialize the ROI graph on load with a placeholder
roi_placeholder_fig = go.Figure()
roi_placeholder_fig.add_annotation(
text="Click 'Refresh ROI Data' to load ROI graph",
x=0.5, y=0.5,
showarrow=False,
font=dict(size=15)
)
combined_roi_graph.value = roi_placeholder_fig
# Function to update the APR graph based on toggle states
def update_apr_graph_with_toggles(apr_visible, adjusted_apr_visible):
return update_apr_graph(apr_visible, adjusted_apr_visible)
# Function to update the ROI graph based on toggle states
def update_roi_graph_with_toggles(roi_visible):
return update_roi_graph(roi_visible)
# Function to refresh APR data
def refresh_apr_data():
"""Refresh APR data from the database and update the visualization"""
try:
# Fetch new APR data
logger.info("Manually refreshing APR data...")
fetch_apr_data_from_db()
# Verify data was fetched successfully
if global_df is None or len(global_df) == 0:
logger.error("Failed to fetch APR data")
return combined_apr_graph.value, "Error: Failed to fetch APR data. Check the logs for details."
# Log info about fetched data with focus on adjusted_apr
may_10_2025 = datetime(2025, 5, 10)
if 'timestamp' in global_df and 'adjusted_apr' in global_df:
after_may_10 = global_df[global_df['timestamp'] >= may_10_2025]
with_adjusted_after_may_10 = after_may_10[after_may_10['adjusted_apr'].notna()]
logger.info(f"Data points after May 10th, 2025: {len(after_may_10)}")
logger.info(f"Data points with adjusted_apr after May 10th, 2025: {len(with_adjusted_after_may_10)}")
# Generate new visualization
logger.info("Generating new APR visualization...")
new_graph = update_apr_graph(apr_toggle.value, adjusted_apr_toggle.value)
return new_graph, "APR data refreshed successfully"
except Exception as e:
logger.error(f"Error refreshing APR data: {e}")
return combined_apr_graph.value, f"Error: {str(e)}"
# Function to refresh ROI data
def refresh_roi_data():
"""Refresh ROI data from the database and update the visualization"""
try:
# Fetch new ROI data
logger.info("Manually refreshing ROI data...")
fetch_apr_data_from_db() # This also fetches ROI data
# Verify data was fetched successfully
if global_roi_df is None or len(global_roi_df) == 0:
logger.error("Failed to fetch ROI data")
return combined_roi_graph.value, "Error: Failed to fetch ROI data. Check the logs for details."
# Generate new visualization
logger.info("Generating new ROI visualization...")
new_graph = update_roi_graph(roi_toggle.value)
return new_graph, "ROI data refreshed successfully"
except Exception as e:
logger.error(f"Error refreshing ROI data: {e}")
return combined_roi_graph.value, f"Error: {str(e)}"
# Set up the button click event for APR refresh
refresh_apr_btn.click(
fn=refresh_apr_data,
inputs=[],
outputs=[combined_apr_graph, apr_status_text]
)
# Set up the button click event for ROI refresh
refresh_roi_btn.click(
fn=refresh_roi_data,
inputs=[],
outputs=[combined_roi_graph, roi_status_text]
)
# Set up the toggle switch events for APR
apr_toggle.change(
fn=update_apr_graph_with_toggles,
inputs=[apr_toggle, adjusted_apr_toggle],
outputs=[combined_apr_graph]
)
adjusted_apr_toggle.change(
fn=update_apr_graph_with_toggles,
inputs=[apr_toggle, adjusted_apr_toggle],
outputs=[combined_apr_graph]
)
# Set up the toggle switch events for ROI
roi_toggle.change(
fn=update_roi_graph_with_toggles,
inputs=[roi_toggle],
outputs=[combined_roi_graph]
)
return demo
# Launch the dashboard
if __name__ == "__main__":
dashboard().launch()
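# Note: generate_adjusted_apr_report below sits after the __main__ guard, so when this
# file is run directly it is only defined once dashboard().launch() returns, and it does
# not appear to be called from the dashboard code above. It can be invoked manually, e.g.:
#   report_path = generate_adjusted_apr_report() # path to adjusted_apr_report.txt, or None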
def generate_adjusted_apr_report():
"""
Generate a detailed report about adjusted_apr data availability and save it to a file.
Returns the path to the generated report file.
"""
global global_df
if global_df is None or global_df.empty or 'adjusted_apr' not in global_df.columns:
logger.warning("No adjusted_apr data available for report generation")
return None
# Create a report file
report_path = "adjusted_apr_report.txt"
with open(report_path, "w") as f:
f.write("======== ADJUSTED APR DATA AVAILABILITY REPORT ========\n\n")
# Summary statistics
total_records = len(global_df)
records_with_adjusted = global_df['adjusted_apr'].notna().sum()
pct_with_adjusted = (records_with_adjusted / total_records) * 100 if total_records > 0 else 0
f.write(f"Total APR records: {total_records}\n")
f.write(f"Records with adjusted_apr: {records_with_adjusted} ({pct_with_adjusted:.2f}%)\n\n")
# First and last data points
if records_with_adjusted > 0:
has_adjusted = global_df[global_df['adjusted_apr'].notna()]
first_date = has_adjusted['timestamp'].min()
last_date = has_adjusted['timestamp'].max()
f.write(f"First adjusted_apr record: {first_date}\n")
f.write(f"Last adjusted_apr record: {last_date}\n")
f.write(f"Date range: {(last_date - first_date).days} days\n\n")
# Agent statistics
f.write("===== AGENT STATISTICS =====\n\n")
# Group by agent
agent_stats = []
for agent_id in global_df['agent_id'].unique():
agent_data = global_df[global_df['agent_id'] == agent_id]
agent_name = agent_data['agent_name'].iloc[0] if not agent_data.empty else f"Agent {agent_id}"
total_agent_records = len(agent_data)
agent_with_adjusted = agent_data['adjusted_apr'].notna().sum()
coverage_pct = (agent_with_adjusted / total_agent_records) * 100 if total_agent_records > 0 else 0
agent_stats.append({
'agent_id': agent_id,
'agent_name': agent_name,
'total_records': total_agent_records,
'with_adjusted': agent_with_adjusted,
'coverage_pct': coverage_pct
})
# Sort by coverage percentage (descending)
agent_stats.sort(key=lambda x: x['coverage_pct'], reverse=True)
# Write agent statistics
for agent in agent_stats:
f.write(f"Agent: {agent['agent_name']} (ID: {agent['agent_id']})\n")
f.write(f" Records: {agent['total_records']}\n")
f.write(f" With adjusted_apr: {agent['with_adjusted']} ({agent['coverage_pct']:.2f}%)\n")
# If agent has adjusted data, show date range
agent_data = global_df[global_df['agent_id'] == agent['agent_id']]
agent_adjusted = agent_data[agent_data['adjusted_apr'].notna()]
if not agent_adjusted.empty:
first = agent_adjusted['timestamp'].min()
last = agent_adjusted['timestamp'].max()
f.write(f" First adjusted_apr: {first}\n")
f.write(f" Last adjusted_apr: {last}\n")
f.write("\n")
# Check for May 10th cutoff issue
f.write("===== MAY 10TH CUTOFF ANALYSIS =====\n\n")
may_10_2025 = datetime(2025, 5, 10)
before_cutoff = global_df[global_df['timestamp'] < may_10_2025]
after_cutoff = global_df[global_df['timestamp'] >= may_10_2025]
# Calculate coverage before and after
before_total = len(before_cutoff)
before_with_adjusted = before_cutoff['adjusted_apr'].notna().sum()
before_pct = (before_with_adjusted / before_total) * 100 if before_total > 0 else 0
after_total = len(after_cutoff)
after_with_adjusted = after_cutoff['adjusted_apr'].notna().sum()
after_pct = (after_with_adjusted / after_total) * 100 if after_total > 0 else 0
f.write(f"Before May 10th, 2025:\n")
f.write(f" Records: {before_total}\n")
f.write(f" With adjusted_apr: {before_with_adjusted} ({before_pct:.2f}%)\n\n")
f.write(f"After May 10th, 2025:\n")
f.write(f" Records: {after_total}\n")
f.write(f" With adjusted_apr: {after_with_adjusted} ({after_pct:.2f}%)\n\n")
# Check for agents that had data before but not after
if before_total > 0 and after_total > 0:
agents_before = set(before_cutoff[before_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
agents_after = set(after_cutoff[after_cutoff['adjusted_apr'].notna()]['agent_id'].unique())
missing_after = agents_before - agents_after
new_after = agents_after - agents_before
if missing_after:
f.write(f"Agents with adjusted_apr before May 10th but not after: {list(missing_after)}\n")
# For each missing agent, show the last date with adjusted_apr
for agent_id in missing_after:
agent_data = before_cutoff[(before_cutoff['agent_id'] == agent_id) &
(before_cutoff['adjusted_apr'].notna())]
if not agent_data.empty:
last_date = agent_data['timestamp'].max()
agent_name = agent_data['agent_name'].iloc[0]
f.write(f" {agent_name} (ID: {agent_id}): Last adjusted_apr on {last_date}\n")
if new_after:
f.write(f"\nAgents with adjusted_apr after May 10th but not before: {list(new_after)}\n")
logger.info(f"Adjusted APR report generated: {report_path}")
return report_path