gauravlochab committed on
Commit
75be6c1
·
1 Parent(s): 6c05858

chore: fix counts for daily active agents

Browse files
Files changed (1) hide show
  1. app.py +162 -239
app.py CHANGED
@@ -33,12 +33,20 @@ logging.basicConfig(
33
  level=logging.INFO, # Use INFO level instead of DEBUG to reduce verbosity
34
  format="%(asctime)s - %(levelname)s - %(message)s",
35
  handlers=[
36
- logging.FileHandler("app_debug.log"), # Log to file for persistence
37
  logging.StreamHandler() # Also log to console
38
- ]
 
39
  )
40
  logger = logging.getLogger(__name__)
41
 
 
 
 
 
 
 
 
42
  # Reduce third-party library logging
43
  logging.getLogger("urllib3").setLevel(logging.WARNING)
44
  logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -759,11 +767,11 @@ def log_adjusted_apr_availability(df):
759
  logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
760
 
761
  def generate_apr_visualizations():
762
- """Generate APR visualizations with CSV-first approach for Hugging Face Space deployment"""
763
  global global_df
764
 
765
- # CSV-FIRST APPROACH: Try to load from CSV first
766
- logger.info("Attempting to load APR data from CSV files...")
767
  df, csv_file = load_apr_data_from_csv()
768
 
769
  if not df.empty:
@@ -775,136 +783,70 @@ def generate_apr_visualizations():
775
  combined_fig = create_combined_time_series_graph(df)
776
  return combined_fig, csv_file
777
 
778
- # FALLBACK: If CSV not available, try API
779
- logger.info("CSV data not available, falling back to API...")
780
- try:
781
- df, _ = fetch_apr_data_from_db()
782
-
783
- # If we got no data at all, return placeholder figures
784
- if df.empty:
785
- logger.info("No APR data available from API either. Using fallback visualization.")
786
- # Create empty visualizations with a message using Plotly
787
- fig = go.Figure()
788
- fig.add_annotation(
789
- x=0.5, y=0.5,
790
- text="No APR data available",
791
- font=dict(size=20),
792
- showarrow=False
793
- )
794
- fig.update_layout(
795
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
796
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
797
- )
798
-
799
- # Save as static file for reference
800
- fig.write_html("optimus_apr_combined_graph.html")
801
- fig.write_image("optimus_apr_combined_graph.png")
802
-
803
- csv_file = None
804
- return fig, csv_file
805
-
806
- # Apply preprocessing to fix APR and ROI values
807
- logger.info("Applying preprocessing to fix APR and ROI values...")
808
- df = fix_apr_and_roi(df) # Apply preprocessing
809
- global_df = df
810
-
811
- # IMPORTANT: Also fix the ROI DataFrame with corrected values
812
- logger.info("Extracting corrected ROI values from fixed APR data...")
813
- if not df.empty and 'roi' in df.columns:
814
- # Create corrected ROI DataFrame from the fixed APR data
815
- corrected_roi_data = []
816
- for idx, row in df.iterrows():
817
- if not row['is_dummy'] and pd.notna(row['roi']):
818
- roi_entry = {
819
- "roi": row["roi"], # This is now the corrected ROI value
820
- "timestamp": row["timestamp"],
821
- "agent_id": row["agent_id"],
822
- "agent_name": row["agent_name"],
823
- "is_dummy": False,
824
- "metric_type": "ROI"
825
- }
826
- corrected_roi_data.append(roi_entry)
827
-
828
- # Replace the original ROI DataFrame with corrected values
829
- if corrected_roi_data:
830
- corrected_roi_df = pd.DataFrame(corrected_roi_data)
831
-
832
- # Combine with dummy ROI data if it exists
833
- if global_roi_df is not None and not global_roi_df.empty:
834
- dummy_roi_data = global_roi_df[global_roi_df['is_dummy'] == True]
835
- if not dummy_roi_data.empty:
836
- global_roi_df = pd.concat([corrected_roi_df, dummy_roi_data], ignore_index=True)
837
- else:
838
- global_roi_df = corrected_roi_df
839
- else:
840
- global_roi_df = corrected_roi_df
841
-
842
- logger.info(f"Updated ROI DataFrame with {len(corrected_roi_data)} corrected ROI values")
843
- else:
844
- logger.warning("No corrected ROI values found to update ROI DataFrame")
845
-
846
- # Save preprocessed data to CSV before creating visualizations
847
- logger.info("Saving preprocessed APR data to CSV...")
848
- csv_file = save_to_csv(df)
849
-
850
- # Create visualizations using the saved CSV data
851
- logger.info("Creating APR visualizations from preprocessed data...")
852
- combined_fig = create_combined_time_series_graph(df)
853
-
854
- return combined_fig, csv_file
855
-
856
- except Exception as e:
857
- logger.error(f"Error fetching APR data from API: {e}")
858
- # Return error visualization
859
- fig = go.Figure()
860
- fig.add_annotation(
861
- x=0.5, y=0.5,
862
- text=f"Error loading data: {str(e)}",
863
- font=dict(size=16, color="red"),
864
- showarrow=False
865
- )
866
- fig.update_layout(
867
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
868
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
869
- )
870
- return fig, None
871
 
872
  def generate_roi_visualizations():
873
- """Generate ROI visualizations with CSV-first approach for Hugging Face Space deployment"""
874
  global global_roi_df
875
 
876
- # FIXED APPROACH: Use corrected ROI data from APR CSV instead of problematic ROI CSV
877
- logger.info("Loading corrected ROI data from APR CSV files...")
878
  df_apr, csv_file = load_apr_data_from_csv()
879
 
880
  if not df_apr.empty and 'roi' in df_apr.columns:
881
- # Extract ROI data from the APR CSV (which contains corrected values)
882
- roi_data = []
883
- for idx, row in df_apr.iterrows():
884
- if pd.notna(row['roi']):
885
- roi_entry = {
886
- "roi": row["roi"], # Use corrected ROI from APR data
887
- "timestamp": row["timestamp"],
888
- "agent_id": row["agent_id"],
889
- "agent_name": row["agent_name"],
890
- "is_dummy": row["is_dummy"],
891
- "metric_type": "ROI",
892
- "apr": row.get("apr"),
893
- "adjusted_apr": row.get("adjusted_apr")
894
- }
895
- roi_data.append(roi_entry)
896
-
897
- if roi_data:
898
- df_roi = pd.DataFrame(roi_data)
899
- logger.info(f"Successfully extracted {len(df_roi)} corrected ROI records from APR CSV")
 
 
 
 
 
 
 
 
 
 
 
900
  global_roi_df = df_roi
901
 
902
- # Create visualizations using corrected ROI data
903
- logger.info("Creating ROI visualizations from corrected APR CSV data...")
904
  combined_fig = create_combined_roi_time_series_graph(df_roi)
905
- return combined_fig, "optimus_apr_values.csv" # Source is APR CSV
906
  else:
907
- logger.warning("No ROI data found in APR CSV")
908
  else:
909
  logger.warning("APR CSV not available or missing ROI column")
910
 
@@ -933,68 +875,16 @@ def generate_roi_visualizations():
933
  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
934
  )
935
 
936
- # Save as static file for reference
937
- fig.write_html("optimus_roi_graph.html")
938
- fig.write_image("optimus_roi_graph.png")
939
-
940
- csv_file = None
941
- return fig, csv_file
942
 
943
  # Set global_roi_df for access by other functions
944
  global_roi_df = df_roi
945
 
946
- # IMPORTANT: Apply ROI corrections if we have fresh API data
947
- logger.info("Checking if ROI data needs correction...")
948
- if not df_roi.empty:
949
- # Check if this ROI data contains uncorrected values (from API)
950
- uncorrected_roi = df_roi[df_roi['is_dummy'] == False]
951
- if not uncorrected_roi.empty:
952
- logger.info("ROI data contains uncorrected values, applying corrections...")
953
-
954
- # We need to get the corrected APR data to extract corrected ROI values
955
- if global_df is not None and not global_df.empty:
956
- # Extract corrected ROI values from the fixed APR data
957
- corrected_roi_data = []
958
- for idx, row in global_df.iterrows():
959
- if not row['is_dummy'] and pd.notna(row['roi']):
960
- roi_entry = {
961
- "roi": row["roi"], # This is the corrected ROI value
962
- "timestamp": row["timestamp"],
963
- "agent_id": row["agent_id"],
964
- "agent_name": row["agent_name"],
965
- "is_dummy": False,
966
- "metric_type": "ROI"
967
- }
968
- corrected_roi_data.append(roi_entry)
969
-
970
- if corrected_roi_data:
971
- corrected_roi_df = pd.DataFrame(corrected_roi_data)
972
-
973
- # Combine with dummy ROI data if it exists
974
- dummy_roi_data = df_roi[df_roi['is_dummy'] == True]
975
- if not dummy_roi_data.empty:
976
- df_roi = pd.concat([corrected_roi_df, dummy_roi_data], ignore_index=True)
977
- else:
978
- df_roi = corrected_roi_df
979
-
980
- global_roi_df = df_roi
981
- logger.info(f"Updated ROI DataFrame with {len(corrected_roi_data)} corrected ROI values")
982
- else:
983
- logger.warning("No corrected ROI values found in APR data")
984
- else:
985
- logger.warning("No corrected APR data available to extract ROI values from")
986
- else:
987
- logger.info("ROI data contains only dummy values, no correction needed")
988
-
989
- # Save preprocessed ROI data to CSV before creating visualizations
990
- logger.info("Saving preprocessed ROI data to CSV...")
991
- csv_file = save_roi_to_csv(df_roi)
992
-
993
- # Create visualizations using the saved CSV data
994
- logger.info("Creating ROI visualizations from preprocessed data...")
995
  combined_fig = create_combined_roi_time_series_graph(df_roi)
996
 
997
- return combined_fig, csv_file
998
 
999
  except Exception as e:
1000
  logger.error(f"Error fetching ROI data from API: {e}")
@@ -1030,6 +920,13 @@ def aggregate_daily_data(df, metric_column):
1030
  df = df.copy()
1031
  df['date'] = df['timestamp'].dt.date
1032
 
 
 
 
 
 
 
 
1033
  # NEW: Add detailed logging to verify median calculation
1034
  logger.info(f"=== MEDIAN CALCULATION DEBUG for {metric_column} ===")
1035
 
@@ -1197,39 +1094,8 @@ def create_combined_roi_time_series_graph(df):
1197
  for agent_id, data in agent_runtimes.items():
1198
  logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: {data['last_report']}")
1199
 
1200
- # IMPORTANT: Clean and convert ROI data to ensure consistency
1201
- logger.info("Cleaning ROI data before conversion...")
1202
-
1203
- def clean_roi_value(value):
1204
- """Clean and convert ROI value to float"""
1205
- if pd.isna(value):
1206
- return None
1207
-
1208
- # If it's already a number, return it
1209
- if isinstance(value, (int, float)):
1210
- return float(value)
1211
-
1212
- # If it's a string, try to extract numeric value
1213
- if isinstance(value, str):
1214
- # Remove any non-numeric characters except decimal point and minus sign
1215
- import re
1216
- # Look for patterns like "value': 16.007665648354" and extract the number
1217
- match = re.search(r'[\d\.-]+', value)
1218
- if match:
1219
- try:
1220
- return float(match.group())
1221
- except ValueError:
1222
- logger.warning(f"Could not convert ROI value to float: {value}")
1223
- return None
1224
- else:
1225
- logger.warning(f"No numeric value found in ROI string: {value}")
1226
- return None
1227
-
1228
- logger.warning(f"Unexpected ROI value type: {type(value)} - {value}")
1229
- return None
1230
-
1231
- # Apply cleaning function to ROI column
1232
- df['roi'] = df['roi'].apply(clean_roi_value)
1233
 
1234
  # Remove rows with invalid ROI values
1235
  initial_count = len(df)
@@ -1240,10 +1106,9 @@ def create_combined_roi_time_series_graph(df):
1240
  if removed_count > 0:
1241
  logger.warning(f"Removed {removed_count} rows with invalid ROI values")
1242
 
1243
- # Ensure ROI is float after cleaning
1244
  df['roi'] = df['roi'].astype(float)
1245
- # ROI values are already in percentage format from initial_value_fixer.py
1246
- df['metric_type'] = df['metric_type'].astype(str) # Ensure metric_type is string
1247
 
1248
  # Get min and max time for shapes
1249
  min_time = df['timestamp'].min()
@@ -1406,6 +1271,12 @@ def create_combined_roi_time_series_graph(df):
1406
  # Calculate number of active agents on this date
1407
  active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
1408
 
 
 
 
 
 
 
1409
  hover_data_roi.append(
1410
  f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
1411
  )
@@ -1858,22 +1729,22 @@ def create_combined_time_series_graph(df):
1858
  min_time = df['timestamp'].min()
1859
  max_time = df['timestamp'].max()
1860
 
1861
- # Add shape for positive APR region (above zero)
1862
  fig.add_shape(
1863
  type="rect",
1864
  fillcolor="rgba(230, 243, 255, 0.3)",
1865
  line=dict(width=0),
1866
- y0=0, y1=apr_data['apr'].max() * 1.1 if not apr_data.empty else 10, # Dynamic positive value based on data
1867
  x0=min_time, x1=max_time,
1868
  layer="below"
1869
  )
1870
 
1871
- # Add shape for negative APR region (below zero)
1872
  fig.add_shape(
1873
  type="rect",
1874
  fillcolor="rgba(255, 230, 230, 0.3)",
1875
  line=dict(width=0),
1876
- y0=apr_data['apr'].min() * 1.1 if not apr_data.empty else -10, y1=0, # Dynamic negative value based on data
1877
  x0=min_time, x1=max_time,
1878
  layer="below"
1879
  )
@@ -1888,7 +1759,22 @@ def create_combined_time_series_graph(df):
1888
 
1889
  # MODIFIED: Calculate average APR values across all agents for each timestamp
1890
  # Filter for APR data only
 
 
 
 
 
 
 
 
 
 
 
 
1891
  apr_data = df[df['metric_type'] == 'APR'].copy()
 
 
 
1892
 
1893
  # Filter APR outliers (±200% range)
1894
  before_outlier_filter = len(apr_data)
@@ -2046,14 +1932,39 @@ def create_combined_time_series_graph(df):
2046
  y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
2047
 
2048
  # Create hover template for the APR moving average line
 
2049
  hover_data_apr = []
2050
  for idx, row in daily_medians_with_ma.iterrows():
2051
  timestamp = row['timestamp']
2052
  # Format timestamp to show only date for daily data
2053
  formatted_timestamp = timestamp.strftime('%Y-%m-%d')
2054
 
2055
- # Calculate number of active agents on this date
2056
- active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2057
 
2058
  hover_data_apr.append(
2059
  f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
@@ -2079,14 +1990,38 @@ def create_combined_time_series_graph(df):
2079
  y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
2080
 
2081
  # Create hover template for the adjusted APR moving average line
 
2082
  hover_data_adj = []
2083
  for idx, row in daily_medians_adjusted_with_ma.iterrows():
2084
  timestamp = row['timestamp']
2085
  # Format timestamp to show only date for daily data
2086
  formatted_timestamp = timestamp.strftime('%Y-%m-%d')
2087
 
2088
- # Calculate number of active agents on this date
2089
- active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2090
 
2091
  hover_data_adj.append(
2092
  f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
@@ -2141,25 +2076,13 @@ def create_combined_time_series_graph(df):
2141
  hovermode="closest"
2142
  )
2143
 
2144
- # Add annotations for y-axis regions
2145
- fig.add_annotation(
2146
- x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
2147
- y=-25, # Middle of the negative region
2148
- xref="paper",
2149
- yref="y",
2150
- text="Percent drawdown [%]",
2151
- showarrow=False,
2152
- font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
2153
- textangle=-90, # Rotate text to be vertical
2154
- align="center"
2155
- )
2156
-
2157
  fig.add_annotation(
2158
  x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
2159
- y=50, # Middle of the positive region
2160
  xref="paper",
2161
  yref="y",
2162
- text="Agent APR [%]",
2163
  showarrow=False,
2164
  font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
2165
  textangle=-90, # Rotate text to be vertical
 
33
  level=logging.INFO, # Use INFO level instead of DEBUG to reduce verbosity
34
  format="%(asctime)s - %(levelname)s - %(message)s",
35
  handlers=[
36
+ logging.FileHandler("app_debug.log", mode='a'), # Append mode for persistence
37
  logging.StreamHandler() # Also log to console
38
+ ],
39
+ force=True # Force reconfiguration of logging
40
  )
41
  logger = logging.getLogger(__name__)
42
 
43
+ # Ensure the logger level is set correctly
44
+ logger.setLevel(logging.INFO)
45
+
46
+ # Test logging to verify it's working
47
+ logger.info("=== LOGGING SYSTEM INITIALIZED ===")
48
+ logger.info("Debug logs will be written to app_debug.log")
49
+
50
  # Reduce third-party library logging
51
  logging.getLogger("urllib3").setLevel(logging.WARNING)
52
  logging.getLogger("httpx").setLevel(logging.WARNING)
 
767
  logger.info(f"Agent {agent_name} (ID: {agent_id}): Missing adjusted_apr from {gap_start} to {gap_end} ({gap_duration.days} days, {gap_duration.seconds//3600} hours)")
768
 
769
  def generate_apr_visualizations():
770
+ """Generate APR visualizations using CSV data only for consistency with ROI graph"""
771
  global global_df
772
 
773
+ # CONSISTENCY FIX: Always use CSV data to match ROI graph behavior
774
+ logger.info("Loading APR data from CSV files for consistency with ROI graph...")
775
  df, csv_file = load_apr_data_from_csv()
776
 
777
  if not df.empty:
 
783
  combined_fig = create_combined_time_series_graph(df)
784
  return combined_fig, csv_file
785
 
786
+ # FALLBACK: If CSV not available, return error message
787
+ logger.error("CSV data not available and API fallback disabled for consistency")
788
+ # Create empty visualization with a message using Plotly
789
+ fig = go.Figure()
790
+ fig.add_annotation(
791
+ x=0.5, y=0.5,
792
+ text="No APR data available - CSV file missing",
793
+ font=dict(size=20),
794
+ showarrow=False
795
+ )
796
+ fig.update_layout(
797
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
798
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
799
+ )
800
+
801
+ return fig, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
802
 
803
  def generate_roi_visualizations():
804
+ """Generate ROI visualizations directly from optimus_apr_values.csv"""
805
  global global_roi_df
806
 
807
+ # SIMPLIFIED APPROACH: Load ROI data directly from APR CSV
808
+ logger.info("Loading ROI data directly from optimus_apr_values.csv...")
809
  df_apr, csv_file = load_apr_data_from_csv()
810
 
811
  if not df_apr.empty and 'roi' in df_apr.columns:
812
+ # CONSISTENCY FIX: Apply same filtering as APR graph
813
+ logger.info("=== ROI GRAPH DATA FILTERING DEBUG ===")
814
+ logger.info(f"Initial APR data loaded: {len(df_apr)} records")
815
+ logger.info(f"Unique agents in initial data: {df_apr['agent_id'].nunique()}")
816
+ logger.info(f"Agent IDs in initial data: {sorted(df_apr['agent_id'].unique().tolist())}")
817
+
818
+ # Check metric_type distribution
819
+ if 'metric_type' in df_apr.columns:
820
+ metric_counts = df_apr['metric_type'].value_counts()
821
+ logger.info(f"Metric type distribution: {metric_counts.to_dict()}")
822
+ else:
823
+ logger.warning("No 'metric_type' column found in APR data")
824
+
825
+ # First filter by metric_type == 'APR' to match APR graph logic
826
+ df_apr_filtered = df_apr[df_apr['metric_type'] == 'APR'].copy()
827
+ logger.info(f"After metric_type == 'APR' filter: {len(df_apr_filtered)} records")
828
+ logger.info(f"Unique agents after APR filter: {df_apr_filtered['agent_id'].nunique()}")
829
+ logger.info(f"Agent IDs after APR filter: {sorted(df_apr_filtered['agent_id'].unique().tolist())}")
830
+
831
+ # Then filter for rows with valid ROI values
832
+ df_roi = df_apr_filtered[df_apr_filtered['roi'].notna()].copy()
833
+ logger.info(f"After ROI filter: {len(df_roi)} records")
834
+ logger.info(f"Unique agents after ROI filter: {df_roi['agent_id'].nunique()}")
835
+ logger.info(f"Agent IDs after ROI filter: {sorted(df_roi['agent_id'].unique().tolist())}")
836
+
837
+ if not df_roi.empty:
838
+ # Add metric_type column for consistency
839
+ df_roi['metric_type'] = 'ROI'
840
+
841
+ logger.info(f"Successfully loaded {len(df_roi)} ROI records from APR CSV")
842
  global_roi_df = df_roi
843
 
844
+ # Create visualizations using ROI data from APR CSV
845
+ logger.info("Creating ROI visualizations from APR CSV data...")
846
  combined_fig = create_combined_roi_time_series_graph(df_roi)
847
+ return combined_fig, csv_file
848
  else:
849
+ logger.warning("No valid ROI data found in APR CSV")
850
  else:
851
  logger.warning("APR CSV not available or missing ROI column")
852
 
 
875
  yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
876
  )
877
 
878
+ return fig, None
 
 
 
 
 
879
 
880
  # Set global_roi_df for access by other functions
881
  global_roi_df = df_roi
882
 
883
+ # Create visualizations using API data
884
+ logger.info("Creating ROI visualizations from API data...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
885
  combined_fig = create_combined_roi_time_series_graph(df_roi)
886
 
887
+ return combined_fig, None
888
 
889
  except Exception as e:
890
  logger.error(f"Error fetching ROI data from API: {e}")
 
920
  df = df.copy()
921
  df['date'] = df['timestamp'].dt.date
922
 
923
+ # DEBUG: Log July 8th data specifically
924
+ july_8_data = df[df['date'] == pd.to_datetime('2025-07-08').date()]
925
+ if not july_8_data.empty:
926
+ july_8_agents = july_8_data['agent_id'].unique()
927
+ logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agents before aggregation: {len(july_8_agents)}")
928
+ logger.info(f"DAILY AGGREGATION DEBUG ({metric_column}) - July 8th agent IDs: {sorted(july_8_agents.tolist())}")
929
+
930
  # NEW: Add detailed logging to verify median calculation
931
  logger.info(f"=== MEDIAN CALCULATION DEBUG for {metric_column} ===")
932
 
 
1094
  for agent_id, data in agent_runtimes.items():
1095
  logger.info(f"Agent {data['agent_name']} (ID: {agent_id}): Runtime = {data['runtime_days']:.2f} days, Last report: {data['last_report']}")
1096
 
1097
+ # SIMPLIFIED: ROI data is already clean from CSV, just ensure proper data types
1098
+ logger.info("Processing ROI data from CSV...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1099
 
1100
  # Remove rows with invalid ROI values
1101
  initial_count = len(df)
 
1106
  if removed_count > 0:
1107
  logger.warning(f"Removed {removed_count} rows with invalid ROI values")
1108
 
1109
+ # Ensure proper data types
1110
  df['roi'] = df['roi'].astype(float)
1111
+ df['metric_type'] = df['metric_type'].astype(str)
 
1112
 
1113
  # Get min and max time for shapes
1114
  min_time = df['timestamp'].min()
 
1271
  # Calculate number of active agents on this date
1272
  active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
1273
 
1274
+ # DEBUG: Log agent counts for July 8th specifically
1275
+ if formatted_timestamp == '2025-07-08':
1276
+ agents_on_date = daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique()
1277
+ logger.info(f"ROI GRAPH - July 8th active agents: {active_agents}")
1278
+ logger.info(f"ROI GRAPH - July 8th agent IDs: {sorted(agents_on_date.tolist())}")
1279
+
1280
  hover_data_roi.append(
1281
  f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
1282
  )
 
1729
  min_time = df['timestamp'].min()
1730
  max_time = df['timestamp'].max()
1731
 
1732
+ # Add shape for positive APR region (above zero) - use reasonable fixed range
1733
  fig.add_shape(
1734
  type="rect",
1735
  fillcolor="rgba(230, 243, 255, 0.3)",
1736
  line=dict(width=0),
1737
+ y0=0, y1=200, # Fixed positive range to avoid extreme outliers affecting the view
1738
  x0=min_time, x1=max_time,
1739
  layer="below"
1740
  )
1741
 
1742
+ # Add shape for negative APR region (below zero) - use reasonable fixed range
1743
  fig.add_shape(
1744
  type="rect",
1745
  fillcolor="rgba(255, 230, 230, 0.3)",
1746
  line=dict(width=0),
1747
+ y0=-200, y1=0, # Fixed negative range to avoid extreme outliers affecting the view
1748
  x0=min_time, x1=max_time,
1749
  layer="below"
1750
  )
 
1759
 
1760
  # MODIFIED: Calculate average APR values across all agents for each timestamp
1761
  # Filter for APR data only
1762
+ logger.info("=== APR GRAPH DATA FILTERING DEBUG ===")
1763
+ logger.info(f"Initial APR data loaded: {len(df)} records")
1764
+ logger.info(f"Unique agents in initial data: {df['agent_id'].nunique()}")
1765
+ logger.info(f"Agent IDs in initial data: {sorted(df['agent_id'].unique().tolist())}")
1766
+
1767
+ # Check metric_type distribution
1768
+ if 'metric_type' in df.columns:
1769
+ metric_counts = df['metric_type'].value_counts()
1770
+ logger.info(f"Metric type distribution: {metric_counts.to_dict()}")
1771
+ else:
1772
+ logger.warning("No 'metric_type' column found in APR data")
1773
+
1774
  apr_data = df[df['metric_type'] == 'APR'].copy()
1775
+ logger.info(f"After metric_type == 'APR' filter: {len(apr_data)} records")
1776
+ logger.info(f"Unique agents after APR filter: {apr_data['agent_id'].nunique()}")
1777
+ logger.info(f"Agent IDs after APR filter: {sorted(apr_data['agent_id'].unique().tolist())}")
1778
 
1779
  # Filter APR outliers (±200% range)
1780
  before_outlier_filter = len(apr_data)
 
1932
  y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
1933
 
1934
  # Create hover template for the APR moving average line
1935
+ # CONSISTENCY FIX: Use ROI daily agent data for active agent counts
1936
  hover_data_apr = []
1937
  for idx, row in daily_medians_with_ma.iterrows():
1938
  timestamp = row['timestamp']
1939
  # Format timestamp to show only date for daily data
1940
  formatted_timestamp = timestamp.strftime('%Y-%m-%d')
1941
 
1942
+ # FIXED: Use ROI data to get consistent active agent counts
1943
+ # Load ROI data to get the correct agent counts
1944
+ try:
1945
+ df_roi_for_counts, _ = load_apr_data_from_csv()
1946
+ if not df_roi_for_counts.empty and 'roi' in df_roi_for_counts.columns:
1947
+ # Filter for ROI data and same date
1948
+ df_roi_filtered = df_roi_for_counts[
1949
+ (df_roi_for_counts['metric_type'] == 'APR') &
1950
+ (df_roi_for_counts['roi'].notna())
1951
+ ].copy()
1952
+
1953
+ # Aggregate daily for ROI data
1954
+ roi_daily_agent_data = aggregate_daily_data(df_roi_filtered, 'roi')
1955
+
1956
+ # Get active agents from ROI data for this date
1957
+ active_agents = len(roi_daily_agent_data[roi_daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
1958
+ else:
1959
+ # Fallback to APR data if ROI not available
1960
+ active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
1961
+ except:
1962
+ # Fallback to APR data if there's any error
1963
+ active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
1964
+
1965
+ # DEBUG: Log agent counts for July 8th specifically
1966
+ if formatted_timestamp == '2025-07-08':
1967
+ logger.info(f"APR GRAPH - July 8th active agents (using ROI logic): {active_agents}")
1968
 
1969
  hover_data_apr.append(
1970
  f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
 
1990
  y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
1991
 
1992
  # Create hover template for the adjusted APR moving average line
1993
+ # CONSISTENCY FIX: Use ROI daily agent data for active agent counts (same as regular APR)
1994
  hover_data_adj = []
1995
  for idx, row in daily_medians_adjusted_with_ma.iterrows():
1996
  timestamp = row['timestamp']
1997
  # Format timestamp to show only date for daily data
1998
  formatted_timestamp = timestamp.strftime('%Y-%m-%d')
1999
 
2000
+ # FIXED: Use ROI data to get consistent active agent counts (same logic as APR)
2001
+ try:
2002
+ df_roi_for_counts, _ = load_apr_data_from_csv()
2003
+ if not df_roi_for_counts.empty and 'roi' in df_roi_for_counts.columns:
2004
+ # Filter for ROI data and same date
2005
+ df_roi_filtered = df_roi_for_counts[
2006
+ (df_roi_for_counts['metric_type'] == 'APR') &
2007
+ (df_roi_for_counts['roi'].notna())
2008
+ ].copy()
2009
+
2010
+ # Aggregate daily for ROI data
2011
+ roi_daily_agent_data = aggregate_daily_data(df_roi_filtered, 'roi')
2012
+
2013
+ # Get active agents from ROI data for this date
2014
+ active_agents = len(roi_daily_agent_data[roi_daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
2015
+ else:
2016
+ # Fallback to adjusted APR data if ROI not available
2017
+ active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
2018
+ except:
2019
+ # Fallback to adjusted APR data if there's any error
2020
+ active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
2021
+
2022
+ # DEBUG: Log agent counts for July 8th specifically
2023
+ if formatted_timestamp == '2025-07-08':
2024
+ logger.info(f"ADJUSTED APR GRAPH - July 8th active agents (using ROI logic): {active_agents}")
2025
 
2026
  hover_data_adj.append(
2027
  f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
 
2076
  hovermode="closest"
2077
  )
2078
 
2079
+ # Add single annotation for y-axis with proper spacing
 
 
 
 
 
 
 
 
 
 
 
 
2080
  fig.add_annotation(
2081
  x=-0.08, # Position further from the y-axis to avoid overlapping with tick labels
2082
+ y=0, # Center of the y-axis
2083
  xref="paper",
2084
  yref="y",
2085
+ text="Percent drawdown (%) Agent APR (%)",
2086
  showarrow=False,
2087
  font=dict(size=16, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
2088
  textangle=-90, # Rotate text to be vertical