gauravlochab committed on
Commit
cf88990
·
1 Parent(s): 144269c

chore: add preprocessing to the dataframe

Browse files
Files changed (1) hide show
  1. app.py +204 -204
app.py CHANGED
@@ -838,8 +838,114 @@ def generate_roi_visualizations():
838
 
839
  return combined_fig, csv_file
840
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
841
  def create_combined_roi_time_series_graph(df):
842
- """Create a time series graph showing average ROI values across all agents"""
843
  if len(df) == 0:
844
  logger.error("No data to plot combined ROI graph")
845
  fig = go.Figure()
@@ -953,53 +1059,20 @@ def create_combined_roi_time_series_graph(df):
953
  # Use the filtered data for all subsequent operations
954
  df = df_filtered
955
 
956
- # Group by timestamp and calculate mean ROI
957
- avg_roi_data = df.groupby('timestamp')['roi'].mean().reset_index()
 
958
 
959
- # Sort by timestamp
960
- avg_roi_data = avg_roi_data.sort_values('timestamp')
961
-
962
- # Log the average ROI data
963
- logger.info(f"Calculated average ROI data with {len(avg_roi_data)} points")
964
- for idx, row in avg_roi_data.iterrows():
965
- logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_roi={row['roi']}")
966
-
967
- # Calculate moving average based on a time window (3 days)
968
- # Sort data by timestamp
969
- df_sorted = df.sort_values('timestamp')
970
 
971
- # Create a new dataframe for the moving average
972
- avg_roi_data_with_ma = avg_roi_data.copy()
973
- avg_roi_data_with_ma['moving_avg'] = None # Initialize the moving average column
974
 
975
- # Define the time window for the moving average (3 days)
976
- time_window = pd.Timedelta(days=3)
977
- logger.info(f"Calculating moving average with time window of {time_window}")
978
-
979
- # Calculate the moving averages for each timestamp
980
- for i, row in avg_roi_data_with_ma.iterrows():
981
- current_time = row['timestamp']
982
- window_start = current_time - time_window
983
-
984
- # Get all data points within the 3-day time window
985
- window_data = df_sorted[
986
- (df_sorted['timestamp'] >= window_start) &
987
- (df_sorted['timestamp'] <= current_time)
988
- ]
989
-
990
- # Calculate the average ROI for the 3-day time window
991
- if not window_data.empty:
992
- avg_roi_data_with_ma.at[i, 'moving_avg'] = window_data['roi'].mean()
993
- logger.debug(f"ROI time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['roi'].mean()}")
994
- else:
995
- # If no data points in the window, use the current value
996
- avg_roi_data_with_ma.at[i, 'moving_avg'] = row['roi']
997
- logger.debug(f"No data points in time window for {current_time}, using current value {row['roi']}")
998
-
999
- logger.info(f"Calculated time-based moving averages with {len(avg_roi_data_with_ma)} points")
1000
 
1001
  # Find the last date where we have valid moving average data
1002
- last_valid_ma_date = avg_roi_data_with_ma[avg_roi_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_roi_data_with_ma['moving_avg'].dropna().empty else None
1003
 
1004
  # If we don't have any valid moving average data, use the max time from the original data
1005
  last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
@@ -1007,17 +1080,17 @@ def create_combined_roi_time_series_graph(df):
1007
  logger.info(f"Last valid moving average date: {last_valid_ma_date}")
1008
  logger.info(f"Using last valid date for graph: {last_valid_date}")
1009
 
1010
- # Plot individual agent data points with agent names in hover, but limit display for scalability
1011
- if not df.empty:
1012
  # Group by agent to use different colors for each agent
1013
- unique_agents = df['agent_name'].unique()
1014
  colors = px.colors.qualitative.Plotly[:len(unique_agents)]
1015
 
1016
  # Create a color map for agents
1017
  color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
1018
 
1019
  # Calculate the total number of data points per agent to determine which are most active
1020
- agent_counts = df['agent_name'].value_counts()
1021
 
1022
  # Determine how many agents to show individually (limit to top 5 most active)
1023
  MAX_VISIBLE_AGENTS = 5
@@ -1025,9 +1098,9 @@ def create_combined_roi_time_series_graph(df):
1025
 
1026
  logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
1027
 
1028
- # Add data points for each agent, but only make top agents visible by default
1029
  for agent_name in unique_agents:
1030
- agent_data = df[df['agent_name'] == agent_name]
1031
 
1032
  # Explicitly convert to Python lists
1033
  x_values = agent_data['timestamp'].tolist()
@@ -1048,31 +1121,29 @@ def create_combined_roi_time_series_graph(df):
1048
  size=10,
1049
  line=dict(width=1, color='black')
1050
  ),
1051
- name=f'Agent: {agent_name} (ROI)',
1052
- hovertemplate='Time: %{x}<br>ROI: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
1053
  visible=is_visible # All agents hidden by default
1054
  )
1055
  )
1056
- logger.info(f"Added ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
1057
 
1058
- # Add ROI moving average as a smooth line
1059
- x_values_ma = avg_roi_data_with_ma['timestamp'].tolist()
1060
- y_values_ma = avg_roi_data_with_ma['moving_avg'].tolist()
1061
 
1062
  # Create hover template for the ROI moving average line
1063
  hover_data_roi = []
1064
- for idx, row in avg_roi_data_with_ma.iterrows():
1065
  timestamp = row['timestamp']
1066
- # Format timestamp to show only up to seconds (not milliseconds)
1067
- formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
1068
 
1069
- # Calculate number of active agents in the last 24 hours
1070
- time_24h_ago = timestamp - pd.Timedelta(hours=24)
1071
- active_agents = len(df[(df['timestamp'] >= time_24h_ago) &
1072
- (df['timestamp'] <= timestamp)]['agent_id'].unique())
1073
 
1074
  hover_data_roi.append(
1075
- f"Time: {formatted_timestamp}<br>Avg ROI (3d window): {row['moving_avg']:.2f}%<br>Active agents (24h): {active_agents}"
1076
  )
1077
 
1078
  fig.add_trace(
@@ -1080,14 +1151,14 @@ def create_combined_roi_time_series_graph(df):
1080
  x=x_values_ma,
1081
  y=y_values_ma,
1082
  mode='lines', # Only lines for moving average
1083
- line=dict(color='blue', width=2), # Thinner line
1084
- name='Average ROI (3d window)',
1085
  hovertext=hover_data_roi,
1086
  hoverinfo='text',
1087
  visible=True # Visible by default
1088
  )
1089
  )
1090
- logger.info(f"Added 3-day moving average ROI trace with {len(x_values_ma)} points")
1091
 
1092
  # Update layout with average runtime information in the title
1093
  fig.update_layout(
@@ -1148,13 +1219,13 @@ def create_combined_roi_time_series_graph(df):
1148
  )
1149
  )
1150
 
1151
- # Update y-axis with fixed range for ROI (-10 to 10)
1152
  fig.update_yaxes(
1153
  showgrid=True,
1154
  gridwidth=1,
1155
  gridcolor='rgba(0,0,0,0.1)',
1156
- # Use fixed range instead of autoscaling
1157
- range=[-10, 10], # Set fixed range from -10 to 10
1158
  tickformat=".2f", # Format tick labels with 2 decimal places
1159
  tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
1160
  title=None # Remove the built-in axis title since we're using annotations
@@ -1590,79 +1661,47 @@ def create_combined_time_series_graph(df):
1590
  # Use the filtered data for all subsequent operations
1591
  apr_data = apr_data_filtered
1592
 
1593
- # Group by timestamp and calculate mean APR
1594
- avg_apr_data = apr_data.groupby('timestamp')['apr'].mean().reset_index()
1595
-
1596
- # Sort by timestamp
1597
- avg_apr_data = avg_apr_data.sort_values('timestamp')
1598
-
1599
- # Log the average APR data
1600
- logger.info(f"Calculated average APR data with {len(avg_apr_data)} points")
1601
- for idx, row in avg_apr_data.iterrows():
1602
- logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_apr={row['apr']}")
1603
-
1604
- # Calculate moving average based on a time window (2 hours)
1605
- # Sort data by timestamp
1606
- apr_data_sorted = apr_data.sort_values('timestamp')
1607
-
1608
- # Create a new dataframe for the moving average
1609
- avg_apr_data_with_ma = avg_apr_data.copy()
1610
- avg_apr_data_with_ma['moving_avg'] = None # Initialize the moving average column
1611
 
1612
- # Define the time window for the moving average (3 days)
1613
- time_window = pd.Timedelta(days=3)
1614
- logger.info(f"Calculating moving average with time window of {time_window}")
1615
 
1616
- # Calculate moving averages: one for APR and one for adjusted APR
1617
- avg_apr_data_with_ma['moving_avg'] = None # 3-day window for APR
1618
- avg_apr_data_with_ma['adjusted_moving_avg'] = None # 3-day window for adjusted APR
1619
 
1620
- # Keep track of the last valid adjusted_moving_avg value to handle gaps
1621
- last_valid_adjusted_moving_avg = None
 
1622
 
1623
- # Calculate the moving averages for each timestamp
1624
- for i, row in avg_apr_data_with_ma.iterrows():
1625
- current_time = row['timestamp']
1626
- window_start = current_time - time_window
1627
-
1628
- # Get all data points within the 3-day time window
1629
- window_data = apr_data_sorted[
1630
- (apr_data_sorted['timestamp'] >= window_start) &
1631
- (apr_data_sorted['timestamp'] <= current_time)
1632
- ]
1633
 
1634
- # Calculate the average APR for the 3-day time window
1635
- if not window_data.empty:
1636
- avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean()
1637
- logger.debug(f"APR time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['apr'].mean()}")
1638
-
1639
- # Calculate adjusted APR moving average if data exists
1640
- has_adjusted_apr = 'adjusted_apr' in window_data.columns and window_data['adjusted_apr'].notna().any()
1641
- if has_adjusted_apr:
1642
- adjusted_avg = window_data['adjusted_apr'].dropna().mean()
1643
- avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = adjusted_avg
1644
- last_valid_adjusted_moving_avg = adjusted_avg
1645
- logger.debug(f"Adjusted APR time window {window_start} to {current_time}: {len(window_data)} points, avg={adjusted_avg}")
1646
- else:
1647
- # If we don't have adjusted_apr data in this window but had some previously,
1648
- # use the last valid value to maintain continuity in the graph
1649
- if last_valid_adjusted_moving_avg is not None:
1650
- avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = last_valid_adjusted_moving_avg
1651
- logger.debug(f"No adjusted APR data in window, using last valid value: {last_valid_adjusted_moving_avg}")
1652
- else:
1653
- # If no data points in the window, use the current value
1654
- avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
1655
- logger.debug(f"No data points in time window for {current_time}, using current value {row['apr']}")
1656
 
1657
- logger.info(f"Calculated time-based moving averages with {len(avg_apr_data_with_ma)} points")
1658
 
1659
  # Find the last date where we have valid moving average data
1660
- last_valid_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_apr_data_with_ma['moving_avg'].dropna().empty else None
1661
 
1662
  # Find the last date where we have valid adjusted moving average data
1663
  last_valid_adj_ma_date = None
1664
- if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
1665
- last_valid_adj_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['adjusted_moving_avg'].notna()]['timestamp'].max()
1666
 
1667
  # Determine the last valid date for either moving average
1668
  last_valid_date = last_valid_ma_date
@@ -1748,36 +1787,22 @@ def create_combined_time_series_graph(df):
1748
  )
1749
  logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
1750
 
1751
- # Add APR moving average as a smooth line
1752
- x_values_ma = avg_apr_data_with_ma['timestamp'].tolist()
1753
- y_values_ma = avg_apr_data_with_ma['moving_avg'].tolist()
1754
 
1755
  # Create hover template for the APR moving average line
1756
  hover_data_apr = []
1757
- for idx, row in avg_apr_data_with_ma.iterrows():
1758
  timestamp = row['timestamp']
1759
- # Format timestamp to show only up to seconds (not milliseconds)
1760
- formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
1761
-
1762
- # Calculate number of active agents in the last 24 hours
1763
- # Use ROI data after April 25th, 2025, and APR data before that date
1764
- time_24h_ago = timestamp - pd.Timedelta(hours=24)
1765
- april_25_2025 = datetime(2025, 4, 25)
1766
-
1767
- if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
1768
- # After April 25th, 2025: Use ROI data
1769
- roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
1770
- (global_roi_df['timestamp'] <= timestamp)]
1771
- active_agents = len(roi_window_data['agent_id'].unique())
1772
- logger.debug(f"Using ROI data for active agent count at {timestamp} (after Apr 25): {active_agents} agents")
1773
- else:
1774
- # Before April 25th, 2025 or if ROI data is not available: Use APR data
1775
- active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
1776
- (apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
1777
- logger.debug(f"Using APR data for active agent count at {timestamp} (before Apr 25): {active_agents} agents")
1778
 
1779
  hover_data_apr.append(
1780
- f"Time: {formatted_timestamp}<br>Avg APR (3d window): {row['moving_avg']:.2f}<br>Active agents (24h): {active_agents}"
1781
  )
1782
 
1783
  fig.add_trace(
@@ -1785,72 +1810,47 @@ def create_combined_time_series_graph(df):
1785
  x=x_values_ma,
1786
  y=y_values_ma,
1787
  mode='lines', # Only lines for moving average
1788
- line=dict(color='red', width=2), # Thinner line
1789
- name='Average APR (3d window)',
1790
  hovertext=hover_data_apr,
1791
  hoverinfo='text',
1792
  visible=True # Visible by default
1793
  )
1794
  )
1795
- logger.info(f"Added 3-day moving average APR trace with {len(x_values_ma)} points")
1796
 
1797
- # Add adjusted APR moving average line if it exists
1798
- if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
1799
- # Create a copy of the dataframe with forward-filled adjusted_moving_avg values
1800
- # to ensure the line continues even when we have missing data
1801
- filled_avg_apr_data = avg_apr_data_with_ma.copy()
1802
- filled_avg_apr_data['adjusted_moving_avg'] = filled_avg_apr_data['adjusted_moving_avg'].fillna(method='ffill')
1803
-
1804
- # Use the filled dataframe for the adjusted APR line
1805
- x_values_adj = filled_avg_apr_data['timestamp'].tolist()
1806
- y_values_adj_ma = filled_avg_apr_data['adjusted_moving_avg'].tolist()
1807
 
1808
  # Create hover template for the adjusted APR moving average line
1809
  hover_data_adj = []
1810
- for idx, row in filled_avg_apr_data.iterrows():
1811
  timestamp = row['timestamp']
1812
- # Format timestamp to show only up to seconds (not milliseconds)
1813
- formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
1814
 
1815
- # Calculate number of active agents in the last 24 hours
1816
- # Use ROI data after April 25th, 2025, and APR data before that date
1817
- time_24h_ago = timestamp - pd.Timedelta(hours=24)
1818
- april_25_2025 = datetime(2025, 4, 25)
1819
-
1820
- if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
1821
- # After April 25th, 2025: Use ROI data
1822
- roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
1823
- (global_roi_df['timestamp'] <= timestamp)]
1824
- active_agents = len(roi_window_data['agent_id'].unique())
1825
- logger.debug(f"Using ROI data for adjusted APR active agent count at {timestamp} (after Apr 25)")
1826
- else:
1827
- # Before April 25th, 2025 or if ROI data is not available: Use APR data
1828
- active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
1829
- (apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
1830
- logger.debug(f"Using APR data for adjusted APR active agent count at {timestamp} (before Apr 25)")
1831
 
1832
- if pd.notna(row['adjusted_moving_avg']):
1833
- hover_data_adj.append(
1834
- f"Time: {formatted_timestamp}<br>Avg ETH Adjusted APR (3d window): {row['adjusted_moving_avg']:.2f}<br>Active agents (24h): {active_agents}"
1835
- )
1836
- else:
1837
- hover_data_adj.append(
1838
- f"Time: {formatted_timestamp}<br>Avg ETH Adjusted APR (3d window): N/A<br>Active agents (24h): {active_agents}"
1839
- )
1840
 
1841
  fig.add_trace(
1842
  go.Scatter(
1843
- x=x_values_adj,
1844
  y=y_values_adj_ma,
1845
  mode='lines', # Only lines for moving average
1846
- line=dict(color='green', width=4), # Thicker solid line for adjusted APR
1847
- name='Average ETH Adjusted APR (3d window)',
1848
  hovertext=hover_data_adj,
1849
  hoverinfo='text',
1850
  visible=True # Visible by default
1851
  )
1852
  )
1853
- logger.info(f"Added 3-day moving average Adjusted APR trace with {len(x_values_adj)} points (with forward-filling for missing values)")
1854
  else:
1855
  logger.warning("No adjusted APR moving average data available to plot")
1856
 
@@ -1942,13 +1942,13 @@ def create_combined_time_series_graph(df):
1942
  )
1943
  )
1944
 
1945
- # Update y-axis with fixed range (-10 to 10)
1946
  fig.update_yaxes(
1947
  showgrid=True,
1948
  gridwidth=1,
1949
  gridcolor='rgba(0,0,0,0.1)',
1950
- # Use fixed range instead of autoscaling
1951
- range=[-10, 10], # Set fixed range from -10 to 10
1952
  tickformat=".2f", # Format tick labels with 2 decimal places
1953
  tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
1954
  title=None # Remove the built-in axis title since we're using annotations
@@ -2519,7 +2519,7 @@ def dashboard():
2519
  # Update visibility of traces based on toggle values
2520
  for i, trace in enumerate(combined_fig.data):
2521
  # Check if this is a moving average trace
2522
- if trace.name == 'Average APR (3d window)':
2523
  trace.visible = show_apr_ma
2524
  elif trace.name == 'Average ETH Adjusted APR (3d window)':
2525
  trace.visible = show_adjusted_apr_ma
@@ -2546,7 +2546,7 @@ def dashboard():
2546
  # Update visibility of traces based on toggle values
2547
  for i, trace in enumerate(combined_fig.data):
2548
  # Check if this is a moving average trace
2549
- if trace.name == 'Average ROI (3d window)':
2550
  trace.visible = show_roi_ma
2551
 
2552
  return combined_fig
 
838
 
839
  return combined_fig, csv_file
840
 
841
def aggregate_daily_data(df, metric_column):
    """
    Aggregate data by calendar date and agent, averaging the metric within each day.

    Args:
        df: DataFrame with a datetime 'timestamp' column, an 'agent_id' column and
            the metric column. The descriptive columns 'agent_name', 'is_dummy'
            and 'metric_type' are carried over when present.
        metric_column: Name of the metric column to average (e.g. 'apr' or 'roi')

    Returns:
        DataFrame with one row per (date, agent_id) holding the daily mean of the
        metric, the first value of each available descriptive column, and a
        'timestamp' column set to midnight of that date. An empty input is
        returned unchanged.
    """
    if df.empty:
        return df

    # Work on a copy so the caller's frame does not gain a 'date' column.
    working = df.copy()
    # Drop the time-of-day component so every row of the same day shares one key.
    working['date'] = working['timestamp'].dt.date

    # Mean for the metric; descriptive columns are taken from the first row of
    # each group. Only columns that actually exist are aggregated, so frames
    # lacking some of the descriptive columns no longer raise KeyError.
    aggregations = {metric_column: 'mean'}
    for column in ('agent_name', 'is_dummy', 'metric_type'):
        if column in working.columns and column != metric_column:
            aggregations[column] = 'first'

    daily_agent_data = (
        working.groupby(['date', 'agent_id'])
        .agg(aggregations)
        .reset_index()
    )

    # Convert date back to datetime for plotting
    daily_agent_data['timestamp'] = pd.to_datetime(daily_agent_data['date'])

    logger.info(
        "Aggregated %s data points into %s daily values for %s",
        len(df), len(daily_agent_data), metric_column,
    )

    return daily_agent_data
873
+
874
def calculate_daily_medians(daily_agent_data, metric_column):
    """
    Calculate the median of the metric across all agents for each date.

    Args:
        daily_agent_data: DataFrame with a 'date' column and the metric column,
            one row per agent and day
        metric_column: Name of the metric column (e.g. 'apr' or 'roi')

    Returns:
        DataFrame with columns 'date', the metric column (daily median) and
        'timestamp' (the date as a datetime), one row per date. An empty input
        yields an empty frame with these same columns.
    """
    if daily_agent_data.empty:
        # Return the same schema as the non-empty case instead of handing the
        # caller's per-agent frame back, so downstream column access is uniform.
        return pd.DataFrame({
            'date': pd.Series(dtype='object'),
            metric_column: pd.Series(dtype='float64'),
            'timestamp': pd.Series(dtype='datetime64[ns]'),
        })

    # Median across agents per date; pandas skips missing values by default.
    daily_medians = daily_agent_data.groupby('date', as_index=False)[metric_column].median()

    # Convert date back to datetime for plotting
    daily_medians['timestamp'] = pd.to_datetime(daily_medians['date'])

    logger.info(
        "Calculated %s daily median values for %s",
        len(daily_medians), metric_column,
    )

    return daily_medians
899
+
900
def calculate_moving_average_medians(daily_medians, metric_column, window_days=7):
    """
    Calculate a trailing moving average of daily medians over a time window.

    For each row, the average covers every row whose timestamp lies in
    [timestamp - window_days, timestamp], both ends inclusive.

    Args:
        daily_medians: DataFrame with a datetime 'timestamp' column and the metric column
        metric_column: Name of the metric column (e.g. 'apr' or 'roi')
        window_days: Number of days for the moving average window

    Returns:
        A copy of the input sorted by 'timestamp' with a float 'moving_avg'
        column added. The input frame is not modified. An empty input yields
        an empty copy that still carries the 'moving_avg' column.
    """
    if daily_medians.empty:
        # Callers index result['moving_avg'] unconditionally, so the column
        # must exist even when there is nothing to average.
        empty_result = daily_medians.copy()
        empty_result['moving_avg'] = pd.Series(dtype='float64')
        return empty_result

    # Sort by timestamp
    ordered = daily_medians.sort_values('timestamp').copy()

    # Define the time window
    time_window = pd.Timedelta(days=window_days)
    logger.info(
        "Calculating %s-day moving average of daily medians for %s",
        window_days, metric_column,
    )

    timestamps = ordered['timestamp']
    values = ordered[metric_column]

    # NOTE(review): because both window ends are inclusive, midnight-aligned daily
    # data contributes up to window_days + 1 points per average - confirm intended.
    moving_averages = []
    for current_time, current_value in zip(timestamps, values):
        in_window = (
            (timestamps >= current_time - time_window)
            & (timestamps <= current_time)
        ).to_numpy()
        if in_window.any():
            moving_averages.append(values[in_window].mean())
        else:
            # No row matched (e.g. a missing timestamp): fall back to the row's own value.
            moving_averages.append(current_value)

    # Assign positionally as a float column; this avoids the object dtype left by
    # a None-initialised column and does not depend on index labels being unique.
    ordered['moving_avg'] = pd.Series(moving_averages, dtype='float64').to_numpy()

    logger.info(
        "Calculated %s-day moving averages with %s points",
        window_days, len(ordered),
    )

    return ordered
946
+
947
  def create_combined_roi_time_series_graph(df):
948
+ """Create a time series graph showing daily median ROI values with 7-day moving average"""
949
  if len(df) == 0:
950
  logger.error("No data to plot combined ROI graph")
951
  fig = go.Figure()
 
1059
  # Use the filtered data for all subsequent operations
1060
  df = df_filtered
1061
 
1062
+ # NEW APPROACH: Daily aggregation and median calculation
1063
+ # Step 1: Aggregate data daily per agent (mean of values within each day)
1064
+ daily_agent_data = aggregate_daily_data(df, 'roi')
1065
 
1066
+ # Step 2: Calculate daily medians across all agents
1067
+ daily_medians = calculate_daily_medians(daily_agent_data, 'roi')
 
 
 
 
 
 
 
 
 
1068
 
1069
+ # Step 3: Calculate 7-day moving average of daily medians
1070
+ daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)
 
1071
 
1072
+ logger.info(f"NEW APPROACH: Processed {len(df)} raw points {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1073
 
1074
  # Find the last date where we have valid moving average data
1075
+ last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
1076
 
1077
  # If we don't have any valid moving average data, use the max time from the original data
1078
  last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
 
1080
  logger.info(f"Last valid moving average date: {last_valid_ma_date}")
1081
  logger.info(f"Using last valid date for graph: {last_valid_date}")
1082
 
1083
+ # Plot individual agent daily data points with agent names in hover, but limit display for scalability
1084
+ if not daily_agent_data.empty:
1085
  # Group by agent to use different colors for each agent
1086
+ unique_agents = daily_agent_data['agent_name'].unique()
1087
  colors = px.colors.qualitative.Plotly[:len(unique_agents)]
1088
 
1089
  # Create a color map for agents
1090
  color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
1091
 
1092
  # Calculate the total number of data points per agent to determine which are most active
1093
+ agent_counts = daily_agent_data['agent_name'].value_counts()
1094
 
1095
  # Determine how many agents to show individually (limit to top 5 most active)
1096
  MAX_VISIBLE_AGENTS = 5
 
1098
 
1099
  logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
1100
 
1101
+ # Add daily aggregated data points for each agent, but only make top agents visible by default
1102
  for agent_name in unique_agents:
1103
+ agent_data = daily_agent_data[daily_agent_data['agent_name'] == agent_name]
1104
 
1105
  # Explicitly convert to Python lists
1106
  x_values = agent_data['timestamp'].tolist()
 
1121
  size=10,
1122
  line=dict(width=1, color='black')
1123
  ),
1124
+ name=f'Agent: {agent_name} (Daily ROI)',
1125
+ hovertemplate='Time: %{x}<br>Daily ROI: %{y:.2f}%<br>Agent: ' + agent_name + '<extra></extra>',
1126
  visible=is_visible # All agents hidden by default
1127
  )
1128
  )
1129
+ logger.info(f"Added daily ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
1130
 
1131
+ # Add ROI 7-day moving average of daily medians as a smooth line
1132
+ x_values_ma = daily_medians_with_ma['timestamp'].tolist()
1133
+ y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
1134
 
1135
  # Create hover template for the ROI moving average line
1136
  hover_data_roi = []
1137
+ for idx, row in daily_medians_with_ma.iterrows():
1138
  timestamp = row['timestamp']
1139
+ # Format timestamp to show only date for daily data
1140
+ formatted_timestamp = timestamp.strftime('%Y-%m-%d')
1141
 
1142
+ # Calculate number of active agents on this date
1143
+ active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
 
 
1144
 
1145
  hover_data_roi.append(
1146
+ f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
1147
  )
1148
 
1149
  fig.add_trace(
 
1151
  x=x_values_ma,
1152
  y=y_values_ma,
1153
  mode='lines', # Only lines for moving average
1154
+ line=dict(color='blue', width=3), # Thicker line for main trend
1155
+ name='Median ROI (7d window)',
1156
  hovertext=hover_data_roi,
1157
  hoverinfo='text',
1158
  visible=True # Visible by default
1159
  )
1160
  )
1161
+ logger.info(f"Added 7-day moving average of daily median ROI trace with {len(x_values_ma)} points")
1162
 
1163
  # Update layout with average runtime information in the title
1164
  fig.update_layout(
 
1219
  )
1220
  )
1221
 
1222
+ # Update y-axis with autoscaling for ROI
1223
  fig.update_yaxes(
1224
  showgrid=True,
1225
  gridwidth=1,
1226
  gridcolor='rgba(0,0,0,0.1)',
1227
+ # Use autoscaling instead of fixed range
1228
+ autorange=True, # Enable autoscaling to fit the data
1229
  tickformat=".2f", # Format tick labels with 2 decimal places
1230
  tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
1231
  title=None # Remove the built-in axis title since we're using annotations
 
1661
  # Use the filtered data for all subsequent operations
1662
  apr_data = apr_data_filtered
1663
 
1664
+ # NEW APPROACH: Daily aggregation and median calculation for APR
1665
+ # Step 1: Aggregate data daily per agent (mean of values within each day)
1666
+ daily_agent_data = aggregate_daily_data(apr_data, 'apr')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1667
 
1668
+ # Step 2: Calculate daily medians across all agents
1669
+ daily_medians = calculate_daily_medians(daily_agent_data, 'apr')
 
1670
 
1671
+ # Step 3: Calculate 7-day moving average of daily medians
1672
+ daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'apr', window_days=7)
 
1673
 
1674
+ # Also handle adjusted APR if it exists
1675
+ daily_medians_adjusted = None
1676
+ daily_medians_adjusted_with_ma = None
1677
 
1678
+ if 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any():
1679
+ # Create a separate dataset for adjusted APR
1680
+ apr_data_with_adjusted = apr_data[apr_data['adjusted_apr'].notna()].copy()
 
 
 
 
 
 
 
1681
 
1682
+ if not apr_data_with_adjusted.empty:
1683
+ # Step 1: Aggregate adjusted APR data daily per agent
1684
+ daily_agent_data_adjusted = aggregate_daily_data(apr_data_with_adjusted, 'adjusted_apr')
1685
+
1686
+ # Step 2: Calculate daily medians for adjusted APR
1687
+ daily_medians_adjusted = calculate_daily_medians(daily_agent_data_adjusted, 'adjusted_apr')
1688
+
1689
+ # Step 3: Calculate 7-day moving average of daily medians for adjusted APR
1690
+ daily_medians_adjusted_with_ma = calculate_moving_average_medians(daily_medians_adjusted, 'adjusted_apr', window_days=7)
1691
+
1692
+ logger.info(f"NEW APPROACH APR: Processed {len(apr_data)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
1693
+ if daily_medians_adjusted is not None:
1694
+ logger.info(f"NEW APPROACH Adjusted APR: Processed adjusted APR data → {len(daily_medians_adjusted)} daily medians")
 
 
 
 
 
 
 
 
 
1695
 
1696
+ # This old moving average calculation is no longer needed with the new daily median approach
1697
 
1698
  # Find the last date where we have valid moving average data
1699
+ last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
1700
 
1701
  # Find the last date where we have valid adjusted moving average data
1702
  last_valid_adj_ma_date = None
1703
+ if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
1704
+ last_valid_adj_ma_date = daily_medians_adjusted_with_ma[daily_medians_adjusted_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_adjusted_with_ma['moving_avg'].dropna().empty else None
1705
 
1706
  # Determine the last valid date for either moving average
1707
  last_valid_date = last_valid_ma_date
 
1787
  )
1788
  logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
1789
 
1790
+ # Add APR 7-day moving average of daily medians as a smooth line
1791
+ x_values_ma = daily_medians_with_ma['timestamp'].tolist()
1792
+ y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
1793
 
1794
  # Create hover template for the APR moving average line
1795
  hover_data_apr = []
1796
+ for idx, row in daily_medians_with_ma.iterrows():
1797
  timestamp = row['timestamp']
1798
+ # Format timestamp to show only date for daily data
1799
+ formatted_timestamp = timestamp.strftime('%Y-%m-%d')
1800
+
1801
+ # Calculate number of active agents on this date
1802
+ active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1803
 
1804
  hover_data_apr.append(
1805
+ f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
1806
  )
1807
 
1808
  fig.add_trace(
 
1810
  x=x_values_ma,
1811
  y=y_values_ma,
1812
  mode='lines', # Only lines for moving average
1813
+ line=dict(color='red', width=3, shape='spline', smoothing=1.3), # Smooth curved line
1814
+ name='Median APR (7d window)',
1815
  hovertext=hover_data_apr,
1816
  hoverinfo='text',
1817
  visible=True # Visible by default
1818
  )
1819
  )
1820
+ logger.info(f"Added 7-day moving average of daily median APR trace with {len(x_values_ma)} points")
1821
 
1822
+ # Add adjusted APR 7-day moving average line if it exists
1823
+ if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
1824
+ x_values_adj_ma = daily_medians_adjusted_with_ma['timestamp'].tolist()
1825
+ y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
 
 
 
 
 
 
1826
 
1827
  # Create hover template for the adjusted APR moving average line
1828
  hover_data_adj = []
1829
+ for idx, row in daily_medians_adjusted_with_ma.iterrows():
1830
  timestamp = row['timestamp']
1831
+ # Format timestamp to show only date for daily data
1832
+ formatted_timestamp = timestamp.strftime('%Y-%m-%d')
1833
 
1834
+ # Calculate number of active agents on this date
1835
+ active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1836
 
1837
+ hover_data_adj.append(
1838
+ f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
1839
+ )
 
 
 
 
 
1840
 
1841
  fig.add_trace(
1842
  go.Scatter(
1843
+ x=x_values_adj_ma,
1844
  y=y_values_adj_ma,
1845
  mode='lines', # Only lines for moving average
1846
+ line=dict(color='green', width=3, shape='spline', smoothing=1.3), # Smooth curved line
1847
+ name='Median Adjusted APR (7d window)',
1848
  hovertext=hover_data_adj,
1849
  hoverinfo='text',
1850
  visible=True # Visible by default
1851
  )
1852
  )
1853
+ logger.info(f"Added 7-day moving average of daily median Adjusted APR trace with {len(x_values_adj_ma)} points")
1854
  else:
1855
  logger.warning("No adjusted APR moving average data available to plot")
1856
 
 
1942
  )
1943
  )
1944
 
1945
+ # Update y-axis with autoscaling
1946
  fig.update_yaxes(
1947
  showgrid=True,
1948
  gridwidth=1,
1949
  gridcolor='rgba(0,0,0,0.1)',
1950
+ # Use autoscaling instead of fixed range
1951
+ autorange=True, # Enable autoscaling to fit the data
1952
  tickformat=".2f", # Format tick labels with 2 decimal places
1953
  tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
1954
  title=None # Remove the built-in axis title since we're using annotations
 
2519
  # Update visibility of traces based on toggle values
2520
  for i, trace in enumerate(combined_fig.data):
2521
  # Check if this is a moving average trace
2522
+ if trace.name == 'Median APR (7d window)':
2523
  trace.visible = show_apr_ma
2524
  elif trace.name == 'Average ETH Adjusted APR (3d window)':
2525
  trace.visible = show_adjusted_apr_ma
 
2546
  # Update visibility of traces based on toggle values
2547
  for i, trace in enumerate(combined_fig.data):
2548
  # Check if this is a moving average trace
2549
+ if trace.name == 'Median ROI (7d window)':
2550
  trace.visible = show_roi_ma
2551
 
2552
  return combined_fig