Testing-Agent-1

Sleeping

App Files Files Community

gauravlochab committed on Jun 13

Commit

cf88990

1 Parent(s): 144269c

chore: add preprocessing to the dataframe

Browse files

Files changed (1) hide show

app.py +204 -204

app.py CHANGED Viewed

@@ -838,8 +838,114 @@ def generate_roi_visualizations():
     return combined_fig, csv_file
 def create_combined_roi_time_series_graph(df):
-    """Create a time series graph showing average ROI values across all agents"""
     if len(df) == 0:
         logger.error("No data to plot combined ROI graph")
         fig = go.Figure()
@@ -953,53 +1059,20 @@ def create_combined_roi_time_series_graph(df):
     # Use the filtered data for all subsequent operations
     df = df_filtered
-    # Group by timestamp and calculate mean ROI
-    avg_roi_data = df.groupby('timestamp')['roi'].mean().reset_index()
-    # Sort by timestamp
-    avg_roi_data = avg_roi_data.sort_values('timestamp')
-    # Log the average ROI data
-    logger.info(f"Calculated average ROI data with {len(avg_roi_data)} points")
-    for idx, row in avg_roi_data.iterrows():
-        logger.info(f"  Average point {idx}: timestamp={row['timestamp']}, avg_roi={row['roi']}")
-    # Calculate moving average based on a time window (3 days)
-    # Sort data by timestamp
-    df_sorted = df.sort_values('timestamp')
-    # Create a new dataframe for the moving average
-    avg_roi_data_with_ma = avg_roi_data.copy()
-    avg_roi_data_with_ma['moving_avg'] = None  # Initialize the moving average column
-    # Define the time window for the moving average (3 days)
-    time_window = pd.Timedelta(days=3)
-    logger.info(f"Calculating moving average with time window of {time_window}")
-    # Calculate the moving averages for each timestamp
-    for i, row in avg_roi_data_with_ma.iterrows():
-        current_time = row['timestamp']
-        window_start = current_time - time_window
-        # Get all data points within the 3-day time window
-        window_data = df_sorted[
-            (df_sorted['timestamp'] >= window_start) &
-            (df_sorted['timestamp'] <= current_time)
-        ]
-        # Calculate the average ROI for the 3-day time window
-        if not window_data.empty:
-            avg_roi_data_with_ma.at[i, 'moving_avg'] = window_data['roi'].mean()
-            logger.debug(f"ROI time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['roi'].mean()}")
-        else:
-            # If no data points in the window, use the current value
-            avg_roi_data_with_ma.at[i, 'moving_avg'] = row['roi']
-            logger.debug(f"No data points in time window for {current_time}, using current value {row['roi']}")
-    logger.info(f"Calculated time-based moving averages with {len(avg_roi_data_with_ma)} points")
     # Find the last date where we have valid moving average data
-    last_valid_ma_date = avg_roi_data_with_ma[avg_roi_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_roi_data_with_ma['moving_avg'].dropna().empty else None
     # If we don't have any valid moving average data, use the max time from the original data
     last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
@@ -1007,17 +1080,17 @@ def create_combined_roi_time_series_graph(df):
     logger.info(f"Last valid moving average date: {last_valid_ma_date}")
     logger.info(f"Using last valid date for graph: {last_valid_date}")
-    # Plot individual agent data points with agent names in hover, but limit display for scalability
-    if not df.empty:
         # Group by agent to use different colors for each agent
-        unique_agents = df['agent_name'].unique()
         colors = px.colors.qualitative.Plotly[:len(unique_agents)]
         # Create a color map for agents
         color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
         # Calculate the total number of data points per agent to determine which are most active
-        agent_counts = df['agent_name'].value_counts()
         # Determine how many agents to show individually (limit to top 5 most active)
         MAX_VISIBLE_AGENTS = 5
@@ -1025,9 +1098,9 @@ def create_combined_roi_time_series_graph(df):
         logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
-        # Add data points for each agent, but only make top agents visible by default
         for agent_name in unique_agents:
-            agent_data = df[df['agent_name'] == agent_name]
             # Explicitly convert to Python lists
             x_values = agent_data['timestamp'].tolist()
@@ -1048,31 +1121,29 @@ def create_combined_roi_time_series_graph(df):
                         size=10,
                         line=dict(width=1, color='black')
                     ),
-                    name=f'Agent: {agent_name} (ROI)',
-                    hovertemplate='Time: %{x}<br>ROI: %{y:.2f}<br>Agent: ' + agent_name + '<extra></extra>',
                     visible=is_visible  # All agents hidden by default
                 )
             )
-            logger.info(f"Added ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
-        # Add ROI moving average as a smooth line
-        x_values_ma = avg_roi_data_with_ma['timestamp'].tolist()
-        y_values_ma = avg_roi_data_with_ma['moving_avg'].tolist()
         # Create hover template for the ROI moving average line
         hover_data_roi = []
-        for idx, row in avg_roi_data_with_ma.iterrows():
             timestamp = row['timestamp']
-            # Format timestamp to show only up to seconds (not milliseconds)
-            formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-            # Calculate number of active agents in the last 24 hours
-            time_24h_ago = timestamp - pd.Timedelta(hours=24)
-            active_agents = len(df[(df['timestamp'] >= time_24h_ago) &
-                                  (df['timestamp'] <= timestamp)]['agent_id'].unique())
             hover_data_roi.append(
-                f"Time: {formatted_timestamp}<br>Avg ROI (3d window): {row['moving_avg']:.2f}%<br>Active agents (24h): {active_agents}"
             )
         fig.add_trace(
@@ -1080,14 +1151,14 @@ def create_combined_roi_time_series_graph(df):
                 x=x_values_ma,
                 y=y_values_ma,
                 mode='lines',  # Only lines for moving average
-                line=dict(color='blue', width=2),  # Thinner line
-                name='Average ROI (3d window)',
                 hovertext=hover_data_roi,
                 hoverinfo='text',
                 visible=True  # Visible by default
             )
         )
-        logger.info(f"Added 3-day moving average ROI trace with {len(x_values_ma)} points")
     # Update layout with average runtime information in the title
     fig.update_layout(
@@ -1148,13 +1219,13 @@ def create_combined_roi_time_series_graph(df):
         )
     )
-    # Update y-axis with fixed range for ROI (-10 to 10)
     fig.update_yaxes(
         showgrid=True,
         gridwidth=1,
         gridcolor='rgba(0,0,0,0.1)',
-        # Use fixed range instead of autoscaling
-        range=[-10, 10],  # Set fixed range from -10 to 10
         tickformat=".2f",  # Format tick labels with 2 decimal places
         tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
         title=None  # Remove the built-in axis title since we're using annotations
@@ -1590,79 +1661,47 @@ def create_combined_time_series_graph(df):
     # Use the filtered data for all subsequent operations
     apr_data = apr_data_filtered
-    # Group by timestamp and calculate mean APR
-    avg_apr_data = apr_data.groupby('timestamp')['apr'].mean().reset_index()
-    # Sort by timestamp
-    avg_apr_data = avg_apr_data.sort_values('timestamp')
-    # Log the average APR data
-    logger.info(f"Calculated average APR data with {len(avg_apr_data)} points")
-    for idx, row in avg_apr_data.iterrows():
-        logger.info(f"  Average point {idx}: timestamp={row['timestamp']}, avg_apr={row['apr']}")
-    # Calculate moving average based on a time window (2 hours)
-    # Sort data by timestamp
-    apr_data_sorted = apr_data.sort_values('timestamp')
-    # Create a new dataframe for the moving average
-    avg_apr_data_with_ma = avg_apr_data.copy()
-    avg_apr_data_with_ma['moving_avg'] = None  # Initialize the moving average column
-    # Define the time window for the moving average (3 days)
-    time_window = pd.Timedelta(days=3)
-    logger.info(f"Calculating moving average with time window of {time_window}")
-    # Calculate moving averages: one for APR and one for adjusted APR
-    avg_apr_data_with_ma['moving_avg'] = None  # 3-day window for APR
-    avg_apr_data_with_ma['adjusted_moving_avg'] = None  # 3-day window for adjusted APR
-    # Keep track of the last valid adjusted_moving_avg value to handle gaps
-    last_valid_adjusted_moving_avg = None
-    # Calculate the moving averages for each timestamp
-    for i, row in avg_apr_data_with_ma.iterrows():
-        current_time = row['timestamp']
-        window_start = current_time - time_window
-        # Get all data points within the 3-day time window
-        window_data = apr_data_sorted[
-            (apr_data_sorted['timestamp'] >= window_start) &
-            (apr_data_sorted['timestamp'] <= current_time)
-        ]
-        # Calculate the average APR for the 3-day time window
-        if not window_data.empty:
-            avg_apr_data_with_ma.at[i, 'moving_avg'] = window_data['apr'].mean()
-            logger.debug(f"APR time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['apr'].mean()}")
-            # Calculate adjusted APR moving average if data exists
-            has_adjusted_apr = 'adjusted_apr' in window_data.columns and window_data['adjusted_apr'].notna().any()
-            if has_adjusted_apr:
-                adjusted_avg = window_data['adjusted_apr'].dropna().mean()
-                avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = adjusted_avg
-                last_valid_adjusted_moving_avg = adjusted_avg
-                logger.debug(f"Adjusted APR time window {window_start} to {current_time}: {len(window_data)} points, avg={adjusted_avg}")
-            else:
-                # If we don't have adjusted_apr data in this window but had some previously,
-                # use the last valid value to maintain continuity in the graph
-                if last_valid_adjusted_moving_avg is not None:
-                    avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = last_valid_adjusted_moving_avg
-                    logger.debug(f"No adjusted APR data in window, using last valid value: {last_valid_adjusted_moving_avg}")
-        else:
-            # If no data points in the window, use the current value
-            avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
-            logger.debug(f"No data points in time window for {current_time}, using current value {row['apr']}")
-    logger.info(f"Calculated time-based moving averages with {len(avg_apr_data_with_ma)} points")
     # Find the last date where we have valid moving average data
-    last_valid_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['moving_avg'].notna()]['timestamp'].max() if not avg_apr_data_with_ma['moving_avg'].dropna().empty else None
     # Find the last date where we have valid adjusted moving average data
     last_valid_adj_ma_date = None
-    if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
-        last_valid_adj_ma_date = avg_apr_data_with_ma[avg_apr_data_with_ma['adjusted_moving_avg'].notna()]['timestamp'].max()
     # Determine the last valid date for either moving average
     last_valid_date = last_valid_ma_date
@@ -1748,36 +1787,22 @@ def create_combined_time_series_graph(df):
                 )
                 logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
-        # Add APR moving average as a smooth line
-        x_values_ma = avg_apr_data_with_ma['timestamp'].tolist()
-        y_values_ma = avg_apr_data_with_ma['moving_avg'].tolist()
         # Create hover template for the APR moving average line
         hover_data_apr = []
-        for idx, row in avg_apr_data_with_ma.iterrows():
             timestamp = row['timestamp']
-            # Format timestamp to show only up to seconds (not milliseconds)
-            formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-            # Calculate number of active agents in the last 24 hours
-            # Use ROI data after April 25th, 2025, and APR data before that date
-            time_24h_ago = timestamp - pd.Timedelta(hours=24)
-            april_25_2025 = datetime(2025, 4, 25)
-            if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
-                # After April 25th, 2025: Use ROI data
-                roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
-                                              (global_roi_df['timestamp'] <= timestamp)]
-                active_agents = len(roi_window_data['agent_id'].unique())
-                logger.debug(f"Using ROI data for active agent count at {timestamp} (after Apr 25): {active_agents} agents")
-            else:
-                # Before April 25th, 2025 or if ROI data is not available: Use APR data
-                active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
-                                           (apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
-                logger.debug(f"Using APR data for active agent count at {timestamp} (before Apr 25): {active_agents} agents")
             hover_data_apr.append(
-                f"Time: {formatted_timestamp}<br>Avg APR (3d window): {row['moving_avg']:.2f}<br>Active agents (24h): {active_agents}"
             )
         fig.add_trace(
@@ -1785,72 +1810,47 @@ def create_combined_time_series_graph(df):
                 x=x_values_ma,
                 y=y_values_ma,
                 mode='lines',  # Only lines for moving average
-                line=dict(color='red', width=2),  # Thinner line
-                name='Average APR (3d window)',
                 hovertext=hover_data_apr,
                 hoverinfo='text',
                 visible=True  # Visible by default
             )
         )
-        logger.info(f"Added 3-day moving average APR trace with {len(x_values_ma)} points")
-        # Add adjusted APR moving average line if it exists
-        if 'adjusted_moving_avg' in avg_apr_data_with_ma.columns and avg_apr_data_with_ma['adjusted_moving_avg'].notna().any():
-            # Create a copy of the dataframe with forward-filled adjusted_moving_avg values
-            # to ensure the line continues even when we have missing data
-            filled_avg_apr_data = avg_apr_data_with_ma.copy()
-            filled_avg_apr_data['adjusted_moving_avg'] = filled_avg_apr_data['adjusted_moving_avg'].fillna(method='ffill')
-            # Use the filled dataframe for the adjusted APR line
-            x_values_adj = filled_avg_apr_data['timestamp'].tolist()
-            y_values_adj_ma = filled_avg_apr_data['adjusted_moving_avg'].tolist()
             # Create hover template for the adjusted APR moving average line
             hover_data_adj = []
-            for idx, row in filled_avg_apr_data.iterrows():
                 timestamp = row['timestamp']
-                # Format timestamp to show only up to seconds (not milliseconds)
-                formatted_timestamp = timestamp.strftime('%Y-%m-%d %H:%M:%S')
-                # Calculate number of active agents in the last 24 hours
-                # Use ROI data after April 25th, 2025, and APR data before that date
-                time_24h_ago = timestamp - pd.Timedelta(hours=24)
-                april_25_2025 = datetime(2025, 4, 25)
-                if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
-                    # After April 25th, 2025: Use ROI data
-                    roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
-                                                  (global_roi_df['timestamp'] <= timestamp)]
-                    active_agents = len(roi_window_data['agent_id'].unique())
-                    logger.debug(f"Using ROI data for adjusted APR active agent count at {timestamp} (after Apr 25)")
-                else:
-                    # Before April 25th, 2025 or if ROI data is not available: Use APR data
-                    active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
-                                               (apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
-                    logger.debug(f"Using APR data for adjusted APR active agent count at {timestamp} (before Apr 25)")
-                if pd.notna(row['adjusted_moving_avg']):
-                    hover_data_adj.append(
-                        f"Time: {formatted_timestamp}<br>Avg ETH Adjusted APR (3d window): {row['adjusted_moving_avg']:.2f}<br>Active agents (24h): {active_agents}"
-                    )
-                else:
-                    hover_data_adj.append(
-                        f"Time: {formatted_timestamp}<br>Avg ETH Adjusted APR (3d window): N/A<br>Active agents (24h): {active_agents}"
-                    )
             fig.add_trace(
                 go.Scatter(
-                    x=x_values_adj,
                     y=y_values_adj_ma,
                     mode='lines',  # Only lines for moving average
-                    line=dict(color='green', width=4),  # Thicker solid line for adjusted APR
-                    name='Average ETH Adjusted APR (3d window)',
                     hovertext=hover_data_adj,
                     hoverinfo='text',
                     visible=True  # Visible by default
                 )
             )
-            logger.info(f"Added 3-day moving average Adjusted APR trace with {len(x_values_adj)} points (with forward-filling for missing values)")
         else:
             logger.warning("No adjusted APR moving average data available to plot")
@@ -1942,13 +1942,13 @@ def create_combined_time_series_graph(df):
         )
     )
-    # Update y-axis with fixed range (-10 to 10)
     fig.update_yaxes(
         showgrid=True,
         gridwidth=1,
         gridcolor='rgba(0,0,0,0.1)',
-        # Use fixed range instead of autoscaling
-        range=[-10, 10],  # Set fixed range from -10 to 10
         tickformat=".2f",  # Format tick labels with 2 decimal places
         tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
         title=None  # Remove the built-in axis title since we're using annotations
@@ -2519,7 +2519,7 @@ def dashboard():
                 # Update visibility of traces based on toggle values
                 for i, trace in enumerate(combined_fig.data):
                     # Check if this is a moving average trace
-                    if trace.name == 'Average APR (3d window)':
                         trace.visible = show_apr_ma
                     elif trace.name == 'Average ETH Adjusted APR (3d window)':
                         trace.visible = show_adjusted_apr_ma
@@ -2546,7 +2546,7 @@ def dashboard():
                 # Update visibility of traces based on toggle values
                 for i, trace in enumerate(combined_fig.data):
                     # Check if this is a moving average trace
-                    if trace.name == 'Average ROI (3d window)':
                         trace.visible = show_roi_ma
                 return combined_fig

     return combined_fig, csv_file
def aggregate_daily_data(df, metric_column):
    """
    Collapse raw samples into one row per (calendar date, agent).

    Within each (date, agent_id) group the metric is averaged. The
    descriptive columns 'agent_name', 'is_dummy' and 'metric_type' are
    carried over by taking the first value in the group; any of them that
    the input frame does not have is simply left out of the result instead
    of aborting the aggregation.

    Args:
        df: DataFrame holding at least 'timestamp' (a datetime column, since
            the ``.dt`` accessor is used), 'agent_id' and ``metric_column``.
        metric_column: Name of the metric column to average
            (e.g. 'apr' or 'roi').

    Returns:
        DataFrame with columns 'date', 'agent_id', ``metric_column``, the
        descriptive columns that were present, and 'timestamp' (the date
        converted back to a datetime at midnight, for plotting). An empty
        input is returned unchanged.

    Raises:
        KeyError: If 'timestamp', 'agent_id' or ``metric_column`` is missing.
    """
    if df.empty:
        return df

    required_columns = ('timestamp', 'agent_id', metric_column)
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(f"aggregate_daily_data: missing required column(s): {missing}")

    # Work on a copy so the caller's frame never gains the helper 'date' column.
    working = df.copy()
    # Drop the time-of-day component: all samples of one day share a key.
    working['date'] = working['timestamp'].dt.date

    # The metric is averaged; descriptive columns are constant per agent in
    # practice, so the first value is representative. Only aggregate the
    # descriptive columns that actually exist in this frame.
    agg_spec = {metric_column: 'mean'}
    for optional_column in ('agent_name', 'is_dummy', 'metric_type'):
        if optional_column in working.columns and optional_column != metric_column:
            agg_spec[optional_column] = 'first'

    daily_agent_data = working.groupby(['date', 'agent_id']).agg(agg_spec).reset_index()

    # Plotting code expects a datetime 'timestamp', not a plain date object.
    daily_agent_data['timestamp'] = pd.to_datetime(daily_agent_data['date'])

    logger.info(f"Aggregated {len(df)} data points into {len(daily_agent_data)} daily values for {metric_column}")
    return daily_agent_data
def calculate_daily_medians(daily_agent_data, metric_column):
    """
    Compute, for every calendar date, the median of a metric across agents.

    Rows are grouped by their 'date' column. If the frame has no 'date'
    column, one is derived from the date part of 'timestamp' (on a temporary
    frame; the caller's frame is not modified). The median ignores missing
    values, as is pandas' default.

    Args:
        daily_agent_data: DataFrame with one row per (date, agent), holding
            ``metric_column`` plus either a 'date' column or a datetime
            'timestamp' column.
        metric_column: Name of the metric column (e.g. 'apr' or 'roi').

    Returns:
        DataFrame with columns 'date', ``metric_column`` (the per-date
        median) and 'timestamp' (the date as a datetime, for plotting).
        An empty input is returned unchanged.

    Raises:
        KeyError: If ``metric_column`` is missing, or if neither 'date' nor
            'timestamp' is available to group on.
    """
    if daily_agent_data.empty:
        return daily_agent_data

    per_agent = daily_agent_data
    if 'date' not in per_agent.columns:
        # assign() builds a new frame, so the input stays untouched.
        per_agent = per_agent.assign(date=per_agent['timestamp'].dt.date)

    # One median per date, taken across all agents reporting on that date.
    daily_medians = per_agent.groupby('date', as_index=False)[metric_column].median()

    # Plotting code expects a datetime 'timestamp', not a plain date object.
    daily_medians['timestamp'] = pd.to_datetime(daily_medians['date'])

    logger.info(f"Calculated {len(daily_medians)} daily median values for {metric_column}")
    return daily_medians
def calculate_moving_average_medians(daily_medians, metric_column, window_days=7):
    """
    Add a trailing time-window moving average of the daily medians.

    For each row at time ``t`` the average is taken over all rows whose
    timestamp lies in the half-open window ``(t - window_days, t]``. With one
    row per calendar day this covers exactly ``window_days`` days (the current
    day and the ``window_days - 1`` days before it); a window closed on both
    ends would average ``window_days + 1`` daily points. Missing metric values
    inside a window are ignored, and days with no data simply contribute
    nothing (no interpolation).

    Args:
        daily_medians: DataFrame with a datetime 'timestamp' column and
            ``metric_column``.
        metric_column: Name of the metric column (e.g. 'apr' or 'roi').
        window_days: Length of the trailing window in days. For a
            non-positive value every row's average is just its own value.

    Returns:
        A copy of the input sorted by 'timestamp', with a float
        'moving_avg' column added. For an empty input the copy still
        carries an (empty) 'moving_avg' column, so callers can index it
        unconditionally.
    """
    if daily_medians.empty:
        empty_result = daily_medians.copy()
        empty_result['moving_avg'] = pd.Series(dtype='float64')
        return empty_result

    # Sort a copy; the caller's frame is left untouched.
    result = daily_medians.sort_values('timestamp').copy()

    logger.info(f"Calculating {window_days}-day moving average of daily medians for {metric_column}")

    metric_values = result[metric_column].astype('float64')
    # Fallback: a row with no usable window keeps its own value.
    result['moving_avg'] = metric_values

    # Time-based rolling needs real timestamps; sort_values() puts NaT rows
    # last, so the rows selected here are already in ascending order.
    has_timestamp = result['timestamp'].notna().to_numpy()
    if window_days > 0 and has_timestamp.any():
        by_time = pd.Series(
            metric_values.to_numpy()[has_timestamp],
            index=pd.DatetimeIndex(result.loc[has_timestamp, 'timestamp']),
        )
        # Offset windows default to closed='right', i.e. (t - window, t].
        rolled = by_time.rolling(pd.Timedelta(days=window_days), min_periods=1).mean()
        result.loc[has_timestamp, 'moving_avg'] = rolled.to_numpy()

    logger.info(f"Calculated {window_days}-day moving averages with {len(result)} points")
    return result
 def create_combined_roi_time_series_graph(df):
+    """Create a time series graph showing daily median ROI values with 7-day moving average"""
     if len(df) == 0:
         logger.error("No data to plot combined ROI graph")
         fig = go.Figure()
     # Use the filtered data for all subsequent operations
     df = df_filtered
+    # NEW APPROACH: Daily aggregation and median calculation
+    # Step 1: Aggregate data daily per agent (mean of values within each day)
+    daily_agent_data = aggregate_daily_data(df, 'roi')
+    # Step 2: Calculate daily medians across all agents
+    daily_medians = calculate_daily_medians(daily_agent_data, 'roi')
+    # Step 3: Calculate 7-day moving average of daily medians
+    daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)
+    logger.info(f"NEW APPROACH: Processed {len(df)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
     # Find the last date where we have valid moving average data
+    last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
     # If we don't have any valid moving average data, use the max time from the original data
     last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
     logger.info(f"Last valid moving average date: {last_valid_ma_date}")
     logger.info(f"Using last valid date for graph: {last_valid_date}")
+    # Plot individual agent daily data points with agent names in hover, but limit display for scalability
+    if not daily_agent_data.empty:
         # Group by agent to use different colors for each agent
+        unique_agents = daily_agent_data['agent_name'].unique()
         colors = px.colors.qualitative.Plotly[:len(unique_agents)]
         # Create a color map for agents
         color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
         # Calculate the total number of data points per agent to determine which are most active
+        agent_counts = daily_agent_data['agent_name'].value_counts()
         # Determine how many agents to show individually (limit to top 5 most active)
         MAX_VISIBLE_AGENTS = 5
         logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
+        # Add daily aggregated data points for each agent, but only make top agents visible by default
         for agent_name in unique_agents:
+            agent_data = daily_agent_data[daily_agent_data['agent_name'] == agent_name]
             # Explicitly convert to Python lists
             x_values = agent_data['timestamp'].tolist()
                         size=10,
                         line=dict(width=1, color='black')
                     ),
+                    name=f'Agent: {agent_name} (Daily ROI)',
+                    hovertemplate='Time: %{x}<br>Daily ROI: %{y:.2f}%<br>Agent: ' + agent_name + '<extra></extra>',
                     visible=is_visible  # All agents hidden by default
                 )
             )
+            logger.info(f"Added daily ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
+        # Add ROI 7-day moving average of daily medians as a smooth line
+        x_values_ma = daily_medians_with_ma['timestamp'].tolist()
+        y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
         # Create hover template for the ROI moving average line
         hover_data_roi = []
+        for idx, row in daily_medians_with_ma.iterrows():
             timestamp = row['timestamp']
+            # Format timestamp to show only date for daily data
+            formatted_timestamp = timestamp.strftime('%Y-%m-%d')
+            # Calculate number of active agents on this date
+            active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
             hover_data_roi.append(
+                f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
             )
         fig.add_trace(
                 x=x_values_ma,
                 y=y_values_ma,
                 mode='lines',  # Only lines for moving average
+                line=dict(color='blue', width=3),  # Thicker line for main trend
+                name='Median ROI (7d window)',
                 hovertext=hover_data_roi,
                 hoverinfo='text',
                 visible=True  # Visible by default
             )
         )
+        logger.info(f"Added 7-day moving average of daily median ROI trace with {len(x_values_ma)} points")
     # Update layout with average runtime information in the title
     fig.update_layout(
         )
     )
+    # Update y-axis with autoscaling for ROI
     fig.update_yaxes(
         showgrid=True,
         gridwidth=1,
         gridcolor='rgba(0,0,0,0.1)',
+        # Use autoscaling instead of fixed range
+        autorange=True,  # Enable autoscaling to fit the data
         tickformat=".2f",  # Format tick labels with 2 decimal places
         tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
         title=None  # Remove the built-in axis title since we're using annotations
     # Use the filtered data for all subsequent operations
     apr_data = apr_data_filtered
+    # NEW APPROACH: Daily aggregation and median calculation for APR
+    # Step 1: Aggregate data daily per agent (mean of values within each day)
+    daily_agent_data = aggregate_daily_data(apr_data, 'apr')
+    # Step 2: Calculate daily medians across all agents
+    daily_medians = calculate_daily_medians(daily_agent_data, 'apr')
+    # Step 3: Calculate 7-day moving average of daily medians
+    daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'apr', window_days=7)
+    # Also handle adjusted APR if it exists
+    daily_medians_adjusted = None
+    daily_medians_adjusted_with_ma = None
+    if 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any():
+        # Create a separate dataset for adjusted APR
+        apr_data_with_adjusted = apr_data[apr_data['adjusted_apr'].notna()].copy()
+        if not apr_data_with_adjusted.empty:
+            # Step 1: Aggregate adjusted APR data daily per agent
+            daily_agent_data_adjusted = aggregate_daily_data(apr_data_with_adjusted, 'adjusted_apr')
+            # Step 2: Calculate daily medians for adjusted APR
+            daily_medians_adjusted = calculate_daily_medians(daily_agent_data_adjusted, 'adjusted_apr')
+            # Step 3: Calculate 7-day moving average of daily medians for adjusted APR
+            daily_medians_adjusted_with_ma = calculate_moving_average_medians(daily_medians_adjusted, 'adjusted_apr', window_days=7)
+    logger.info(f"NEW APPROACH APR: Processed {len(apr_data)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
+    if daily_medians_adjusted is not None:
+        logger.info(f"NEW APPROACH Adjusted APR: Processed adjusted APR data → {len(daily_medians_adjusted)} daily medians")
+    # This old moving average calculation is no longer needed with the new daily median approach
     # Find the last date where we have valid moving average data
+    last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
     # Find the last date where we have valid adjusted moving average data
     last_valid_adj_ma_date = None
+    if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
+        last_valid_adj_ma_date = daily_medians_adjusted_with_ma[daily_medians_adjusted_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_adjusted_with_ma['moving_avg'].dropna().empty else None
     # Determine the last valid date for either moving average
     last_valid_date = last_valid_ma_date
                 )
                 logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
+        # Add APR 7-day moving average of daily medians as a smooth line
+        x_values_ma = daily_medians_with_ma['timestamp'].tolist()
+        y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
         # Create hover template for the APR moving average line
         hover_data_apr = []
+        for idx, row in daily_medians_with_ma.iterrows():
             timestamp = row['timestamp']
+            # Format timestamp to show only date for daily data
+            formatted_timestamp = timestamp.strftime('%Y-%m-%d')
+            # Calculate number of active agents on this date
+            active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
             hover_data_apr.append(
+                f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
             )
         fig.add_trace(
                 x=x_values_ma,
                 y=y_values_ma,
                 mode='lines',  # Only lines for moving average
+                line=dict(color='red', width=3, shape='spline', smoothing=1.3),  # Smooth curved line
+                name='Median APR (7d window)',
                 hovertext=hover_data_apr,
                 hoverinfo='text',
                 visible=True  # Visible by default
             )
         )
+        logger.info(f"Added 7-day moving average of daily median APR trace with {len(x_values_ma)} points")
+        # Add adjusted APR 7-day moving average line if it exists
+        if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
+            x_values_adj_ma = daily_medians_adjusted_with_ma['timestamp'].tolist()
+            y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
             # Create hover template for the adjusted APR moving average line
             hover_data_adj = []
+            for idx, row in daily_medians_adjusted_with_ma.iterrows():
                 timestamp = row['timestamp']
+                # Format timestamp to show only date for daily data
+                formatted_timestamp = timestamp.strftime('%Y-%m-%d')
+                # Calculate number of active agents on this date
+                active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
+                hover_data_adj.append(
+                    f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
+                )
             fig.add_trace(
                 go.Scatter(
+                    x=x_values_adj_ma,
                     y=y_values_adj_ma,
                     mode='lines',  # Only lines for moving average
+                    line=dict(color='green', width=3, shape='spline', smoothing=1.3),  # Smooth curved line
+                    name='Median Adjusted APR (7d window)',
                     hovertext=hover_data_adj,
                     hoverinfo='text',
                     visible=True  # Visible by default
                 )
             )
+            logger.info(f"Added 7-day moving average of daily median Adjusted APR trace with {len(x_values_adj_ma)} points")
         else:
             logger.warning("No adjusted APR moving average data available to plot")
         )
     )
+    # Update y-axis with autoscaling
     fig.update_yaxes(
         showgrid=True,
         gridwidth=1,
         gridcolor='rgba(0,0,0,0.1)',
+        # Use autoscaling instead of fixed range
+        autorange=True,  # Enable autoscaling to fit the data
         tickformat=".2f",  # Format tick labels with 2 decimal places
         tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"),  # Adjusted font size
         title=None  # Remove the built-in axis title since we're using annotations
                 # Update visibility of traces based on toggle values
                 for i, trace in enumerate(combined_fig.data):
                     # Check if this is a moving average trace
+                    if trace.name == 'Median APR (7d window)':
                         trace.visible = show_apr_ma
                     elif trace.name == 'Average ETH Adjusted APR (3d window)':
                         trace.visible = show_adjusted_apr_ma
                 # Update visibility of traces based on toggle values
                 for i, trace in enumerate(combined_fig.data):
                     # Check if this is a moving average trace
+                    if trace.name == 'Median ROI (7d window)':
                         trace.visible = show_roi_ma
                 return combined_fig