gauravlochab
committed on
Commit
·
cf88990
1
Parent(s):
144269c
chore: add preprocessing to the dataframe
Browse files
app.py
CHANGED
@@ -838,8 +838,114 @@ def generate_roi_visualizations():
|
|
838 |
|
839 |
return combined_fig, csv_file
|
840 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
841 |
def create_combined_roi_time_series_graph(df):
|
842 |
-
"""Create a time series graph showing
|
843 |
if len(df) == 0:
|
844 |
logger.error("No data to plot combined ROI graph")
|
845 |
fig = go.Figure()
|
@@ -953,53 +1059,20 @@ def create_combined_roi_time_series_graph(df):
|
|
953 |
# Use the filtered data for all subsequent operations
|
954 |
df = df_filtered
|
955 |
|
956 |
-
#
|
957 |
-
|
|
|
958 |
|
959 |
-
#
|
960 |
-
|
961 |
-
|
962 |
-
# Log the average ROI data
|
963 |
-
logger.info(f"Calculated average ROI data with {len(avg_roi_data)} points")
|
964 |
-
for idx, row in avg_roi_data.iterrows():
|
965 |
-
logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_roi={row['roi']}")
|
966 |
-
|
967 |
-
# Calculate moving average based on a time window (3 days)
|
968 |
-
# Sort data by timestamp
|
969 |
-
df_sorted = df.sort_values('timestamp')
|
970 |
|
971 |
-
#
|
972 |
-
|
973 |
-
avg_roi_data_with_ma['moving_avg'] = None # Initialize the moving average column
|
974 |
|
975 |
-
|
976 |
-
time_window = pd.Timedelta(days=3)
|
977 |
-
logger.info(f"Calculating moving average with time window of {time_window}")
|
978 |
-
|
979 |
-
# Calculate the moving averages for each timestamp
|
980 |
-
for i, row in avg_roi_data_with_ma.iterrows():
|
981 |
-
current_time = row['timestamp']
|
982 |
-
window_start = current_time - time_window
|
983 |
-
|
984 |
-
# Get all data points within the 3-day time window
|
985 |
-
window_data = df_sorted[
|
986 |
-
(df_sorted['timestamp'] >= window_start) &
|
987 |
-
(df_sorted['timestamp'] <= current_time)
|
988 |
-
]
|
989 |
-
|
990 |
-
# Calculate the average ROI for the 3-day time window
|
991 |
-
if not window_data.empty:
|
992 |
-
avg_roi_data_with_ma.at[i, 'moving_avg'] = window_data['roi'].mean()
|
993 |
-
logger.debug(f"ROI time window {window_start} to {current_time}: {len(window_data)} points, avg={window_data['roi'].mean()}")
|
994 |
-
else:
|
995 |
-
# If no data points in the window, use the current value
|
996 |
-
avg_roi_data_with_ma.at[i, 'moving_avg'] = row['roi']
|
997 |
-
logger.debug(f"No data points in time window for {current_time}, using current value {row['roi']}")
|
998 |
-
|
999 |
-
logger.info(f"Calculated time-based moving averages with {len(avg_roi_data_with_ma)} points")
|
1000 |
|
1001 |
# Find the last date where we have valid moving average data
|
1002 |
-
last_valid_ma_date =
|
1003 |
|
1004 |
# If we don't have any valid moving average data, use the max time from the original data
|
1005 |
last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
|
@@ -1007,17 +1080,17 @@ def create_combined_roi_time_series_graph(df):
|
|
1007 |
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
|
1008 |
logger.info(f"Using last valid date for graph: {last_valid_date}")
|
1009 |
|
1010 |
-
# Plot individual agent data points with agent names in hover, but limit display for scalability
|
1011 |
-
if not
|
1012 |
# Group by agent to use different colors for each agent
|
1013 |
-
unique_agents =
|
1014 |
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
|
1015 |
|
1016 |
# Create a color map for agents
|
1017 |
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
|
1018 |
|
1019 |
# Calculate the total number of data points per agent to determine which are most active
|
1020 |
-
agent_counts =
|
1021 |
|
1022 |
# Determine how many agents to show individually (limit to top 5 most active)
|
1023 |
MAX_VISIBLE_AGENTS = 5
|
@@ -1025,9 +1098,9 @@ def create_combined_roi_time_series_graph(df):
|
|
1025 |
|
1026 |
logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
|
1027 |
|
1028 |
-
# Add data points for each agent, but only make top agents visible by default
|
1029 |
for agent_name in unique_agents:
|
1030 |
-
agent_data =
|
1031 |
|
1032 |
# Explicitly convert to Python lists
|
1033 |
x_values = agent_data['timestamp'].tolist()
|
@@ -1048,31 +1121,29 @@ def create_combined_roi_time_series_graph(df):
|
|
1048 |
size=10,
|
1049 |
line=dict(width=1, color='black')
|
1050 |
),
|
1051 |
-
name=f'Agent: {agent_name} (ROI)',
|
1052 |
-
hovertemplate='Time: %{x}<br>ROI: %{y:.2f}
|
1053 |
visible=is_visible # All agents hidden by default
|
1054 |
)
|
1055 |
)
|
1056 |
-
logger.info(f"Added ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
|
1057 |
|
1058 |
-
# Add ROI moving average as a smooth line
|
1059 |
-
x_values_ma =
|
1060 |
-
y_values_ma =
|
1061 |
|
1062 |
# Create hover template for the ROI moving average line
|
1063 |
hover_data_roi = []
|
1064 |
-
for idx, row in
|
1065 |
timestamp = row['timestamp']
|
1066 |
-
# Format timestamp to show only
|
1067 |
-
formatted_timestamp = timestamp.strftime('%Y-%m-%d
|
1068 |
|
1069 |
-
# Calculate number of active agents
|
1070 |
-
|
1071 |
-
active_agents = len(df[(df['timestamp'] >= time_24h_ago) &
|
1072 |
-
(df['timestamp'] <= timestamp)]['agent_id'].unique())
|
1073 |
|
1074 |
hover_data_roi.append(
|
1075 |
-
f"
|
1076 |
)
|
1077 |
|
1078 |
fig.add_trace(
|
@@ -1080,14 +1151,14 @@ def create_combined_roi_time_series_graph(df):
|
|
1080 |
x=x_values_ma,
|
1081 |
y=y_values_ma,
|
1082 |
mode='lines', # Only lines for moving average
|
1083 |
-
line=dict(color='blue', width=
|
1084 |
-
name='
|
1085 |
hovertext=hover_data_roi,
|
1086 |
hoverinfo='text',
|
1087 |
visible=True # Visible by default
|
1088 |
)
|
1089 |
)
|
1090 |
-
logger.info(f"Added
|
1091 |
|
1092 |
# Update layout with average runtime information in the title
|
1093 |
fig.update_layout(
|
@@ -1148,13 +1219,13 @@ def create_combined_roi_time_series_graph(df):
|
|
1148 |
)
|
1149 |
)
|
1150 |
|
1151 |
-
# Update y-axis with
|
1152 |
fig.update_yaxes(
|
1153 |
showgrid=True,
|
1154 |
gridwidth=1,
|
1155 |
gridcolor='rgba(0,0,0,0.1)',
|
1156 |
-
# Use
|
1157 |
-
|
1158 |
tickformat=".2f", # Format tick labels with 2 decimal places
|
1159 |
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
|
1160 |
title=None # Remove the built-in axis title since we're using annotations
|
@@ -1590,79 +1661,47 @@ def create_combined_time_series_graph(df):
|
|
1590 |
# Use the filtered data for all subsequent operations
|
1591 |
apr_data = apr_data_filtered
|
1592 |
|
1593 |
-
#
|
1594 |
-
|
1595 |
-
|
1596 |
-
# Sort by timestamp
|
1597 |
-
avg_apr_data = avg_apr_data.sort_values('timestamp')
|
1598 |
-
|
1599 |
-
# Log the average APR data
|
1600 |
-
logger.info(f"Calculated average APR data with {len(avg_apr_data)} points")
|
1601 |
-
for idx, row in avg_apr_data.iterrows():
|
1602 |
-
logger.info(f" Average point {idx}: timestamp={row['timestamp']}, avg_apr={row['apr']}")
|
1603 |
-
|
1604 |
-
# Calculate moving average based on a time window (2 hours)
|
1605 |
-
# Sort data by timestamp
|
1606 |
-
apr_data_sorted = apr_data.sort_values('timestamp')
|
1607 |
-
|
1608 |
-
# Create a new dataframe for the moving average
|
1609 |
-
avg_apr_data_with_ma = avg_apr_data.copy()
|
1610 |
-
avg_apr_data_with_ma['moving_avg'] = None # Initialize the moving average column
|
1611 |
|
1612 |
-
#
|
1613 |
-
|
1614 |
-
logger.info(f"Calculating moving average with time window of {time_window}")
|
1615 |
|
1616 |
-
#
|
1617 |
-
|
1618 |
-
avg_apr_data_with_ma['adjusted_moving_avg'] = None # 3-day window for adjusted APR
|
1619 |
|
1620 |
-
#
|
1621 |
-
|
|
|
1622 |
|
1623 |
-
|
1624 |
-
|
1625 |
-
|
1626 |
-
window_start = current_time - time_window
|
1627 |
-
|
1628 |
-
# Get all data points within the 3-day time window
|
1629 |
-
window_data = apr_data_sorted[
|
1630 |
-
(apr_data_sorted['timestamp'] >= window_start) &
|
1631 |
-
(apr_data_sorted['timestamp'] <= current_time)
|
1632 |
-
]
|
1633 |
|
1634 |
-
|
1635 |
-
|
1636 |
-
|
1637 |
-
|
1638 |
-
|
1639 |
-
|
1640 |
-
|
1641 |
-
|
1642 |
-
|
1643 |
-
|
1644 |
-
|
1645 |
-
|
1646 |
-
|
1647 |
-
# If we don't have adjusted_apr data in this window but had some previously,
|
1648 |
-
# use the last valid value to maintain continuity in the graph
|
1649 |
-
if last_valid_adjusted_moving_avg is not None:
|
1650 |
-
avg_apr_data_with_ma.at[i, 'adjusted_moving_avg'] = last_valid_adjusted_moving_avg
|
1651 |
-
logger.debug(f"No adjusted APR data in window, using last valid value: {last_valid_adjusted_moving_avg}")
|
1652 |
-
else:
|
1653 |
-
# If no data points in the window, use the current value
|
1654 |
-
avg_apr_data_with_ma.at[i, 'moving_avg'] = row['apr']
|
1655 |
-
logger.debug(f"No data points in time window for {current_time}, using current value {row['apr']}")
|
1656 |
|
1657 |
-
|
1658 |
|
1659 |
# Find the last date where we have valid moving average data
|
1660 |
-
last_valid_ma_date =
|
1661 |
|
1662 |
# Find the last date where we have valid adjusted moving average data
|
1663 |
last_valid_adj_ma_date = None
|
1664 |
-
if
|
1665 |
-
last_valid_adj_ma_date =
|
1666 |
|
1667 |
# Determine the last valid date for either moving average
|
1668 |
last_valid_date = last_valid_ma_date
|
@@ -1748,36 +1787,22 @@ def create_combined_time_series_graph(df):
|
|
1748 |
)
|
1749 |
logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
|
1750 |
|
1751 |
-
# Add APR moving average as a smooth line
|
1752 |
-
x_values_ma =
|
1753 |
-
y_values_ma =
|
1754 |
|
1755 |
# Create hover template for the APR moving average line
|
1756 |
hover_data_apr = []
|
1757 |
-
for idx, row in
|
1758 |
timestamp = row['timestamp']
|
1759 |
-
# Format timestamp to show only
|
1760 |
-
formatted_timestamp = timestamp.strftime('%Y-%m-%d
|
1761 |
-
|
1762 |
-
# Calculate number of active agents
|
1763 |
-
|
1764 |
-
time_24h_ago = timestamp - pd.Timedelta(hours=24)
|
1765 |
-
april_25_2025 = datetime(2025, 4, 25)
|
1766 |
-
|
1767 |
-
if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
|
1768 |
-
# After April 25th, 2025: Use ROI data
|
1769 |
-
roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
|
1770 |
-
(global_roi_df['timestamp'] <= timestamp)]
|
1771 |
-
active_agents = len(roi_window_data['agent_id'].unique())
|
1772 |
-
logger.debug(f"Using ROI data for active agent count at {timestamp} (after Apr 25): {active_agents} agents")
|
1773 |
-
else:
|
1774 |
-
# Before April 25th, 2025 or if ROI data is not available: Use APR data
|
1775 |
-
active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
|
1776 |
-
(apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
|
1777 |
-
logger.debug(f"Using APR data for active agent count at {timestamp} (before Apr 25): {active_agents} agents")
|
1778 |
|
1779 |
hover_data_apr.append(
|
1780 |
-
f"
|
1781 |
)
|
1782 |
|
1783 |
fig.add_trace(
|
@@ -1785,72 +1810,47 @@ def create_combined_time_series_graph(df):
|
|
1785 |
x=x_values_ma,
|
1786 |
y=y_values_ma,
|
1787 |
mode='lines', # Only lines for moving average
|
1788 |
-
line=dict(color='red', width=
|
1789 |
-
name='
|
1790 |
hovertext=hover_data_apr,
|
1791 |
hoverinfo='text',
|
1792 |
visible=True # Visible by default
|
1793 |
)
|
1794 |
)
|
1795 |
-
logger.info(f"Added
|
1796 |
|
1797 |
-
# Add adjusted APR moving average line if it exists
|
1798 |
-
if
|
1799 |
-
|
1800 |
-
|
1801 |
-
filled_avg_apr_data = avg_apr_data_with_ma.copy()
|
1802 |
-
filled_avg_apr_data['adjusted_moving_avg'] = filled_avg_apr_data['adjusted_moving_avg'].fillna(method='ffill')
|
1803 |
-
|
1804 |
-
# Use the filled dataframe for the adjusted APR line
|
1805 |
-
x_values_adj = filled_avg_apr_data['timestamp'].tolist()
|
1806 |
-
y_values_adj_ma = filled_avg_apr_data['adjusted_moving_avg'].tolist()
|
1807 |
|
1808 |
# Create hover template for the adjusted APR moving average line
|
1809 |
hover_data_adj = []
|
1810 |
-
for idx, row in
|
1811 |
timestamp = row['timestamp']
|
1812 |
-
# Format timestamp to show only
|
1813 |
-
formatted_timestamp = timestamp.strftime('%Y-%m-%d
|
1814 |
|
1815 |
-
# Calculate number of active agents
|
1816 |
-
|
1817 |
-
time_24h_ago = timestamp - pd.Timedelta(hours=24)
|
1818 |
-
april_25_2025 = datetime(2025, 4, 25)
|
1819 |
-
|
1820 |
-
if timestamp >= april_25_2025 and global_roi_df is not None and not global_roi_df.empty:
|
1821 |
-
# After April 25th, 2025: Use ROI data
|
1822 |
-
roi_window_data = global_roi_df[(global_roi_df['timestamp'] >= time_24h_ago) &
|
1823 |
-
(global_roi_df['timestamp'] <= timestamp)]
|
1824 |
-
active_agents = len(roi_window_data['agent_id'].unique())
|
1825 |
-
logger.debug(f"Using ROI data for adjusted APR active agent count at {timestamp} (after Apr 25)")
|
1826 |
-
else:
|
1827 |
-
# Before April 25th, 2025 or if ROI data is not available: Use APR data
|
1828 |
-
active_agents = len(apr_data[(apr_data['timestamp'] >= time_24h_ago) &
|
1829 |
-
(apr_data['timestamp'] <= timestamp)]['agent_id'].unique())
|
1830 |
-
logger.debug(f"Using APR data for adjusted APR active agent count at {timestamp} (before Apr 25)")
|
1831 |
|
1832 |
-
|
1833 |
-
|
1834 |
-
|
1835 |
-
)
|
1836 |
-
else:
|
1837 |
-
hover_data_adj.append(
|
1838 |
-
f"Time: {formatted_timestamp}<br>Avg ETH Adjusted APR (3d window): N/A<br>Active agents (24h): {active_agents}"
|
1839 |
-
)
|
1840 |
|
1841 |
fig.add_trace(
|
1842 |
go.Scatter(
|
1843 |
-
x=
|
1844 |
y=y_values_adj_ma,
|
1845 |
mode='lines', # Only lines for moving average
|
1846 |
-
line=dict(color='green', width=
|
1847 |
-
name='
|
1848 |
hovertext=hover_data_adj,
|
1849 |
hoverinfo='text',
|
1850 |
visible=True # Visible by default
|
1851 |
)
|
1852 |
)
|
1853 |
-
logger.info(f"Added
|
1854 |
else:
|
1855 |
logger.warning("No adjusted APR moving average data available to plot")
|
1856 |
|
@@ -1942,13 +1942,13 @@ def create_combined_time_series_graph(df):
|
|
1942 |
)
|
1943 |
)
|
1944 |
|
1945 |
-
# Update y-axis with
|
1946 |
fig.update_yaxes(
|
1947 |
showgrid=True,
|
1948 |
gridwidth=1,
|
1949 |
gridcolor='rgba(0,0,0,0.1)',
|
1950 |
-
# Use
|
1951 |
-
|
1952 |
tickformat=".2f", # Format tick labels with 2 decimal places
|
1953 |
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
|
1954 |
title=None # Remove the built-in axis title since we're using annotations
|
@@ -2519,7 +2519,7 @@ def dashboard():
|
|
2519 |
# Update visibility of traces based on toggle values
|
2520 |
for i, trace in enumerate(combined_fig.data):
|
2521 |
# Check if this is a moving average trace
|
2522 |
-
if trace.name == '
|
2523 |
trace.visible = show_apr_ma
|
2524 |
elif trace.name == 'Average ETH Adjusted APR (3d window)':
|
2525 |
trace.visible = show_adjusted_apr_ma
|
@@ -2546,7 +2546,7 @@ def dashboard():
|
|
2546 |
# Update visibility of traces based on toggle values
|
2547 |
for i, trace in enumerate(combined_fig.data):
|
2548 |
# Check if this is a moving average trace
|
2549 |
-
if trace.name == '
|
2550 |
trace.visible = show_roi_ma
|
2551 |
|
2552 |
return combined_fig
|
|
|
838 |
|
839 |
return combined_fig, csv_file
|
840 |
|
841 |
+
def aggregate_daily_data(df, metric_column):
    """
    Aggregate data by date and agent, taking the mean of values within each day.

    Args:
        df: DataFrame with a datetime 'timestamp' column, an 'agent_id' column
            and the metric column. The metadata columns 'agent_name',
            'is_dummy' and 'metric_type' are carried over (first value per
            group) when they are present.
        metric_column: Name of the metric column (e.g. 'apr' or 'roi')

    Returns:
        DataFrame with one row per (date, agent_id) holding the daily mean of
        the metric, plus a 'date' column and a midnight 'timestamp' column.
        An empty input is returned unchanged.
    """
    if df.empty:
        return df

    # Work on a copy so the caller's frame does not gain a 'date' column.
    daily = df.copy()

    # Drop the time-of-day component so every row of a day shares one key.
    daily['date'] = daily['timestamp'].dt.date

    # Mean of the metric within the day; metadata columns are optional, so
    # only the ones actually present are aggregated (avoids a KeyError on
    # frames that lack e.g. 'is_dummy').
    aggregations = {metric_column: 'mean'}
    for column in ('agent_name', 'is_dummy', 'metric_type'):
        if column in daily.columns and column != metric_column:
            aggregations[column] = 'first'

    daily_agent_data = daily.groupby(['date', 'agent_id']).agg(aggregations).reset_index()

    # Convert date back to datetime for plotting
    daily_agent_data['timestamp'] = pd.to_datetime(daily_agent_data['date'])

    logger.info(f"Aggregated {len(df)} data points into {len(daily_agent_data)} daily values for {metric_column}")

    return daily_agent_data
|
873 |
+
|
874 |
+
def calculate_daily_medians(daily_agent_data, metric_column):
    """
    Collapse per-agent daily values into one median value per date.

    Args:
        daily_agent_data: DataFrame of daily per-agent values; must contain a
            'date' column and the metric column
        metric_column: Name of the metric column (e.g. 'apr' or 'roi')

    Returns:
        DataFrame with columns 'date', the metric column (median across the
        rows of that date) and 'timestamp' (the date as a datetime). An empty
        input is returned unchanged.
    """
    if not daily_agent_data.empty:
        # Median across all agents of a date; missing values are skipped.
        per_day = daily_agent_data.groupby('date')[[metric_column]].median().reset_index()

        # Plotting needs datetimes rather than plain date objects.
        per_day['timestamp'] = pd.to_datetime(per_day['date'])

        logger.info(f"Calculated {len(per_day)} daily median values for {metric_column}")

        return per_day

    return daily_agent_data
|
899 |
+
|
900 |
+
def calculate_moving_average_medians(daily_medians, metric_column, window_days=7):
    """
    Calculate moving average of daily medians using a specified time window.

    For each row the average covers the rows whose timestamp lies in the
    half-open interval (timestamp - window_days, timestamp]. With one row per
    day this is exactly `window_days` daily values; a window closed on both
    ends would silently average `window_days + 1` days.

    Args:
        daily_medians: DataFrame with a datetime 'timestamp' column and the
            metric column
        metric_column: Name of the metric column (e.g. 'apr' or 'roi')
        window_days: Number of days for the moving average window

    Returns:
        A copy of the input sorted by 'timestamp' with a float 'moving_avg'
        column added. An empty input yields an empty copy that still carries
        the 'moving_avg' column, so callers can index it unconditionally.
    """
    if daily_medians.empty:
        # Keep the output schema stable even when there is nothing to average.
        result = daily_medians.copy()
        result['moving_avg'] = pd.Series(dtype='float64')
        return result

    # Sort by timestamp (on a copy, so the caller's frame is left untouched)
    result = daily_medians.sort_values('timestamp').copy()

    # Define the time window
    time_window = pd.Timedelta(days=window_days)
    logger.info(f"Calculating {window_days}-day moving average of daily medians for {metric_column}")

    timestamps = result['timestamp']
    values = result[metric_column]

    moving_averages = []
    for current_time, current_value in zip(timestamps, values):
        # Exclusive lower bound: the row exactly `window_days` back is outside.
        in_window = (timestamps > current_time - time_window) & (timestamps <= current_time)
        window_values = values[in_window]

        if not window_values.empty:
            moving_averages.append(window_values.mean())
        else:
            # Only reachable when the timestamp is missing (NaT compares as
            # False); fall back to the row's own value.
            moving_averages.append(current_value)

    # Build the column in one go so it is float64 instead of object dtype.
    result['moving_avg'] = pd.Series(moving_averages, index=result.index, dtype='float64')

    logger.info(f"Calculated {window_days}-day moving averages with {len(result)} points")

    return result
|
946 |
+
|
947 |
def create_combined_roi_time_series_graph(df):
|
948 |
+
"""Create a time series graph showing daily median ROI values with 7-day moving average"""
|
949 |
if len(df) == 0:
|
950 |
logger.error("No data to plot combined ROI graph")
|
951 |
fig = go.Figure()
|
|
|
1059 |
# Use the filtered data for all subsequent operations
|
1060 |
df = df_filtered
|
1061 |
|
1062 |
+
# NEW APPROACH: Daily aggregation and median calculation
|
1063 |
+
# Step 1: Aggregate data daily per agent (mean of values within each day)
|
1064 |
+
daily_agent_data = aggregate_daily_data(df, 'roi')
|
1065 |
|
1066 |
+
# Step 2: Calculate daily medians across all agents
|
1067 |
+
daily_medians = calculate_daily_medians(daily_agent_data, 'roi')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1068 |
|
1069 |
+
# Step 3: Calculate 7-day moving average of daily medians
|
1070 |
+
daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'roi', window_days=7)
|
|
|
1071 |
|
1072 |
+
logger.info(f"NEW APPROACH: Processed {len(df)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1073 |
|
1074 |
# Find the last date where we have valid moving average data
|
1075 |
+
last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
|
1076 |
|
1077 |
# If we don't have any valid moving average data, use the max time from the original data
|
1078 |
last_valid_date = last_valid_ma_date if last_valid_ma_date is not None else df['timestamp'].max()
|
|
|
1080 |
logger.info(f"Last valid moving average date: {last_valid_ma_date}")
|
1081 |
logger.info(f"Using last valid date for graph: {last_valid_date}")
|
1082 |
|
1083 |
+
# Plot individual agent daily data points with agent names in hover, but limit display for scalability
|
1084 |
+
if not daily_agent_data.empty:
|
1085 |
# Group by agent to use different colors for each agent
|
1086 |
+
unique_agents = daily_agent_data['agent_name'].unique()
|
1087 |
colors = px.colors.qualitative.Plotly[:len(unique_agents)]
|
1088 |
|
1089 |
# Create a color map for agents
|
1090 |
color_map = {agent: colors[i % len(colors)] for i, agent in enumerate(unique_agents)}
|
1091 |
|
1092 |
# Calculate the total number of data points per agent to determine which are most active
|
1093 |
+
agent_counts = daily_agent_data['agent_name'].value_counts()
|
1094 |
|
1095 |
# Determine how many agents to show individually (limit to top 5 most active)
|
1096 |
MAX_VISIBLE_AGENTS = 5
|
|
|
1098 |
|
1099 |
logger.info(f"Showing {len(top_agents)} agents by default out of {len(unique_agents)} total agents")
|
1100 |
|
1101 |
+
# Add daily aggregated data points for each agent, but only make top agents visible by default
|
1102 |
for agent_name in unique_agents:
|
1103 |
+
agent_data = daily_agent_data[daily_agent_data['agent_name'] == agent_name]
|
1104 |
|
1105 |
# Explicitly convert to Python lists
|
1106 |
x_values = agent_data['timestamp'].tolist()
|
|
|
1121 |
size=10,
|
1122 |
line=dict(width=1, color='black')
|
1123 |
),
|
1124 |
+
name=f'Agent: {agent_name} (Daily ROI)',
|
1125 |
+
hovertemplate='Time: %{x}<br>Daily ROI: %{y:.2f}%<br>Agent: ' + agent_name + '<extra></extra>',
|
1126 |
visible=is_visible # All agents hidden by default
|
1127 |
)
|
1128 |
)
|
1129 |
+
logger.info(f"Added daily ROI data points for agent {agent_name} with {len(x_values)} points (visible: {is_visible})")
|
1130 |
|
1131 |
+
# Add ROI 7-day moving average of daily medians as a smooth line
|
1132 |
+
x_values_ma = daily_medians_with_ma['timestamp'].tolist()
|
1133 |
+
y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
|
1134 |
|
1135 |
# Create hover template for the ROI moving average line
|
1136 |
hover_data_roi = []
|
1137 |
+
for idx, row in daily_medians_with_ma.iterrows():
|
1138 |
timestamp = row['timestamp']
|
1139 |
+
# Format timestamp to show only date for daily data
|
1140 |
+
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
|
1141 |
|
1142 |
+
# Calculate number of active agents on this date
|
1143 |
+
active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
|
|
|
|
|
1144 |
|
1145 |
hover_data_roi.append(
|
1146 |
+
f"Date: {formatted_timestamp}<br>Median ROI (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
|
1147 |
)
|
1148 |
|
1149 |
fig.add_trace(
|
|
|
1151 |
x=x_values_ma,
|
1152 |
y=y_values_ma,
|
1153 |
mode='lines', # Only lines for moving average
|
1154 |
+
line=dict(color='blue', width=3), # Thicker line for main trend
|
1155 |
+
name='Median ROI (7d window)',
|
1156 |
hovertext=hover_data_roi,
|
1157 |
hoverinfo='text',
|
1158 |
visible=True # Visible by default
|
1159 |
)
|
1160 |
)
|
1161 |
+
logger.info(f"Added 7-day moving average of daily median ROI trace with {len(x_values_ma)} points")
|
1162 |
|
1163 |
# Update layout with average runtime information in the title
|
1164 |
fig.update_layout(
|
|
|
1219 |
)
|
1220 |
)
|
1221 |
|
1222 |
+
# Update y-axis with autoscaling for ROI
|
1223 |
fig.update_yaxes(
|
1224 |
showgrid=True,
|
1225 |
gridwidth=1,
|
1226 |
gridcolor='rgba(0,0,0,0.1)',
|
1227 |
+
# Use autoscaling instead of fixed range
|
1228 |
+
autorange=True, # Enable autoscaling to fit the data
|
1229 |
tickformat=".2f", # Format tick labels with 2 decimal places
|
1230 |
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
|
1231 |
title=None # Remove the built-in axis title since we're using annotations
|
|
|
1661 |
# Use the filtered data for all subsequent operations
|
1662 |
apr_data = apr_data_filtered
|
1663 |
|
1664 |
+
# NEW APPROACH: Daily aggregation and median calculation for APR
|
1665 |
+
# Step 1: Aggregate data daily per agent (mean of values within each day)
|
1666 |
+
daily_agent_data = aggregate_daily_data(apr_data, 'apr')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1667 |
|
1668 |
+
# Step 2: Calculate daily medians across all agents
|
1669 |
+
daily_medians = calculate_daily_medians(daily_agent_data, 'apr')
|
|
|
1670 |
|
1671 |
+
# Step 3: Calculate 7-day moving average of daily medians
|
1672 |
+
daily_medians_with_ma = calculate_moving_average_medians(daily_medians, 'apr', window_days=7)
|
|
|
1673 |
|
1674 |
+
# Also handle adjusted APR if it exists
|
1675 |
+
daily_medians_adjusted = None
|
1676 |
+
daily_medians_adjusted_with_ma = None
|
1677 |
|
1678 |
+
if 'adjusted_apr' in apr_data.columns and apr_data['adjusted_apr'].notna().any():
|
1679 |
+
# Create a separate dataset for adjusted APR
|
1680 |
+
apr_data_with_adjusted = apr_data[apr_data['adjusted_apr'].notna()].copy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1681 |
|
1682 |
+
if not apr_data_with_adjusted.empty:
|
1683 |
+
# Step 1: Aggregate adjusted APR data daily per agent
|
1684 |
+
daily_agent_data_adjusted = aggregate_daily_data(apr_data_with_adjusted, 'adjusted_apr')
|
1685 |
+
|
1686 |
+
# Step 2: Calculate daily medians for adjusted APR
|
1687 |
+
daily_medians_adjusted = calculate_daily_medians(daily_agent_data_adjusted, 'adjusted_apr')
|
1688 |
+
|
1689 |
+
# Step 3: Calculate 7-day moving average of daily medians for adjusted APR
|
1690 |
+
daily_medians_adjusted_with_ma = calculate_moving_average_medians(daily_medians_adjusted, 'adjusted_apr', window_days=7)
|
1691 |
+
|
1692 |
+
logger.info(f"NEW APPROACH APR: Processed {len(apr_data)} raw points → {len(daily_agent_data)} daily agent values → {len(daily_medians)} daily medians")
|
1693 |
+
if daily_medians_adjusted is not None:
|
1694 |
+
logger.info(f"NEW APPROACH Adjusted APR: Processed adjusted APR data → {len(daily_medians_adjusted)} daily medians")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1695 |
|
1696 |
+
# This old moving average calculation is no longer needed with the new daily median approach
|
1697 |
|
1698 |
# Find the last date where we have valid moving average data
|
1699 |
+
last_valid_ma_date = daily_medians_with_ma[daily_medians_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_with_ma['moving_avg'].dropna().empty else None
|
1700 |
|
1701 |
# Find the last date where we have valid adjusted moving average data
|
1702 |
last_valid_adj_ma_date = None
|
1703 |
+
if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
|
1704 |
+
last_valid_adj_ma_date = daily_medians_adjusted_with_ma[daily_medians_adjusted_with_ma['moving_avg'].notna()]['timestamp'].max() if not daily_medians_adjusted_with_ma['moving_avg'].dropna().empty else None
|
1705 |
|
1706 |
# Determine the last valid date for either moving average
|
1707 |
last_valid_date = last_valid_ma_date
|
|
|
1787 |
)
|
1788 |
logger.info(f"Added Adjusted APR data points for agent {agent_name} with {len(x_values_adj)} points (visible: {is_visible})")
|
1789 |
|
1790 |
+
# Add APR 7-day moving average of daily medians as a smooth line
|
1791 |
+
x_values_ma = daily_medians_with_ma['timestamp'].tolist()
|
1792 |
+
y_values_ma = daily_medians_with_ma['moving_avg'].tolist()
|
1793 |
|
1794 |
# Create hover template for the APR moving average line
|
1795 |
hover_data_apr = []
|
1796 |
+
for idx, row in daily_medians_with_ma.iterrows():
|
1797 |
timestamp = row['timestamp']
|
1798 |
+
# Format timestamp to show only date for daily data
|
1799 |
+
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
|
1800 |
+
|
1801 |
+
# Calculate number of active agents on this date
|
1802 |
+
active_agents = len(daily_agent_data[daily_agent_data['timestamp'] == timestamp]['agent_id'].unique())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1803 |
|
1804 |
hover_data_apr.append(
|
1805 |
+
f"Date: {formatted_timestamp}<br>Median APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
|
1806 |
)
|
1807 |
|
1808 |
fig.add_trace(
|
|
|
1810 |
x=x_values_ma,
|
1811 |
y=y_values_ma,
|
1812 |
mode='lines', # Only lines for moving average
|
1813 |
+
line=dict(color='red', width=3, shape='spline', smoothing=1.3), # Smooth curved line
|
1814 |
+
name='Median APR (7d window)',
|
1815 |
hovertext=hover_data_apr,
|
1816 |
hoverinfo='text',
|
1817 |
visible=True # Visible by default
|
1818 |
)
|
1819 |
)
|
1820 |
+
logger.info(f"Added 7-day moving average of daily median APR trace with {len(x_values_ma)} points")
|
1821 |
|
1822 |
+
# Add adjusted APR 7-day moving average line if it exists
|
1823 |
+
if daily_medians_adjusted_with_ma is not None and not daily_medians_adjusted_with_ma.empty:
|
1824 |
+
x_values_adj_ma = daily_medians_adjusted_with_ma['timestamp'].tolist()
|
1825 |
+
y_values_adj_ma = daily_medians_adjusted_with_ma['moving_avg'].tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
1826 |
|
1827 |
# Create hover template for the adjusted APR moving average line
|
1828 |
hover_data_adj = []
|
1829 |
+
for idx, row in daily_medians_adjusted_with_ma.iterrows():
|
1830 |
timestamp = row['timestamp']
|
1831 |
+
# Format timestamp to show only date for daily data
|
1832 |
+
formatted_timestamp = timestamp.strftime('%Y-%m-%d')
|
1833 |
|
1834 |
+
# Calculate number of active agents on this date
|
1835 |
+
active_agents = len(daily_agent_data_adjusted[daily_agent_data_adjusted['timestamp'] == timestamp]['agent_id'].unique()) if 'daily_agent_data_adjusted' in locals() else 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1836 |
|
1837 |
+
hover_data_adj.append(
|
1838 |
+
f"Date: {formatted_timestamp}<br>Median Adjusted APR (7d window): {row['moving_avg']:.2f}%<br>Active agents: {active_agents}"
|
1839 |
+
)
|
|
|
|
|
|
|
|
|
|
|
1840 |
|
1841 |
fig.add_trace(
|
1842 |
go.Scatter(
|
1843 |
+
x=x_values_adj_ma,
|
1844 |
y=y_values_adj_ma,
|
1845 |
mode='lines', # Only lines for moving average
|
1846 |
+
line=dict(color='green', width=3, shape='spline', smoothing=1.3), # Smooth curved line
|
1847 |
+
name='Median Adjusted APR (7d window)',
|
1848 |
hovertext=hover_data_adj,
|
1849 |
hoverinfo='text',
|
1850 |
visible=True # Visible by default
|
1851 |
)
|
1852 |
)
|
1853 |
+
logger.info(f"Added 7-day moving average of daily median Adjusted APR trace with {len(x_values_adj_ma)} points")
|
1854 |
else:
|
1855 |
logger.warning("No adjusted APR moving average data available to plot")
|
1856 |
|
|
|
1942 |
)
|
1943 |
)
|
1944 |
|
1945 |
+
# Update y-axis with autoscaling
|
1946 |
fig.update_yaxes(
|
1947 |
showgrid=True,
|
1948 |
gridwidth=1,
|
1949 |
gridcolor='rgba(0,0,0,0.1)',
|
1950 |
+
# Use autoscaling instead of fixed range
|
1951 |
+
autorange=True, # Enable autoscaling to fit the data
|
1952 |
tickformat=".2f", # Format tick labels with 2 decimal places
|
1953 |
tickfont=dict(size=14, family="Arial, sans-serif", color="black", weight="bold"), # Adjusted font size
|
1954 |
title=None # Remove the built-in axis title since we're using annotations
|
|
|
2519 |
# Update visibility of traces based on toggle values
|
2520 |
for i, trace in enumerate(combined_fig.data):
|
2521 |
# Check if this is a moving average trace
|
2522 |
+
if trace.name == 'Median APR (7d window)':
|
2523 |
trace.visible = show_apr_ma
|
2524 |
elif trace.name == 'Average ETH Adjusted APR (3d window)':
|
2525 |
trace.visible = show_adjusted_apr_ma
|
|
|
2546 |
# Update visibility of traces based on toggle values
|
2547 |
for i, trace in enumerate(combined_fig.data):
|
2548 |
# Check if this is a moving average trace
|
2549 |
+
if trace.name == 'Median ROI (7d window)':
|
2550 |
trace.visible = show_roi_ma
|
2551 |
|
2552 |
return combined_fig
|