import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import logging

# Get the logger
logger = logging.getLogger(__name__)
def generate_continuous_random_data(existing_data, end_time=None):
    """
    Generate authentic-looking random data that continues from the existing data,
    with adjusted APR tracking APR at a small offset.

    Args:
        existing_data: DataFrame containing the existing data
        end_time: Optional end time (defaults to current time)

    Returns:
        DataFrame with dummy data points
    """
    # Use current time if not specified
    if end_time is None:
        end_time = datetime.now()
    # Find the latest timestamp in the existing data
    if not existing_data.empty:
        start_time = existing_data['timestamp'].max() + timedelta(minutes=10)
    else:
        # If no existing data, start from 30 days ago
        start_time = end_time - timedelta(days=30)

    # Generate timestamps with 10-minute intervals
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += timedelta(minutes=10)

    if not timestamps:
        return pd.DataFrame()  # No new data needed

    # Get unique agents from existing data
    if not existing_data.empty:
        unique_agents = existing_data[['agent_id', 'agent_name']].drop_duplicates().to_dict('records')
    else:
        # Create one dummy agent if no existing data
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]
    dummy_data_list = []

    # For each agent, create continuous dummy data
    for agent in unique_agents:
        agent_id = agent['agent_id']

        # Get the last real values for this agent to ensure continuity
        last_apr = None
        last_adjusted_apr = None
        last_roi = None

        if not existing_data.empty:
            # Get last APR value
            agent_apr_data = existing_data[(existing_data['agent_id'] == agent_id) &
                                           (existing_data['metric_type'] == 'APR')]
            if not agent_apr_data.empty:
                last_apr = agent_apr_data['apr'].iloc[-1]
                last_adjusted_apr = agent_apr_data['adjusted_apr'].iloc[-1]

            # Get last ROI value
            agent_roi_data = existing_data[(existing_data['agent_id'] == agent_id) &
                                           (existing_data['metric_type'] == 'ROI')]
            if not agent_roi_data.empty:
                last_roi = agent_roi_data['roi'].iloc[-1]

        # If no last values, start with reasonable values in our range
        if last_apr is None or pd.isna(last_apr):
            last_apr = random.uniform(-0.1, 0.1)  # Start close to zero
        if last_adjusted_apr is None or pd.isna(last_adjusted_apr):
            # If we have APR but no adjusted APR, make it slightly different than APR
            # Sometimes higher, sometimes lower to look more natural
            if random.random() > 0.5:
                last_adjusted_apr = last_apr + random.uniform(0.05, 0.15)
            else:
                last_adjusted_apr = last_apr - random.uniform(0.05, 0.15)
            last_adjusted_apr = max(-0.5, min(1.0, last_adjusted_apr))
        if last_roi is None or pd.isna(last_roi):
            last_roi = random.uniform(-0.1, 0.1)  # Start close to zero
        # Generate APR values using a smoother random walk
        apr_values = [last_apr]

        # Create a more natural pattern with some trends
        # Define a few trend periods to make it look more authentic
        num_points = len(timestamps)
        trend_periods = []

        # Create 3-5 trend periods
        num_trends = random.randint(3, 5)
        period_length = num_points // num_trends

        for i in range(num_trends):
            # Each trend has a direction (up, down, or sideways)
            # and a strength (how strong the trend is)
            direction = random.choice([-1, 0, 1])  # -1: down, 0: sideways, 1: up
            strength = random.uniform(0.01, 0.03)  # Smaller changes for a more natural look

            start_idx = i * period_length
            end_idx = min((i + 1) * period_length, num_points)

            trend_periods.append({
                'start': start_idx,
                'end': end_idx,
                'direction': direction,
                'strength': strength
            })
        # Generate values following the trends
        for i in range(1, num_points):
            # Find which trend period we're in
            current_trend = None
            for trend in trend_periods:
                if trend['start'] <= i < trend['end']:
                    current_trend = trend
                    break

            # If no trend covers this index (possible when num_points is not
            # evenly divisible by num_trends), fall back to a neutral trend
            if current_trend is None:
                current_trend = {'direction': 0, 'strength': 0.01}

            # Base change is influenced by the trend
            base_change = current_trend['direction'] * current_trend['strength']

            # Add some randomness
            random_change = random.normalvariate(0, 0.01)  # Normal distribution for more natural randomness

            # Previous momentum (30% influence to make it smoother)
            prev_change = 0 if i == 1 else apr_values[i-1] - apr_values[i-2]
            momentum = 0.3 * prev_change

            # Combine all factors
            total_change = base_change + random_change + momentum

            # Apply the change
            new_value = apr_values[i-1] + total_change

            # Keep within reasonable bounds (-0.5 to 1.0)
            new_value = max(-0.5, min(1.0, new_value))
            apr_values.append(new_value)
        # Generate adjusted APR values that follow APR with a small, varying offset
        adjusted_apr_values = []
        for i, apr_value in enumerate(apr_values):
            # Make adjusted APR follow APR but with a small, varying offset
            # Sometimes higher, sometimes lower to look more natural
            if i % 5 == 0:  # Periodically recalculate the offset direction
                offset_direction = 1 if random.random() > 0.5 else -1
                offset = offset_direction * random.uniform(0.05, 0.15)

            adjusted_value = apr_value + offset

            # Keep within reasonable bounds (-0.5 to 1.0)
            adjusted_value = max(-0.5, min(1.0, adjusted_value))
            adjusted_apr_values.append(adjusted_value)
        # Generate ROI values with a completely different approach to ensure better distribution
        # Note: ROI values will be multiplied by 100 in app.py, so we need to generate values
        # between -0.01 and 0 to get final values between -1 and 0

        # Instead of building on the last_roi value, we'll generate a completely new sequence
        # that's well-distributed between -0.01 and 0

        # First, create a sequence of target values that we want to hit
        # This ensures we get good coverage of the entire range
        target_points = []
        for i in range(5):  # Create 5 target points
            # Distribute targets across the range, but avoid exactly 0
            target = -0.01 + (i * 0.002)  # Values from -0.01 to -0.002
            target_points.append(target)

        # Shuffle the targets to make the pattern less predictable
        random.shuffle(target_points)

        # Divide the total points into segments, one for each target
        segment_length = num_points // len(target_points)
        # Generate the ROI values
        roi_values = []

        # Start with the last real value, or a random value in our range if none exists
        if last_roi is None or pd.isna(last_roi) or last_roi < -0.01 or last_roi > 0:
            # If no valid last value, start in the middle of our range
            current_value = -0.005
        else:
            current_value = last_roi

        roi_values.append(current_value)

        # For each segment, gradually move toward the target value
        for segment_idx, target in enumerate(target_points):
            start_idx = segment_idx * segment_length
            end_idx = min((segment_idx + 1) * segment_length, num_points)

            # How many steps we have to reach the target
            steps = end_idx - start_idx
            if steps <= 0:
                continue  # Skip if this segment has no points

            # Current value is the last value in roi_values
            current_value = roi_values[-1]

            # Calculate how much to change per step to reach the target
            step_change = (target - current_value) / steps

            # Generate values for this segment
            for step in range(steps):
                # Base change to move toward target
                base_change = step_change

                # Add some randomness, but make sure we're still generally moving toward the target
                random_factor = random.uniform(-0.0005, 0.0005)

                # Calculate new value
                new_value = current_value + base_change + random_factor

                # Ensure we stay within range
                new_value = max(-0.01, min(0, new_value))

                roi_values.append(new_value)
                current_value = new_value

        # If we didn't generate enough points, add more
        while len(roi_values) < num_points + 1:
            # Add a point with small random variation from the last point
            last_value = roi_values[-1]
            new_value = last_value + random.uniform(-0.001, 0.001)
            new_value = max(-0.01, min(0, new_value))
            roi_values.append(new_value)

        # If we generated too many points, trim the list
        roi_values = roi_values[:num_points + 1]
        # Create dummy data points
        for i, timestamp in enumerate(timestamps):
            # APR data
            dummy_apr = {
                'timestamp': timestamp,
                'apr': apr_values[i],
                'adjusted_apr': adjusted_apr_values[i],
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'APR'
            }
            dummy_data_list.append(dummy_apr)

            # ROI data
            dummy_roi = {
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi_values[i],
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'ROI'
            }
            dummy_data_list.append(dummy_roi)

    return pd.DataFrame(dummy_data_list)
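

# --- Illustrative usage (a minimal sketch, not part of the original module) ---
# This shows one way the function might be exercised with a small, hand-built
# DataFrame. The sample agent id/name and metric values below are made up; the
# column names mirror the fields produced by generate_continuous_random_data().
if __name__ == "__main__":
    sample = pd.DataFrame([
        {
            'timestamp': datetime.now() - timedelta(days=1),
            'apr': 0.05,
            'adjusted_apr': 0.08,
            'roi': None,
            'agent_id': 'agent_1',       # hypothetical agent for illustration
            'agent_name': 'Agent One',
            'is_dummy': False,
            'metric_type': 'APR',
        },
        {
            'timestamp': datetime.now() - timedelta(days=1),
            'apr': None,
            'adjusted_apr': None,
            'roi': -0.004,
            'agent_id': 'agent_1',
            'agent_name': 'Agent One',
            'is_dummy': False,
            'metric_type': 'ROI',
        },
    ])

    generated = generate_continuous_random_data(sample)
    # With a one-day gap at 10-minute intervals, expect roughly 144 timestamps,
    # each contributing one APR row and one ROI row per agent.
    print(generated.head())
    print(f"Generated {len(generated)} dummy rows")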