import logging
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

# Get the logger
logger = logging.getLogger(__name__)

# Spacing between consecutive generated data points.
_STEP = timedelta(minutes=10)
# Look-back window used when there is no existing data to continue from.
_DEFAULT_HISTORY = timedelta(days=30)
# Bounds applied to every generated APR / adjusted APR value.
_APR_MIN, _APR_MAX = -0.5, 1.0
# Bounds applied to every generated ROI value. (Original author's note: the
# consumer multiplies ROI by 100, so this maps to a displayed range of -1..0.)
_ROI_MIN, _ROI_MAX = -0.01, 0.0
# ROI starting point when the last real ROI is missing or outside the bounds.
_ROI_DEFAULT_START = -0.005
# Five evenly spaced ROI targets covering the range while staying away from 0.
_ROI_TARGETS = (-0.01, -0.008125, -0.00625, -0.004375, -0.0025)


def _clamp(value, low, high):
    """Return *value* limited to the closed interval [low, high]."""
    return max(low, min(high, value))


def _build_timestamps(start_time, end_time):
    """Return timestamps from *start_time* to *end_time* (inclusive) every 10 minutes."""
    timestamps = []
    current = start_time
    while current <= end_time:
        timestamps.append(current)
        current += _STEP
    return timestamps


def _latest_row(existing_data, agent_id, metric_type):
    """Return the most recent row for an agent/metric pair, or None if there is none.

    "Most recent" is decided by the ``timestamp`` column rather than by row
    position, so unsorted input still yields the true last observation. The
    sort is stable, so rows sharing the latest timestamp keep their input order.
    """
    mask = (existing_data['agent_id'] == agent_id) & (
        existing_data['metric_type'] == metric_type
    )
    rows = existing_data[mask]
    if rows.empty:
        return None
    ordered = rows.sort_values('timestamp', kind='mergesort', na_position='first')
    return ordered.iloc[-1]


def _random_walk_apr(start_value, num_points):
    """Return *num_points* APR values forming a trending random walk.

    The first value is *start_value* unchanged (continuity with real data);
    every later value is clamped to [_APR_MIN, _APR_MAX].
    """
    # Split the series into 3-5 trend periods, each with its own drift.
    num_trends = random.randint(3, 5)
    period_length = num_points // num_trends
    drifts = []
    for period in range(num_trends):
        direction = random.choice([-1, 0, 1])  # -1: down, 0: sideways, 1: up
        strength = random.uniform(0.01, 0.03)
        start_idx = period * period_length
        # The last period absorbs the remainder so every point has a trend.
        is_last = period == num_trends - 1
        end_idx = num_points if is_last else (period + 1) * period_length
        drifts.extend([direction * strength] * (end_idx - start_idx))

    values = [start_value]
    for i in range(1, num_points):
        noise = random.normalvariate(0, 0.01)
        # 30% of the previous step carries over to smooth the curve.
        prev_change = 0 if i == 1 else values[i - 1] - values[i - 2]
        change = drifts[i] + noise + 0.3 * prev_change
        values.append(_clamp(values[i - 1] + change, _APR_MIN, _APR_MAX))
    return values


def _adjusted_apr_series(apr_values, first_value):
    """Return adjusted APR values tracking *apr_values* with a small offset.

    The first value is *first_value* (the last real adjusted APR, or its
    fallback) so the series continues from existing data. Later values are
    the APR plus an offset of magnitude 0.05-0.15 whose sign is re-drawn
    every five points, clamped to [_APR_MIN, _APR_MAX].
    """
    adjusted = []
    direction = 1
    for i, apr_value in enumerate(apr_values):
        if i % 5 == 0:
            direction = 1 if random.random() > 0.5 else -1
        if i == 0:
            adjusted.append(first_value)
            continue
        offset = direction * random.uniform(0.05, 0.15)
        adjusted.append(_clamp(apr_value + offset, _APR_MIN, _APR_MAX))
    return adjusted


def _roi_series(start_value, num_points):
    """Return *num_points* ROI values that wander between shuffled targets.

    The series starts at *start_value*, then moves segment by segment toward
    each target with small uniform noise. Every generated value is clamped
    to [_ROI_MIN, _ROI_MAX].
    """
    targets = list(_ROI_TARGETS)
    random.shuffle(targets)
    segment_length = num_points // len(targets)

    values = [start_value]
    if segment_length > 0:
        for target in targets:
            current = values[-1]
            step_change = (target - current) / segment_length
            for _ in range(segment_length):
                jitter = random.uniform(-0.0005, 0.0005)
                current = _clamp(current + step_change + jitter, _ROI_MIN, _ROI_MAX)
                values.append(current)

    # Fewer than one point per segment (or a remainder): pad with a small drift.
    while len(values) < num_points:
        drifted = values[-1] + random.uniform(-0.001, 0.001)
        values.append(_clamp(drifted, _ROI_MIN, _ROI_MAX))
    return values[:num_points]


def generate_continuous_random_data(existing_data, end_time=None):
    """Generate dummy APR/ROI data that continues from existing data.

    For every agent in *existing_data* (or a single dummy agent when there
    is no data), one APR row and one ROI row are produced per 10-minute
    timestamp. Each series starts from the agent's latest real value, with
    the adjusted APR following the APR at a small offset. Values come from
    the ``random`` module, so seeding it makes the output reproducible.

    Args:
        existing_data: DataFrame with columns ``timestamp``, ``apr``,
            ``adjusted_apr``, ``roi``, ``agent_id``, ``agent_name`` and
            ``metric_type`` ('APR' or 'ROI'). May be empty or None.
        end_time: Last timestamp to generate (inclusive). Defaults to the
            current time, in the timezone of the existing timestamps when
            those are timezone-aware.

    Returns:
        DataFrame of dummy rows (``is_dummy`` is True) with the columns
        listed above, or an empty DataFrame when the existing data already
        reaches *end_time*.
    """
    has_existing = existing_data is not None and not existing_data.empty

    if has_existing:
        latest = existing_data['timestamp'].max()
        if end_time is None:
            # Match the awareness of the data to avoid naive/aware comparisons.
            end_time = datetime.now(tz=getattr(latest, 'tzinfo', None))
        start_time = latest + _STEP
    else:
        if end_time is None:
            end_time = datetime.now()
        start_time = end_time - _DEFAULT_HISTORY

    timestamps = _build_timestamps(start_time, end_time)
    if not timestamps:
        return pd.DataFrame()  # No new data needed

    if has_existing:
        unique_agents = (
            existing_data[['agent_id', 'agent_name']]
            .drop_duplicates()
            .to_dict('records')
        )
    else:
        # Create one dummy agent if no existing data
        unique_agents = [{'agent_id': 'dummy_agent', 'agent_name': 'Dummy Agent'}]

    num_points = len(timestamps)
    dummy_data_list = []

    for agent in unique_agents:
        agent_id = agent['agent_id']

        # Last real values for this agent, used to keep the series continuous.
        last_apr = None
        last_adjusted_apr = None
        last_roi = None
        if has_existing:
            apr_row = _latest_row(existing_data, agent_id, 'APR')
            if apr_row is not None:
                last_apr = apr_row['apr']
                last_adjusted_apr = apr_row['adjusted_apr']
            roi_row = _latest_row(existing_data, agent_id, 'ROI')
            if roi_row is not None:
                last_roi = roi_row['roi']

        # Fall back to plausible starting values when nothing real is available.
        if last_apr is None or pd.isna(last_apr):
            last_apr = random.uniform(-0.1, 0.1)  # Start close to zero
        if last_adjusted_apr is None or pd.isna(last_adjusted_apr):
            sign = 1 if random.random() > 0.5 else -1
            last_adjusted_apr = _clamp(
                last_apr + sign * random.uniform(0.05, 0.15), _APR_MIN, _APR_MAX
            )
        if (
            last_roi is None
            or pd.isna(last_roi)
            or last_roi < _ROI_MIN
            or last_roi > _ROI_MAX
        ):
            last_roi = _ROI_DEFAULT_START

        apr_values = _random_walk_apr(last_apr, num_points)
        adjusted_apr_values = _adjusted_apr_series(apr_values, last_adjusted_apr)
        roi_values = _roi_series(last_roi, num_points)

        series = zip(timestamps, apr_values, adjusted_apr_values, roi_values)
        for timestamp, apr, adjusted_apr, roi in series:
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': apr,
                'adjusted_apr': adjusted_apr,
                'roi': None,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'APR',
            })
            dummy_data_list.append({
                'timestamp': timestamp,
                'apr': None,
                'adjusted_apr': None,
                'roi': roi,
                'agent_id': agent_id,
                'agent_name': agent['agent_name'],
                'is_dummy': True,
                'metric_type': 'ROI',
            })

    return pd.DataFrame(dummy_data_list)