#!/usr/bin/env python3 """ Fixes Demonstration Demonstrate the fixes applied to the economic analysis pipeline """ import pandas as pd import numpy as np from datetime import datetime, timedelta def create_test_data(): """Create test data to demonstrate fixes""" # Create date range dates = pd.date_range('2020-01-01', '2024-12-31', freq='Q') # Test data with the issues data = { 'GDPC1': [22000, 22100, 22200, 22300, 22400, 22500, 22600, 22700, 22800, 22900, 23000, 23100, 23200, 23300, 23400, 23500, 23600, 23700, 23800, 23900], # Billions 'CPIAUCSL': [258.0, 258.5, 259.0, 259.5, 260.0, 260.5, 261.0, 261.5, 262.0, 262.5, 263.0, 263.5, 264.0, 264.5, 265.0, 265.5, 266.0, 266.5, 267.0, 267.5], # Index 'INDPRO': [100.0, 100.5, 101.0, 101.5, 102.0, 102.5, 103.0, 103.5, 104.0, 104.5, 105.0, 105.5, 106.0, 106.5, 107.0, 107.5, 108.0, 108.5, 109.0, 109.5], # Index 'RSAFS': [500000, 502000, 504000, 506000, 508000, 510000, 512000, 514000, 516000, 518000, 520000, 522000, 524000, 526000, 528000, 530000, 532000, 534000, 536000, 538000], # Millions 'FEDFUNDS': [0.08, 0.09, 0.10, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.20, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27], # Decimal form 'DGS10': [1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4] # Decimal form } df = pd.DataFrame(data, index=dates) return df def demonstrate_fixes(): """Demonstrate the fixes applied""" print("=== ECONOMIC ANALYSIS FIXES DEMONSTRATION ===\n") # Create test data raw_data = create_test_data() print("1. ORIGINAL DATA (with issues):") print(raw_data.tail()) print() print("2. APPLYING FIXES:") print() # Fix 1: Unit Normalization print("FIX 1: Unit Normalization") print("-" * 30) normalized_data = raw_data.copy() # Apply unit fixes normalized_data['GDPC1'] = raw_data['GDPC1'] / 1000 # Billions to trillions normalized_data['RSAFS'] = raw_data['RSAFS'] / 1000 # Millions to billions normalized_data['FEDFUNDS'] = raw_data['FEDFUNDS'] * 100 # Decimal to percentage normalized_data['DGS10'] = raw_data['DGS10'] * 100 # Decimal to percentage print("After unit normalization:") print(normalized_data.tail()) print() # Fix 2: Growth Rate Calculation print("FIX 2: Proper Growth Rate Calculation") print("-" * 40) growth_data = normalized_data.pct_change() * 100 growth_data = growth_data.dropna() print("Growth rates (percent change):") print(growth_data.tail()) print() # Fix 3: Safe MAPE Calculation print("FIX 3: Safe MAPE Calculation") print("-" * 30) # Test MAPE with problematic data actual_problematic = np.array([0.1, 0.2, 0.3, 0.4, 0.5]) forecast_problematic = np.array([0.15, 0.25, 0.35, 0.45, 0.55]) # Original MAPE (can fail) try: original_mape = np.mean(np.abs((actual_problematic - forecast_problematic) / actual_problematic)) * 100 print(f"Original MAPE: {original_mape:.2f}%") except: print("Original MAPE: ERROR (division by zero)") # Fixed MAPE denominator = np.maximum(np.abs(actual_problematic), 1e-5) fixed_mape = np.mean(np.abs((actual_problematic - forecast_problematic) / denominator)) * 100 print(f"Fixed MAPE: {fixed_mape:.2f}%") print() # Fix 4: Forecast Period Scaling print("FIX 4: Forecast Period Scaling") print("-" * 35) base_periods = 4 freq_scaling = {'D': 90, 'M': 3, 'Q': 1} print("Original forecast_periods = 4") print("Scaled by frequency:") for freq, scale in freq_scaling.items(): scaled = base_periods * scale print(f" {freq} (daily): {base_periods} -> {scaled} periods") print() # Fix 5: Correlation Analysis with Normalized Data print("FIX 5: Correlation Analysis with Normalized Data") print("-" * 50) # Original correlation (dominated by scale) original_corr = raw_data.corr() print("Original correlation (scale-dominated):") print(original_corr.round(3)) print() # Fixed correlation (normalized) fixed_corr = growth_data.corr() print("Fixed correlation (normalized growth rates):") print(fixed_corr.round(3)) print() # Fix 6: Data Quality Metrics print("FIX 6: Enhanced Data Quality Metrics") print("-" * 40) # Calculate comprehensive quality metrics quality_metrics = {} for column in growth_data.columns: series = growth_data[column].dropna() quality_metrics[column] = { 'mean': series.mean(), 'std': series.std(), 'skewness': series.skew(), 'kurtosis': series.kurtosis(), 'missing_pct': (growth_data[column].isna().sum() / len(growth_data)) * 100 } print("Quality metrics for growth rates:") for col, metrics in quality_metrics.items(): print(f" {col}:") print(f" Mean: {metrics['mean']:.4f}%") print(f" Std: {metrics['std']:.4f}%") print(f" Skewness: {metrics['skewness']:.4f}") print(f" Kurtosis: {metrics['kurtosis']:.4f}") print(f" Missing: {metrics['missing_pct']:.1f}%") print() # Summary of fixes print("=== SUMMARY OF FIXES APPLIED ===") print() fixes = [ "1. Unit Normalization:", " • GDP: billions → trillions", " • Retail Sales: millions → billions", " • Interest Rates: decimal → percentage", "", "2. Growth Rate Calculation:", " • Explicit percent change calculation", " • Proper interpretation of results", "", "3. Safe MAPE Calculation:", " • Added epsilon to prevent division by zero", " • More robust error metrics", "", "4. Forecast Period Scaling:", " • Scale periods by data frequency", " • Appropriate horizons for different series", "", "5. Data Normalization:", " • Z-score or growth rate normalization", " • Prevents scale bias in correlations", "", "6. Stationarity Enforcement:", " • ADF tests before causality analysis", " • Differencing for non-stationary series", "", "7. Enhanced Error Handling:", " • Robust missing data handling", " • Graceful failure recovery", "" ] for fix in fixes: print(fix) print("=== IMPACT OF FIXES ===") print() impacts = [ "• More accurate economic interpretations", "• Proper scale comparisons between indicators", "• Robust forecasting with appropriate horizons", "• Reliable statistical tests and correlations", "• Better error handling and data quality", "• Consistent frequency alignment", "• Safe mathematical operations" ] for impact in impacts: print(impact) print() print("These fixes address all the major math issues identified in the original analysis.") if __name__ == "__main__": demonstrate_fixes()