|
|
|
""" |
|
Run Time Series Analysis: Decomposition, ARIMA forecasting, plots |
|
""" |
|
import os |
|
import sys |
|
import glob |
|
sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'src')) |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from statsmodels.tsa.seasonal import seasonal_decompose |
|
from statsmodels.tsa.arima.model import ARIMA |
|
|
|
def find_latest_data():
    """Return the path of the newest processed FRED CSV export.

    Candidates are files matching ``data/processed/fred_data_*.csv``
    (relative to the current working directory); the one with the greatest
    ``os.path.getctime`` value is returned.

    Raises:
        FileNotFoundError: If no file matches the pattern.
    """
    candidates = glob.glob('data/processed/fred_data_*.csv')
    if candidates:
        return max(candidates, key=os.path.getctime)
    raise FileNotFoundError("No FRED data files found. Run the pipeline first.")
|
|
|
def _save_decomposition_plot(series, out_path):
    """Additively decompose *series* (period 12) and save a 4-panel figure to *out_path*."""
    decomposition = seasonal_decompose(series, model='additive', period=12)
    fig, axes = plt.subplots(4, 1, figsize=(12, 10))
    try:
        decomposition.observed.plot(ax=axes[0], title='Original Time Series')
        decomposition.trend.plot(ax=axes[1], title='Trend')
        decomposition.seasonal.plot(ax=axes[2], title='Seasonality')
        decomposition.resid.plot(ax=axes[3], title='Residuals')
        fig.tight_layout()
        fig.savefig(out_path, dpi=200, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)


def _save_forecast_plot(series, target_var, out_path):
    """Fit ARIMA(1, 1, 1) on *series*, print its summary and save a forecast plot to *out_path*."""
    fitted_model = ARIMA(series, order=(1, 1, 1)).fit()
    print(f"ARIMA Model Summary:\n{fitted_model.summary()}")

    # Forecast up to 12 steps, but never more than a quarter of the history.
    forecast_steps = min(12, len(series) // 4)

    # One get_forecast() call yields both the point forecast and its
    # confidence interval, so the prediction is not computed twice.
    prediction = fitted_model.get_forecast(steps=forecast_steps)
    forecast = prediction.predicted_mean
    conf_int = prediction.conf_int()

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        series.plot(ax=ax, label='Historical Data')
        forecast.plot(ax=ax, label='Forecast', color='red')
        ax.fill_between(
            forecast.index,
            conf_int.iloc[:, 0],
            conf_int.iloc[:, 1],
            alpha=0.3,
            color='red',
            label='Confidence Interval',
        )
        ax.set_title(f'{target_var} - ARIMA Forecast')
        ax.legend()
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(out_path, dpi=200, bbox_inches='tight')
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)


def main():
    """Run seasonal decomposition and an ARIMA forecast on the 'GDP' series.

    Loads the CSV returned by ``find_latest_data()``, takes the non-null
    'GDP' column, and returns early (after printing a message) when the
    column is missing or has fewer than 50 observations. Otherwise it writes
    two PNG files under ``data/exports/``: a decomposition plot and a
    forecast plot. The two steps are best-effort and independent: a failure
    in either is printed and does not abort the other.

    Raises:
        FileNotFoundError: Propagated from ``find_latest_data()`` when no
            data file exists.
    """
    print("=" * 60)
    print("FRED Time Series Analysis: Decomposition & ARIMA Forecasting")
    print("=" * 60)

    data_file = find_latest_data()
    print(f"Using data file: {data_file}")
    df = pd.read_csv(data_file, index_col=0, parse_dates=True)

    target_var = 'GDP'
    if target_var not in df.columns:
        print(f"Target variable '{target_var}' not found in data.")
        return

    ts_data = df[target_var].dropna()
    if len(ts_data) < 50:
        print("Insufficient data for time series analysis (need at least 50 points). Skipping.")
        return

    print(f"Time series length: {len(ts_data)} observations")
    print(f"Date range: {ts_data.index.min()} to {ts_data.index.max()}")

    # Build the monthly series once, outside the decomposition step, so the
    # ARIMA step does not depend on a variable bound inside another try block.
    # NOTE(review): if the source series is sparser than monthly (e.g.
    # quarterly), the resample leaves NaN months, which seasonal_decompose
    # may reject -- confirm the upstream data frequency.
    try:
        if ts_data.index.freq is None:
            ts_monthly = ts_data.resample('M').mean()
        else:
            ts_monthly = ts_data
    except Exception as e:
        # Neither step can run without the series, so report and stop.
        print(f"Could not prepare monthly series: {e}")
        return

    # savefig does not create missing directories.
    os.makedirs('data/exports', exist_ok=True)

    try:
        _save_decomposition_plot(ts_monthly, 'data/exports/time_series_decomposition.png')
        print("Decomposition plot saved.")
    except Exception as e:
        print(f"Decomposition failed: {e}")

    try:
        _save_forecast_plot(ts_monthly, target_var, 'data/exports/time_series_forecast.png')
        print("Forecast plot saved.")
    except Exception as e:
        print(f"ARIMA modeling failed: {e}")

    print("\nTime series analysis complete. Outputs saved to data/exports/.")
|
|
|
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()