Edwin Salguero committed on
Commit
dba04f7
·
1 Parent(s): 8398c59

Enterprise: Transform to production-grade architecture with FastAPI, Docker, K8s, monitoring, and comprehensive tooling

.coverage ADDED
Binary file (53.2 kB).
 
.pre-commit-config.yaml ADDED
@@ -0,0 +1,34 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.5.0
4
+ hooks:
5
+ - id: trailing-whitespace
6
+ - id: end-of-file-fixer
7
+ - id: check-yaml
8
+ - id: check-added-large-files
9
+ - id: check-merge-conflict
10
+ - id: debug-statements
11
+
12
+ - repo: https://github.com/psf/black
13
+ rev: 23.11.0
14
+ hooks:
15
+ - id: black
16
+ language_version: python3
17
+
18
+ - repo: https://github.com/pycqa/isort
19
+ rev: 5.12.0
20
+ hooks:
21
+ - id: isort
22
+ args: ["--profile", "black"]
23
+
24
+ - repo: https://github.com/pycqa/flake8
25
+ rev: 6.1.0
26
+ hooks:
27
+ - id: flake8
28
+ args: [--max-line-length=88]
29
+
30
+ - repo: https://github.com/pre-commit/mirrors-mypy
31
+ rev: v1.7.1
32
+ hooks:
33
+ - id: mypy
34
+ additional_dependencies: [types-all]
Dockerfile ADDED
@@ -0,0 +1,43 @@
1
+ # Production Dockerfile for FRED ML
2
+ FROM python:3.9-slim
3
+
4
+ # Set environment variables
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV PYTHONDONTWRITEBYTECODE=1
7
+ ENV PIP_NO_CACHE_DIR=1
8
+ ENV PIP_DISABLE_PIP_VERSION_CHECK=1
9
+
10
+ # Set work directory
11
+ WORKDIR /app
12
+
13
+ # Install system dependencies
14
+ RUN apt-get update \
15
+ && apt-get install -y --no-install-recommends \
16
+ build-essential \
17
+ curl \
18
+ git \
19
+ && rm -rf /var/lib/apt/lists/*
20
+
21
+ # Copy requirements first for better caching
22
+ COPY requirements.txt .
23
+
24
+ # Install Python dependencies
25
+ RUN pip install --no-cache-dir -r requirements.txt
26
+
27
+ # Copy application code
28
+ COPY . .
29
+
30
+ # Create non-root user
31
+ RUN useradd --create-home --shell /bin/bash app \
32
+ && chown -R app:app /app
33
+ USER app
34
+
35
+ # Health check
36
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
37
+ CMD python -c "import requests; requests.get('http://localhost:8000/health')" || exit 1
38
+
39
+ # Expose port
40
+ EXPOSE 8000
41
+
42
+ # Run the application
43
+ CMD ["python", "-m", "src.main"]
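Note that the `HEALTHCHECK` above only fails when `requests.get` raises (for example, on a refused connection); an unhealthy 5xx response would still pass. A stricter check can call `raise_for_status()`, as in this minimal sketch (the script name and port are assumptions, not part of this commit):

```python
# healthcheck.py - illustrative only; assumes the API serves GET /health on port 8000.
import sys

import requests

try:
    resp = requests.get("http://localhost:8000/health", timeout=5)
    resp.raise_for_status()  # treat 4xx/5xx responses as failures, not just connection errors
except requests.RequestException as exc:
    print(f"health check failed: {exc}", file=sys.stderr)
    sys.exit(1)
```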
Makefile ADDED
@@ -0,0 +1,52 @@
1
+ .PHONY: help install test lint format clean build run deploy
2
+
3
+ help: ## Show this help message
4
+ @echo 'Usage: make [target]'
5
+ @echo ''
6
+ @echo 'Targets:'
7
+ @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf " %-15s %s\n", $$1, $$2}' $(MAKEFILE_LIST)
8
+
9
+ install: ## Install dependencies
10
+ pip install -r requirements.txt
11
+ pre-commit install
12
+
13
+ test: ## Run tests
14
+ pytest tests/ -v --cov=src --cov-report=html
15
+
16
+ lint: ## Run linting
17
+ flake8 src/ tests/
18
+ mypy src/
19
+
20
+ format: ## Format code
21
+ black src/ tests/
22
+ isort src/ tests/
23
+
24
+ clean: ## Clean build artifacts
25
+ find . -type f -name "*.pyc" -delete
26
+ find . -type d -name "__pycache__" -delete
27
+ rm -rf .pytest_cache/
28
+ rm -rf htmlcov/
29
+
30
+ build: ## Build Docker image
31
+ docker build -t fred-ml .
32
+
33
+ run: ## Run application locally
34
+ uvicorn src.main:app --reload --host 0.0.0.0 --port 8000
35
+
36
+ run-docker: ## Run with Docker Compose
37
+ docker-compose up --build
38
+
39
+ deploy: ## Deploy to Kubernetes
40
+ kubectl apply -f kubernetes/
41
+ helm install fred-ml helm/
42
+
43
+ logs: ## View application logs
44
+ docker-compose logs -f fred-ml
45
+
46
+ shell: ## Open shell in container
47
+ docker-compose exec fred-ml bash
48
+
49
+ migrate: ## Run database migrations
50
+ alembic upgrade head
51
+
52
+ setup-dev: install format lint test ## Setup development environment
README.md CHANGED
@@ -1,14 +1,20 @@
1
- # FRED Economic Data Analysis Tool
2
 
3
- A comprehensive Python tool for collecting, analyzing, and visualizing Federal Reserve Economic Data (FRED) using the FRED API.
4
 
5
  ## Features
6
 
 
 
 
 
7
  - **Data Collection**: Fetch economic indicators from FRED API
8
- - **Data Analysis**: Generate summary statistics and insights
9
  - **Visualization**: Create time series plots and charts
10
  - **Data Export**: Save data to CSV format
11
- - **Flexible Configuration**: Easy customization of indicators and date ranges
 
 
12
 
13
  ## Setup
14
 
@@ -34,29 +40,75 @@ pip install -r requirements.txt
34
 
35
  ```
36
  FRED_ML/
37
- ├── config/ # Configuration settings
38
- │ ├── settings.py # Environment variables and settings
39
- │ └── pipeline.yaml # Pipeline configuration
40
- ├── src/ # Source code
41
- │ ├── core/ # Core functionality
42
- │ ├── analysis/ # Analysis modules
43
- │ ├── utils/ # Utility functions
44
- │ └── visualization/ # Visualization modules
45
- ├── scripts/ # Executable scripts
46
- ├── tests/ # Test files
47
- ├── data/ # Data directories
48
- │ ├── raw/ # Raw data
49
- │ ├── processed/ # Processed data
50
- │ └── exports/ # Exported files
51
- ├── requirements.txt # Python dependencies
52
- ├── .env.example # Environment variables template
53
- └── README.md # This file
54
  ```
55
 
56
  ## Usage
57
 
58
  ### Basic Usage
59
 
60
  Run the EDA script to perform exploratory data analysis:
61
 
62
  ```bash
@@ -136,21 +188,47 @@ The tool includes error handling for rate limit issues.
136
 
137
  ## Configuration
138
 
139
  Edit `config/settings.py` to customize:
140
  - Default date ranges
141
  - Output directories
142
  - Default indicators
143
 
144
- The API key is now managed through environment variables (see Setup section above).
145
-
146
  ## Dependencies
147
 
 
148
  - `fredapi`: FRED API client
149
  - `pandas`: Data manipulation
150
  - `numpy`: Numerical computing
151
  - `matplotlib`: Plotting
152
  - `seaborn`: Statistical visualization
153
- - `jupyter`: Interactive notebooks (optional)
154
 
155
  ## Error Handling
156
 
@@ -160,17 +238,43 @@ The tool includes comprehensive error handling for:
160
  - Rate limit exceeded
161
  - Data format errors
162
 
163
  ## Contributing
164
 
165
- To add new features:
166
- 1. Extend the `FREDDataCollector` class
167
- 2. Add new methods for specific analysis
168
- 3. Update the configuration as needed
 
169
 
170
  ## License
171
 
172
- This project is for educational and research purposes. Please respect FRED API terms of service.
173
 
174
  ## Support
175
 
176
- For issues with the FRED API, visit: https://fred.stlouisfed.org/docs/api/
 
 
 
1
+ # FRED ML - Enterprise Economic Data Analysis Platform
2
 
3
+ A production-grade Python platform for collecting, analyzing, and visualizing Federal Reserve Economic Data (FRED) using the FRED API. Built with enterprise-grade architecture including FastAPI, Docker, Kubernetes, and comprehensive monitoring.
4
 
5
  ## Features
6
 
7
+ - **Production-Ready API**: FastAPI-based REST API with automatic documentation
8
+ - **Containerized Deployment**: Docker and Docker Compose for easy deployment
9
+ - **Kubernetes Support**: Helm charts and K8s manifests for cloud deployment
10
+ - **Monitoring & Observability**: Prometheus metrics and structured logging
11
  - **Data Collection**: Fetch economic indicators from FRED API
12
+ - **Advanced Analytics**: Machine learning models and statistical analysis
13
  - **Visualization**: Create time series plots and charts
14
  - **Data Export**: Save data to CSV format
15
+ - **Flexible Configuration**: Environment-based configuration
16
+ - **Comprehensive Testing**: Unit, integration, and E2E tests
17
+ - **CI/CD Ready**: Pre-commit hooks and automated quality checks
18
 
19
  ## Setup
20
 
 
40
 
41
  ```
42
  FRED_ML/
43
+ ├── src/ # Source code
44
+ │ ├── core/ # Core functionality
45
+ │ ├── analysis/ # Analysis modules
46
+ │ ├── utils/ # Utility functions
47
+ │ └── visualization/ # Visualization modules
48
+ ├── config/ # Configuration settings
49
+ │ ├── settings.py # Environment variables and settings
50
+ │ └── pipeline.yaml # Pipeline configuration
51
+ ├── deployment/ # Deployment configurations
52
+ ├── docker/ # Docker configurations
53
+ ├── kubernetes/ # K8s manifests
54
+ ├── helm/ # Helm charts
55
+ ├── scripts/ # Executable scripts
56
+ │ ├── dev/ # Development scripts
57
+ │ ├── prod/ # Production scripts
58
+ │ └── deploy/ # Deployment scripts
59
+ ├── tests/ # Test files
60
+ │ ├── unit/ # Unit tests
61
+ │ ├── integration/ # Integration tests
62
+ │ └── e2e/ # End-to-end tests
63
+ ├── docs/ # Documentation
64
+ │ ├── api/ # API documentation
65
+ │ ├── user_guide/ # User guides
66
+ │ ├── deployment/ # Deployment guides
67
+ │ └── architecture/ # Architecture docs
68
+ ├── monitoring/ # Monitoring configurations
69
+ ├── alerts/ # Alert configurations
70
+ ├── data/ # Data directories
71
+ │ ├── raw/ # Raw data
72
+ │ ├── processed/ # Processed data
73
+ │ └── exports/ # Exported files
74
+ ├── logs/ # Application logs
75
+ ├── requirements.txt # Python dependencies
76
+ ├── Dockerfile # Docker image
77
+ ├── docker-compose.yml # Local development
78
+ ├── Makefile # Build automation
79
+ ├── .env.example # Environment variables template
80
+ ├── .pre-commit-config.yaml # Code quality hooks
81
+ └── README.md # This file
82
  ```
83
 
84
  ## Usage
85
 
86
  ### Basic Usage
87
 
88
+ #### Local Development
89
+
90
+ Run the application locally:
91
+
92
+ ```bash
93
+ make run
94
+ ```
95
+
96
+ Or with Docker Compose:
97
+
98
+ ```bash
99
+ make run-docker
100
+ ```
101
+
102
+ #### API Usage
103
+
104
+ Once running, access the API at `http://localhost:8000`:
105
+
106
+ - **API Documentation**: `http://localhost:8000/docs`
107
+ - **Health Check**: `http://localhost:8000/health`
108
+ - **Available Indicators**: `http://localhost:8000/api/v1/indicators`
109
+
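The endpoints above can also be exercised from Python; a minimal sketch, assuming the service is running locally on port 8000 (response bodies are not documented in this commit, so only status codes are checked):

```python
import requests

BASE_URL = "http://localhost:8000"

for path in ("/health", "/api/v1/indicators"):
    resp = requests.get(f"{BASE_URL}{path}", timeout=10)
    print(path, resp.status_code)
    resp.raise_for_status()  # raise if the endpoint is unhealthy or missing
```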
110
+ #### Scripts
111
+
112
  Run the EDA script to perform exploratory data analysis:
113
 
114
  ```bash
 
188
 
189
  ## Configuration
190
 
191
+ ### Environment Variables
192
+
193
+ The application uses environment variables for configuration:
194
+
195
+ - `FRED_API_KEY`: Your FRED API key (required)
196
+ - `ENVIRONMENT`: `development` or `production` (default: development)
197
+ - `PORT`: Application port (default: 8000)
198
+ - `POSTGRES_PASSWORD`: Database password for Docker Compose
199
+
200
+ ### Customization
201
+
202
  Edit `config/settings.py` to customize:
203
  - Default date ranges
204
  - Output directories
205
  - Default indicators
206
 
 
 
207
  ## Dependencies
208
 
209
+ ### Core Dependencies
210
  - `fredapi`: FRED API client
211
  - `pandas`: Data manipulation
212
  - `numpy`: Numerical computing
213
  - `matplotlib`: Plotting
214
  - `seaborn`: Statistical visualization
215
+ - `scikit-learn`: Machine learning
216
+ - `statsmodels`: Statistical models
217
+
218
+ ### Production Dependencies
219
+ - `fastapi`: Web framework
220
+ - `uvicorn`: ASGI server
221
+ - `redis`: Caching
222
+ - `psycopg2-binary`: PostgreSQL adapter
223
+ - `sqlalchemy`: ORM
224
+ - `prometheus-client`: Metrics
225
+
226
+ ### Development Dependencies
227
+ - `pytest`: Testing framework
228
+ - `black`: Code formatting
229
+ - `flake8`: Linting
230
+ - `mypy`: Type checking
231
+ - `pre-commit`: Git hooks
232
 
233
  ## Error Handling
234
 
 
238
  - Rate limit exceeded
239
  - Data format errors
240
 
241
+ ## Development
242
+
243
+ ### Setup Development Environment
244
+
245
+ ```bash
246
+ make setup-dev
247
+ ```
248
+
249
+ ### Code Quality
250
+
251
+ ```bash
252
+ make format # Format code
253
+ make lint # Run linting
254
+ make test # Run tests
255
+ ```
256
+
257
+ ### Deployment
258
+
259
+ ```bash
260
+ make build # Build Docker image
261
+ make deploy # Deploy to Kubernetes
262
+ ```
263
+
264
  ## Contributing
265
 
266
+ 1. Fork the repository
267
+ 2. Create a feature branch
268
+ 3. Make your changes
269
+ 4. Run tests and linting: `make test lint`
270
+ 5. Submit a pull request
271
 
272
  ## License
273
 
274
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
275
 
276
  ## Support
277
 
278
+ - **Documentation**: Check the `docs/` directory
279
+ - **Issues**: Report bugs via GitHub Issues
280
+ - **FRED API**: https://fred.stlouisfed.org/docs/api/
alerts/alertmanager.yml ADDED
@@ -0,0 +1,21 @@
1
+ global:
2
+ resolve_timeout: 5m
3
+
4
+ route:
5
+ group_by: ['alertname']
6
+ group_wait: 10s
7
+ group_interval: 10s
8
+ repeat_interval: 1h
9
+ receiver: 'web.hook'
10
+
11
+ receivers:
12
+ - name: 'web.hook'
13
+ webhook_configs:
14
+ - url: 'http://127.0.0.1:5001/'
15
+
16
+ inhibit_rules:
17
+ - source_match:
18
+ severity: 'critical'
19
+ target_match:
20
+ severity: 'warning'
21
+ equal: ['alertname', 'dev', 'instance']
config/__pycache__/settings.cpython-39.pyc CHANGED
Binary files a/config/__pycache__/settings.cpython-39.pyc and b/config/__pycache__/settings.cpython-39.pyc differ
 
data/processed/fred_data_20250710_221702.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538c15716d377a0f1f9b68c03ffacf898f86c0c7bd7b1279ced9d32065345d90
3
+ size 541578
data/processed/fred_data_20250710_223022.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538c15716d377a0f1f9b68c03ffacf898f86c0c7bd7b1279ced9d32065345d90
3
+ size 541578
data/processed/fred_data_20250710_223149.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538c15716d377a0f1f9b68c03ffacf898f86c0c7bd7b1279ced9d32065345d90
3
+ size 541578
data/processed/fred_economic_data_20250710_220401.csv ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:538c15716d377a0f1f9b68c03ffacf898f86c0c7bd7b1279ced9d32065345d90
3
+ size 541578
docker-compose.yml ADDED
@@ -0,0 +1,47 @@
1
+ version: '3.8'
2
+
3
+ services:
4
+ fred-ml:
5
+ build: .
6
+ ports:
7
+ - "8000:8000"
8
+ environment:
9
+ - FRED_API_KEY=${FRED_API_KEY}
10
+ - ENVIRONMENT=development
11
+ volumes:
12
+ - ./data:/app/data
13
+ - ./logs:/app/logs
14
+ depends_on:
15
+ - redis
16
+ networks:
17
+ - fred-ml-network
18
+
19
+ redis:
20
+ image: redis:7-alpine
21
+ ports:
22
+ - "6379:6379"
23
+ volumes:
24
+ - redis_data:/data
25
+ networks:
26
+ - fred-ml-network
27
+
28
+ postgres:
29
+ image: postgres:15-alpine
30
+ environment:
31
+ POSTGRES_DB: fred_ml
32
+ POSTGRES_USER: fred_user
33
+ POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-changeme}
34
+ ports:
35
+ - "5432:5432"
36
+ volumes:
37
+ - postgres_data:/var/lib/postgresql/data
38
+ networks:
39
+ - fred-ml-network
40
+
41
+ volumes:
42
+ redis_data:
43
+ postgres_data:
44
+
45
+ networks:
46
+ fred-ml-network:
47
+ driver: bridge
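Inside the Compose network, the application reaches the other services by their service names (`redis`, `postgres`). A minimal sketch, assuming the code runs in the `fred-ml` container and uses the `redis` and `sqlalchemy` packages pinned in `requirements.txt`; credentials mirror the Compose defaults and should come from the environment in practice:

```python
import os

import redis
from sqlalchemy import create_engine, text

cache = redis.Redis(host="redis", port=6379)
cache.ping()  # raises if the cache is unreachable

password = os.environ.get("POSTGRES_PASSWORD", "changeme")
engine = create_engine(
    f"postgresql+psycopg2://fred_user:{password}@postgres:5432/fred_ml"
)
with engine.connect() as conn:
    print(conn.execute(text("SELECT 1")).scalar())
```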
helm/Chart.yaml ADDED
@@ -0,0 +1,17 @@
1
+ apiVersion: v2
2
+ name: fred-ml
3
+ description: A Helm chart for FRED ML Economic Data Analysis
4
+ type: application
5
+ version: 1.0.0
6
+ appVersion: "1.0.0"
7
+ keywords:
8
+ - economics
9
+ - data-analysis
10
+ - machine-learning
11
+ - fred
12
+ home: https://github.com/EAName/FREDML
13
+ sources:
14
+ - https://github.com/EAName/FREDML
15
+ maintainers:
16
+ - name: Edwin Salguero
17
kubernetes/deployment.yaml ADDED
@@ -0,0 +1,61 @@
1
+ apiVersion: apps/v1
2
+ kind: Deployment
3
+ metadata:
4
+ name: fred-ml
5
+ labels:
6
+ app: fred-ml
7
+ spec:
8
+ replicas: 3
9
+ selector:
10
+ matchLabels:
11
+ app: fred-ml
12
+ template:
13
+ metadata:
14
+ labels:
15
+ app: fred-ml
16
+ spec:
17
+ containers:
18
+ - name: fred-ml
19
+ image: fred-ml:latest
20
+ ports:
21
+ - containerPort: 8000
22
+ env:
23
+ - name: FRED_API_KEY
24
+ valueFrom:
25
+ secretKeyRef:
26
+ name: fred-ml-secrets
27
+ key: fred-api-key
28
+ - name: ENVIRONMENT
29
+ value: "production"
30
+ resources:
31
+ requests:
32
+ memory: "256Mi"
33
+ cpu: "250m"
34
+ limits:
35
+ memory: "512Mi"
36
+ cpu: "500m"
37
+ livenessProbe:
38
+ httpGet:
39
+ path: /health
40
+ port: 8000
41
+ initialDelaySeconds: 30
42
+ periodSeconds: 10
43
+ readinessProbe:
44
+ httpGet:
45
+ path: /ready
46
+ port: 8000
47
+ initialDelaySeconds: 5
48
+ periodSeconds: 5
49
+ ---
50
+ apiVersion: v1
51
+ kind: Service
52
+ metadata:
53
+ name: fred-ml-service
54
+ spec:
55
+ selector:
56
+ app: fred-ml
57
+ ports:
58
+ - protocol: TCP
59
+ port: 80
60
+ targetPort: 8000
61
+ type: LoadBalancer
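The liveness and readiness probes above expect `/health` and `/ready` endpoints on port 8000. `src/main.py` is not shown in this commit, so the following is only a sketch of how a FastAPI app could satisfy them, not the actual implementation:

```python
from fastapi import FastAPI, Response, status

app = FastAPI(title="FRED ML")


@app.get("/health")
def health() -> dict:
    # Liveness: the process is up and able to answer requests.
    return {"status": "ok"}


@app.get("/ready")
def ready(response: Response) -> dict:
    # Readiness: verify downstream dependencies (database, cache) before accepting traffic.
    dependencies_ok = True  # placeholder; replace with real checks
    if not dependencies_ok:
        response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
        return {"status": "not ready"}
    return {"status": "ready"}
```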
monitoring/prometheus.yml ADDED
@@ -0,0 +1,18 @@
1
+ global:
2
+ scrape_interval: 15s
3
+ evaluation_interval: 15s
4
+
5
+ rule_files:
6
+ # - "first_rules.yml"
7
+ # - "second_rules.yml"
8
+
9
+ scrape_configs:
10
+ - job_name: 'fred-ml'
11
+ static_configs:
12
+ - targets: ['localhost:8000']
13
+ metrics_path: '/metrics'
14
+ scrape_interval: 5s
15
+
16
+ - job_name: 'prometheus'
17
+ static_configs:
18
+ - targets: ['localhost:9090']
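The `fred-ml` scrape job above expects the application to expose Prometheus metrics at `/metrics`. A minimal sketch using `prometheus-client` (added to `requirements.txt` in this commit); the metric name and mounting approach are assumptions rather than the commit's code:

```python
from fastapi import FastAPI
from prometheus_client import Counter, make_asgi_app

app = FastAPI()

REQUESTS_TOTAL = Counter("fred_ml_requests_total", "Total API requests handled")

# Expose the Prometheus text exposition format at GET /metrics for the scrape job above.
app.mount("/metrics", make_asgi_app())


@app.get("/api/v1/indicators")
def list_indicators() -> list:
    REQUESTS_TOTAL.inc()
    return ["GDP", "UNRATE", "CPIAUCSL"]
```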
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 
1
  fredapi==0.4.2
2
  pandas==2.1.4
3
  numpy==1.24.3
@@ -10,4 +11,28 @@ PyYAML==6.0.2
10
  APScheduler==3.10.4
11
  scikit-learn==1.3.0
12
  scipy==1.11.1
13
- statsmodels==0.14.0
1
+ # Core dependencies
2
  fredapi==0.4.2
3
  pandas==2.1.4
4
  numpy==1.24.3
 
11
  APScheduler==3.10.4
12
  scikit-learn==1.3.0
13
  scipy==1.11.1
14
+ statsmodels==0.14.0
15
+
16
+ # Production dependencies
17
+ fastapi==0.104.1
18
+ uvicorn[standard]==0.24.0
19
+ pydantic==1.10.13
20
+ redis==5.0.1
21
+ psycopg2-binary==2.9.9
22
+ sqlalchemy==2.0.23
23
+ alembic==1.13.0
24
+
25
+ # Monitoring and logging
26
+ prometheus-client==0.19.0
27
+ structlog==23.2.0
28
+
29
+ # Testing
30
+ pytest==7.4.0
31
+ pytest-asyncio==0.21.1
32
+ httpx==0.25.2
33
+
34
+ # Development
35
+ black==23.11.0
36
+ flake8==6.1.0
37
+ mypy==1.7.1
38
+ pre-commit==3.6.0
src/__init__.py CHANGED
@@ -12,10 +12,10 @@ __version__ = "1.0.0"
12
  __author__ = "Economic Data Team"
13
  __email__ = "[email protected]"
14
 
15
- from .core.fred_client import FREDDataCollectorV2
16
  from .analysis.advanced_analytics import AdvancedAnalytics
 
17
 
18
  __all__ = [
19
- 'FREDDataCollectorV2',
20
- 'AdvancedAnalytics',
21
- ]
 
12
  __author__ = "Economic Data Team"
13
  __email__ = "[email protected]"
14
 
 
15
  from .analysis.advanced_analytics import AdvancedAnalytics
16
+ from .core.fred_client import FREDDataCollectorV2
17
 
18
  __all__ = [
19
+ "FREDDataCollectorV2",
20
+ "AdvancedAnalytics",
21
+ ]
src/__pycache__/__init__.cpython-39.pyc CHANGED
Binary files a/src/__pycache__/__init__.cpython-39.pyc and b/src/__pycache__/__init__.cpython-39.pyc differ
 
src/analysis/__init__.py CHANGED
@@ -4,4 +4,4 @@ Economic data analysis and visualization tools.
4
 
5
  from .advanced_analytics import AdvancedAnalytics
6
 
7
- __all__ = ['AdvancedAnalytics']
 
4
 
5
  from .advanced_analytics import AdvancedAnalytics
6
 
7
+ __all__ = ["AdvancedAnalytics"]
src/analysis/__pycache__/__init__.cpython-39.pyc CHANGED
Binary files a/src/analysis/__pycache__/__init__.cpython-39.pyc and b/src/analysis/__pycache__/__init__.cpython-39.pyc differ
 
src/analysis/__pycache__/advanced_analytics.cpython-39.pyc CHANGED
Binary files a/src/analysis/__pycache__/advanced_analytics.cpython-39.pyc and b/src/analysis/__pycache__/advanced_analytics.cpython-39.pyc differ
 
src/analysis/advanced_analytics.py CHANGED
@@ -4,32 +4,34 @@ Advanced Analytics Module for FRED Economic Data
4
  Performs comprehensive statistical analysis, modeling, and insights extraction.
5
  """
6
 
7
- import pandas as pd
8
- import numpy as np
9
  import matplotlib.pyplot as plt
 
 
10
  import seaborn as sns
 
11
  from scipy import stats
12
- from sklearn.preprocessing import StandardScaler
13
- from sklearn.decomposition import PCA
14
  from sklearn.cluster import KMeans
15
- from sklearn.metrics import silhouette_score
16
  from sklearn.linear_model import LinearRegression
 
17
  from sklearn.model_selection import train_test_split
18
- from sklearn.metrics import r2_score, mean_squared_error
19
- import statsmodels.api as sm
20
- from statsmodels.tsa.seasonal import seasonal_decompose
21
- from statsmodels.tsa.arima.model import ARIMA
22
  from statsmodels.stats.diagnostic import het_breuschpagan
23
  from statsmodels.stats.outliers_influence import variance_inflation_factor
24
- import warnings
25
- warnings.filterwarnings('ignore')
 
 
 
26
 
27
  class AdvancedAnalytics:
28
  """
29
  Comprehensive analytics class for FRED economic data.
30
  Performs EDA, statistical modeling, segmentation, and time series analysis.
31
  """
32
-
33
  def __init__(self, data_path=None, df=None):
34
  """Initialize with data path or DataFrame."""
35
  if df is not None:
@@ -38,171 +40,171 @@ class AdvancedAnalytics:
38
  self.df = pd.read_csv(data_path, index_col=0, parse_dates=True)
39
  else:
40
  raise ValueError("Must provide either data_path or DataFrame")
41
-
42
  self.scaler = StandardScaler()
43
  self.results = {}
44
-
45
  def perform_eda(self):
46
  """Perform comprehensive Exploratory Data Analysis."""
47
  print("=" * 60)
48
  print("EXPLORATORY DATA ANALYSIS")
49
  print("=" * 60)
50
-
51
  # Basic info
52
  print(f"\nDataset Shape: {self.df.shape}")
53
  print(f"Date Range: {self.df.index.min()} to {self.df.index.max()}")
54
  print(f"Variables: {list(self.df.columns)}")
55
-
56
  # Descriptive statistics
57
  print("\n" + "=" * 40)
58
  print("DESCRIPTIVE STATISTICS")
59
  print("=" * 40)
60
  desc_stats = self.df.describe()
61
  print(desc_stats)
62
-
63
  # Skewness and Kurtosis
64
  print("\n" + "=" * 40)
65
  print("SKEWNESS AND KURTOSIS")
66
  print("=" * 40)
67
  skewness = self.df.skew()
68
  kurtosis = self.df.kurtosis()
69
-
70
  for col in self.df.columns:
71
  print(f"{col}:")
72
  print(f" Skewness: {skewness[col]:.3f}")
73
  print(f" Kurtosis: {kurtosis[col]:.3f}")
74
-
75
  # Correlation Analysis
76
  print("\n" + "=" * 40)
77
  print("CORRELATION ANALYSIS")
78
  print("=" * 40)
79
-
80
  # Pearson correlation
81
- pearson_corr = self.df.corr(method='pearson')
82
  print("\nPearson Correlation Matrix:")
83
  print(pearson_corr.round(3))
84
-
85
  # Spearman correlation
86
- spearman_corr = self.df.corr(method='spearman')
87
  print("\nSpearman Correlation Matrix:")
88
  print(spearman_corr.round(3))
89
-
90
  # Store results
91
- self.results['eda'] = {
92
- 'descriptive_stats': desc_stats,
93
- 'skewness': skewness,
94
- 'kurtosis': kurtosis,
95
- 'pearson_corr': pearson_corr,
96
- 'spearman_corr': spearman_corr
97
  }
98
-
99
- return self.results['eda']
100
-
101
- def perform_dimensionality_reduction(self, method='pca', n_components=2):
102
  """Perform dimensionality reduction for visualization."""
103
  print("\n" + "=" * 40)
104
  print(f"DIMENSIONALITY REDUCTION ({method.upper()})")
105
  print("=" * 40)
106
-
107
  # Prepare data (remove NaN values)
108
  df_clean = self.df.dropna()
109
-
110
- if method.lower() == 'pca':
111
  # PCA
112
  pca = PCA(n_components=n_components)
113
  scaled_data = self.scaler.fit_transform(df_clean)
114
  pca_result = pca.fit_transform(scaled_data)
115
-
116
  print(f"Explained variance ratio: {pca.explained_variance_ratio_}")
117
  print(f"Total explained variance: {sum(pca.explained_variance_ratio_):.3f}")
118
-
119
  # Create DataFrame with PCA results
120
  pca_df = pd.DataFrame(
121
- pca_result,
122
- columns=[f'PC{i+1}' for i in range(n_components)],
123
- index=df_clean.index
124
  )
125
-
126
- self.results['pca'] = {
127
- 'components': pca_df,
128
- 'explained_variance': pca.explained_variance_ratio_,
129
- 'feature_importance': pd.DataFrame(
130
  pca.components_.T,
131
- columns=[f'PC{i+1}' for i in range(n_components)],
132
- index=df_clean.columns
133
- )
134
  }
135
-
136
- return self.results['pca']
137
-
138
  return None
139
-
140
- def perform_statistical_modeling(self, target_var='GDP', test_size=0.2):
141
  """Perform linear regression with comprehensive diagnostics."""
142
  print("\n" + "=" * 40)
143
  print("STATISTICAL MODELING - LINEAR REGRESSION")
144
  print("=" * 40)
145
-
146
  # Prepare data
147
  df_clean = self.df.dropna()
148
-
149
  if target_var not in df_clean.columns:
150
  print(f"Target variable '{target_var}' not found in dataset")
151
  return None
152
-
153
  # Prepare features and target
154
  feature_cols = [col for col in df_clean.columns if col != target_var]
155
  X = df_clean[feature_cols]
156
  y = df_clean[target_var]
157
-
158
  # Split data
159
  X_train, X_test, y_train, y_test = train_test_split(
160
  X, y, test_size=test_size, random_state=42
161
  )
162
-
163
  # Fit linear regression
164
  model = LinearRegression()
165
  model.fit(X_train, y_train)
166
-
167
  # Predictions
168
  y_pred_train = model.predict(X_train)
169
  y_pred_test = model.predict(X_test)
170
-
171
  # Model performance
172
  r2_train = r2_score(y_train, y_pred_train)
173
  r2_test = r2_score(y_test, y_pred_test)
174
  rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))
175
  rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
176
-
177
  print(f"\nModel Performance:")
178
  print(f"RΒ² (Training): {r2_train:.4f}")
179
  print(f"RΒ² (Test): {r2_test:.4f}")
180
  print(f"RMSE (Training): {rmse_train:.4f}")
181
  print(f"RMSE (Test): {rmse_test:.4f}")
182
-
183
  # Coefficients
184
  print(f"\nCoefficients:")
185
  for feature, coef in zip(feature_cols, model.coef_):
186
  print(f" {feature}: {coef:.4f}")
187
  print(f" Intercept: {model.intercept_:.4f}")
188
-
189
  # Statistical significance using statsmodels
190
  X_with_const = sm.add_constant(X_train)
191
  model_sm = sm.OLS(y_train, X_with_const).fit()
192
-
193
  print(f"\nStatistical Significance:")
194
  print(model_sm.summary().tables[1])
195
-
196
  # Assumption tests
197
  print(f"\n" + "=" * 30)
198
  print("REGRESSION ASSUMPTIONS")
199
  print("=" * 30)
200
-
201
  # 1. Normality of residuals
202
  residuals = y_train - y_pred_train
203
  _, p_value_norm = stats.normaltest(residuals)
204
  print(f"Normality test (p-value): {p_value_norm:.4f}")
205
-
206
  # 2. Multicollinearity (VIF)
207
  vif_data = []
208
  for i in range(X_train.shape[1]):
@@ -211,11 +213,11 @@ class AdvancedAnalytics:
211
  vif_data.append(vif)
212
  except:
213
  vif_data.append(np.nan)
214
-
215
  print(f"\nVariance Inflation Factors:")
216
  for feature, vif in zip(feature_cols, vif_data):
217
  print(f" {feature}: {vif:.3f}")
218
-
219
  # 3. Homoscedasticity
220
  try:
221
  _, p_value_het = het_breuschpagan(residuals, X_with_const)
@@ -223,44 +225,46 @@ class AdvancedAnalytics:
223
  except:
224
  p_value_het = np.nan
225
  print(f"\nHomoscedasticity test failed")
226
-
227
  # Store results
228
- self.results['regression'] = {
229
- 'model': model,
230
- 'model_sm': model_sm,
231
- 'performance': {
232
- 'r2_train': r2_train,
233
- 'r2_test': r2_test,
234
- 'rmse_train': rmse_train,
235
- 'rmse_test': rmse_test
 
 
 
 
 
 
236
  },
237
- 'coefficients': dict(zip(feature_cols, model.coef_)),
238
- 'assumptions': {
239
- 'normality_p': p_value_norm,
240
- 'homoscedasticity_p': p_value_het,
241
- 'vif': dict(zip(feature_cols, vif_data))
242
- }
243
  }
244
-
245
- return self.results['regression']
246
-
247
  def perform_clustering(self, max_k=10):
248
  """Perform clustering analysis with optimal k selection."""
249
  print("\n" + "=" * 40)
250
  print("CLUSTERING ANALYSIS")
251
  print("=" * 40)
252
-
253
  # Prepare data
254
  df_clean = self.df.dropna()
255
  if df_clean.shape[0] < 10 or df_clean.shape[1] < 2:
256
- print("Not enough data for clustering (need at least 10 rows and 2 columns after dropna). Skipping.")
257
- self.results['clustering'] = None
 
 
258
  return None
259
  try:
260
  scaled_data = self.scaler.fit_transform(df_clean)
261
  except Exception as e:
262
  print(f"Scaling failed: {e}")
263
- self.results['clustering'] = None
264
  return None
265
  # Find optimal k using elbow method and silhouette score
266
  inertias = []
@@ -268,7 +272,7 @@ class AdvancedAnalytics:
268
  k_range = range(2, min(max_k + 1, len(df_clean) // 10 + 1))
269
  if len(k_range) < 2:
270
  print("Not enough data for multiple clusters. Skipping clustering.")
271
- self.results['clustering'] = None
272
  return None
273
  try:
274
  for k in k_range:
@@ -280,19 +284,21 @@ class AdvancedAnalytics:
280
  if inertias and silhouette_scores:
281
  plt.figure(figsize=(12, 4))
282
  plt.subplot(1, 2, 1)
283
- plt.plot(list(k_range), inertias, 'bo-')
284
- plt.xlabel('Number of Clusters (k)')
285
- plt.ylabel('Inertia')
286
- plt.title('Elbow Method')
287
  plt.grid(True)
288
  plt.subplot(1, 2, 2)
289
- plt.plot(list(k_range), silhouette_scores, 'ro-')
290
- plt.xlabel('Number of Clusters (k)')
291
- plt.ylabel('Silhouette Score')
292
- plt.title('Silhouette Analysis')
293
  plt.grid(True)
294
  plt.tight_layout()
295
- plt.savefig('data/exports/clustering_analysis.png', dpi=300, bbox_inches='tight')
 
 
296
  plt.show()
297
  # Choose optimal k (highest silhouette score)
298
  optimal_k = list(k_range)[np.argmax(silhouette_scores)]
@@ -303,42 +309,44 @@ class AdvancedAnalytics:
303
  cluster_labels = kmeans_optimal.fit_predict(scaled_data)
304
  # Add cluster labels to data
305
  df_clustered = df_clean.copy()
306
- df_clustered['Cluster'] = cluster_labels
307
  # Cluster characteristics
308
  print(f"\nCluster Characteristics:")
309
- cluster_stats = df_clustered.groupby('Cluster').agg(['mean', 'std'])
310
  print(cluster_stats.round(3))
311
  # Store results
312
- self.results['clustering'] = {
313
- 'optimal_k': optimal_k,
314
- 'silhouette_score': max(silhouette_scores),
315
- 'cluster_labels': cluster_labels,
316
- 'clustered_data': df_clustered,
317
- 'cluster_stats': cluster_stats,
318
- 'inertias': inertias,
319
- 'silhouette_scores': silhouette_scores
320
  }
321
- return self.results['clustering']
322
  except Exception as e:
323
  print(f"Clustering failed: {e}")
324
- self.results['clustering'] = None
325
  return None
326
-
327
- def perform_time_series_analysis(self, target_var='GDP'):
328
  """Perform comprehensive time series analysis."""
329
  print("\n" + "=" * 40)
330
  print("TIME SERIES ANALYSIS")
331
  print("=" * 40)
332
-
333
  if target_var not in self.df.columns:
334
  print(f"Target variable '{target_var}' not found")
335
- self.results['time_series'] = None
336
  return None
337
  # Prepare time series data
338
  ts_data = self.df[target_var].dropna()
339
  if len(ts_data) < 50:
340
- print("Insufficient data for time series analysis (need at least 50 points). Skipping.")
341
- self.results['time_series'] = None
 
 
342
  return None
343
  print(f"Time series length: {len(ts_data)} observations")
344
  print(f"Date range: {ts_data.index.min()} to {ts_data.index.max()}")
@@ -347,18 +355,22 @@ class AdvancedAnalytics:
347
  try:
348
  # Resample to monthly data if needed
349
  if ts_data.index.freq is None:
350
- ts_monthly = ts_data.resample('M').mean()
351
  else:
352
  ts_monthly = ts_data
353
- decomposition = seasonal_decompose(ts_monthly, model='additive', period=12)
354
  # Plot decomposition
355
  fig, axes = plt.subplots(4, 1, figsize=(12, 10))
356
- decomposition.observed.plot(ax=axes[0], title='Original Time Series')
357
- decomposition.trend.plot(ax=axes[1], title='Trend')
358
- decomposition.seasonal.plot(ax=axes[2], title='Seasonality')
359
- decomposition.resid.plot(ax=axes[3], title='Residuals')
360
  plt.tight_layout()
361
- plt.savefig('data/exports/time_series_decomposition.png', dpi=300, bbox_inches='tight')
 
 
 
 
362
  plt.show()
363
  except Exception as e:
364
  print(f"Decomposition failed: {e}")
@@ -376,65 +388,77 @@ class AdvancedAnalytics:
376
  conf_int = fitted_model.get_forecast(steps=forecast_steps).conf_int()
377
  # Plot forecast
378
  plt.figure(figsize=(12, 6))
379
- ts_monthly.plot(label='Historical Data')
380
- forecast.plot(label='Forecast', color='red')
381
- plt.fill_between(forecast.index,
382
- conf_int.iloc[:, 0],
383
- conf_int.iloc[:, 1],
384
- alpha=0.3, color='red', label='Confidence Interval')
385
- plt.title(f'{target_var} - ARIMA Forecast')
 
 
 
 
386
  plt.legend()
387
  plt.grid(True)
388
  plt.tight_layout()
389
- plt.savefig('data/exports/time_series_forecast.png', dpi=300, bbox_inches='tight')
 
 
390
  plt.show()
391
  # Store results
392
- self.results['time_series'] = {
393
- 'model': fitted_model,
394
- 'forecast': forecast,
395
- 'confidence_intervals': conf_int,
396
- 'decomposition': decomposition if 'decomposition' in locals() else None
397
  }
398
  except Exception as e:
399
  print(f"ARIMA modeling failed: {e}")
400
- self.results['time_series'] = None
401
- return self.results.get('time_series')
402
-
403
  def generate_insights_report(self):
404
  """Generate comprehensive insights report in layman's terms."""
405
  print("\n" + "=" * 60)
406
  print("COMPREHENSIVE INSIGHTS REPORT")
407
  print("=" * 60)
408
-
409
  insights = []
410
  # EDA Insights
411
- if 'eda' in self.results and self.results['eda'] is not None:
412
  insights.append("EXPLORATORY DATA ANALYSIS INSIGHTS:")
413
  insights.append("-" * 40)
414
  # Correlation insights
415
- pearson_corr = self.results['eda']['pearson_corr']
416
  high_corr_pairs = []
417
  for i in range(len(pearson_corr.columns)):
418
- for j in range(i+1, len(pearson_corr.columns)):
419
  corr_val = pearson_corr.iloc[i, j]
420
  if abs(corr_val) > 0.7:
421
- high_corr_pairs.append((pearson_corr.columns[i], pearson_corr.columns[j], corr_val))
 
 
422
  if high_corr_pairs:
423
  insights.append("Strong correlations found:")
424
  for var1, var2, corr in high_corr_pairs:
425
  insights.append(f" β€’ {var1} and {var2}: {corr:.3f}")
426
  else:
427
- insights.append("No strong correlations (>0.7) found between variables.")
 
 
428
  else:
429
  insights.append("EDA could not be performed or returned no results.")
430
  # Regression Insights
431
- if 'regression' in self.results and self.results['regression'] is not None:
432
  insights.append("\nREGRESSION MODEL INSIGHTS:")
433
  insights.append("-" * 40)
434
- reg_results = self.results['regression']
435
- r2_test = reg_results['performance']['r2_test']
436
  insights.append(f"Model Performance:")
437
- insights.append(f" β€’ The model explains {r2_test:.1%} of the variation in the target variable")
 
 
438
  if r2_test > 0.7:
439
  insights.append(" β€’ This is considered a good model fit")
440
  elif r2_test > 0.5:
@@ -442,20 +466,26 @@ class AdvancedAnalytics:
442
  else:
443
  insights.append(" β€’ This model has limited predictive power")
444
  # Assumption insights
445
- assumptions = reg_results['assumptions']
446
- if assumptions['normality_p'] > 0.05:
447
- insights.append(" β€’ Residuals are normally distributed (assumption met)")
 
 
448
  else:
449
- insights.append(" β€’ Residuals are not normally distributed (assumption violated)")
 
 
450
  else:
451
- insights.append("Regression modeling could not be performed or returned no results.")
 
 
452
  # Clustering Insights
453
- if 'clustering' in self.results and self.results['clustering'] is not None:
454
  insights.append("\nCLUSTERING INSIGHTS:")
455
  insights.append("-" * 40)
456
- cluster_results = self.results['clustering']
457
- optimal_k = cluster_results['optimal_k']
458
- silhouette_score = cluster_results['silhouette_score']
459
  insights.append(f"Optimal number of clusters: {optimal_k}")
460
  insights.append(f"Cluster quality score: {silhouette_score:.3f}")
461
  if silhouette_score > 0.5:
@@ -467,51 +497,61 @@ class AdvancedAnalytics:
467
  else:
468
  insights.append("Clustering could not be performed or returned no results.")
469
  # Time Series Insights
470
- if 'time_series' in self.results and self.results['time_series'] is not None:
471
  insights.append("\nTIME SERIES INSIGHTS:")
472
  insights.append("-" * 40)
473
- insights.append(" β€’ Time series decomposition shows trend, seasonality, and random components")
474
- insights.append(" β€’ ARIMA model provides future forecasts with confidence intervals")
475
- insights.append(" β€’ Forecasts can be used for planning and decision-making")
 
 
 
 
 
 
476
  else:
477
- insights.append("Time series analysis could not be performed or returned no results.")
 
 
478
  # Print insights
479
  for insight in insights:
480
  print(insight)
481
  # Save insights to file
482
- with open('data/exports/insights_report.txt', 'w') as f:
483
- f.write('\n'.join(insights))
484
  return insights
485
-
486
  def run_complete_analysis(self):
487
  """Run the complete advanced analytics workflow."""
488
  print("Starting comprehensive advanced analytics...")
489
-
490
  # 1. EDA
491
  self.perform_eda()
492
-
493
  # 2. Dimensionality reduction
494
  self.perform_dimensionality_reduction()
495
-
496
  # 3. Statistical modeling
497
  self.perform_statistical_modeling()
498
-
499
  # 4. Clustering
500
  self.perform_clustering()
501
-
502
  # 5. Time series analysis
503
  self.perform_time_series_analysis()
504
-
505
  # 6. Generate insights
506
  self.generate_insights_report()
507
-
508
  print("\n" + "=" * 60)
509
  print("ANALYSIS COMPLETE!")
510
  print("=" * 60)
511
  print("Check the following outputs:")
512
  print(" β€’ data/exports/insights_report.txt - Comprehensive insights")
513
  print(" β€’ data/exports/clustering_analysis.png - Clustering results")
514
- print(" β€’ data/exports/time_series_decomposition.png - Time series decomposition")
 
 
515
  print(" β€’ data/exports/time_series_forecast.png - Time series forecast")
516
-
517
- return self.results
 
4
  Performs comprehensive statistical analysis, modeling, and insights extraction.
5
  """
6
 
7
+ import warnings
8
+
9
  import matplotlib.pyplot as plt
10
+ import numpy as np
11
+ import pandas as pd
12
  import seaborn as sns
13
+ import statsmodels.api as sm
14
  from scipy import stats
 
 
15
  from sklearn.cluster import KMeans
16
+ from sklearn.decomposition import PCA
17
  from sklearn.linear_model import LinearRegression
18
+ from sklearn.metrics import mean_squared_error, r2_score, silhouette_score
19
  from sklearn.model_selection import train_test_split
20
+ from sklearn.preprocessing import StandardScaler
 
 
 
21
  from statsmodels.stats.diagnostic import het_breuschpagan
22
  from statsmodels.stats.outliers_influence import variance_inflation_factor
23
+ from statsmodels.tsa.arima.model import ARIMA
24
+ from statsmodels.tsa.seasonal import seasonal_decompose
25
+
26
+ warnings.filterwarnings("ignore")
27
+
28
 
29
  class AdvancedAnalytics:
30
  """
31
  Comprehensive analytics class for FRED economic data.
32
  Performs EDA, statistical modeling, segmentation, and time series analysis.
33
  """
34
+
35
  def __init__(self, data_path=None, df=None):
36
  """Initialize with data path or DataFrame."""
37
  if df is not None:
 
40
  self.df = pd.read_csv(data_path, index_col=0, parse_dates=True)
41
  else:
42
  raise ValueError("Must provide either data_path or DataFrame")
43
+
44
  self.scaler = StandardScaler()
45
  self.results = {}
46
+
47
  def perform_eda(self):
48
  """Perform comprehensive Exploratory Data Analysis."""
49
  print("=" * 60)
50
  print("EXPLORATORY DATA ANALYSIS")
51
  print("=" * 60)
52
+
53
  # Basic info
54
  print(f"\nDataset Shape: {self.df.shape}")
55
  print(f"Date Range: {self.df.index.min()} to {self.df.index.max()}")
56
  print(f"Variables: {list(self.df.columns)}")
57
+
58
  # Descriptive statistics
59
  print("\n" + "=" * 40)
60
  print("DESCRIPTIVE STATISTICS")
61
  print("=" * 40)
62
  desc_stats = self.df.describe()
63
  print(desc_stats)
64
+
65
  # Skewness and Kurtosis
66
  print("\n" + "=" * 40)
67
  print("SKEWNESS AND KURTOSIS")
68
  print("=" * 40)
69
  skewness = self.df.skew()
70
  kurtosis = self.df.kurtosis()
71
+
72
  for col in self.df.columns:
73
  print(f"{col}:")
74
  print(f" Skewness: {skewness[col]:.3f}")
75
  print(f" Kurtosis: {kurtosis[col]:.3f}")
76
+
77
  # Correlation Analysis
78
  print("\n" + "=" * 40)
79
  print("CORRELATION ANALYSIS")
80
  print("=" * 40)
81
+
82
  # Pearson correlation
83
+ pearson_corr = self.df.corr(method="pearson")
84
  print("\nPearson Correlation Matrix:")
85
  print(pearson_corr.round(3))
86
+
87
  # Spearman correlation
88
+ spearman_corr = self.df.corr(method="spearman")
89
  print("\nSpearman Correlation Matrix:")
90
  print(spearman_corr.round(3))
91
+
92
  # Store results
93
+ self.results["eda"] = {
94
+ "descriptive_stats": desc_stats,
95
+ "skewness": skewness,
96
+ "kurtosis": kurtosis,
97
+ "pearson_corr": pearson_corr,
98
+ "spearman_corr": spearman_corr,
99
  }
100
+
101
+ return self.results["eda"]
102
+
103
+ def perform_dimensionality_reduction(self, method="pca", n_components=2):
104
  """Perform dimensionality reduction for visualization."""
105
  print("\n" + "=" * 40)
106
  print(f"DIMENSIONALITY REDUCTION ({method.upper()})")
107
  print("=" * 40)
108
+
109
  # Prepare data (remove NaN values)
110
  df_clean = self.df.dropna()
111
+
112
+ if method.lower() == "pca":
113
  # PCA
114
  pca = PCA(n_components=n_components)
115
  scaled_data = self.scaler.fit_transform(df_clean)
116
  pca_result = pca.fit_transform(scaled_data)
117
+
118
  print(f"Explained variance ratio: {pca.explained_variance_ratio_}")
119
  print(f"Total explained variance: {sum(pca.explained_variance_ratio_):.3f}")
120
+
121
  # Create DataFrame with PCA results
122
  pca_df = pd.DataFrame(
123
+ pca_result,
124
+ columns=[f"PC{i+1}" for i in range(n_components)],
125
+ index=df_clean.index,
126
  )
127
+
128
+ self.results["pca"] = {
129
+ "components": pca_df,
130
+ "explained_variance": pca.explained_variance_ratio_,
131
+ "feature_importance": pd.DataFrame(
132
  pca.components_.T,
133
+ columns=[f"PC{i+1}" for i in range(n_components)],
134
+ index=df_clean.columns,
135
+ ),
136
  }
137
+
138
+ return self.results["pca"]
139
+
140
  return None
141
+
142
+ def perform_statistical_modeling(self, target_var="GDP", test_size=0.2):
143
  """Perform linear regression with comprehensive diagnostics."""
144
  print("\n" + "=" * 40)
145
  print("STATISTICAL MODELING - LINEAR REGRESSION")
146
  print("=" * 40)
147
+
148
  # Prepare data
149
  df_clean = self.df.dropna()
150
+
151
  if target_var not in df_clean.columns:
152
  print(f"Target variable '{target_var}' not found in dataset")
153
  return None
154
+
155
  # Prepare features and target
156
  feature_cols = [col for col in df_clean.columns if col != target_var]
157
  X = df_clean[feature_cols]
158
  y = df_clean[target_var]
159
+
160
  # Split data
161
  X_train, X_test, y_train, y_test = train_test_split(
162
  X, y, test_size=test_size, random_state=42
163
  )
164
+
165
  # Fit linear regression
166
  model = LinearRegression()
167
  model.fit(X_train, y_train)
168
+
169
  # Predictions
170
  y_pred_train = model.predict(X_train)
171
  y_pred_test = model.predict(X_test)
172
+
173
  # Model performance
174
  r2_train = r2_score(y_train, y_pred_train)
175
  r2_test = r2_score(y_test, y_pred_test)
176
  rmse_train = np.sqrt(mean_squared_error(y_train, y_pred_train))
177
  rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
178
+
179
  print(f"\nModel Performance:")
180
  print(f"RΒ² (Training): {r2_train:.4f}")
181
  print(f"RΒ² (Test): {r2_test:.4f}")
182
  print(f"RMSE (Training): {rmse_train:.4f}")
183
  print(f"RMSE (Test): {rmse_test:.4f}")
184
+
185
  # Coefficients
186
  print(f"\nCoefficients:")
187
  for feature, coef in zip(feature_cols, model.coef_):
188
  print(f" {feature}: {coef:.4f}")
189
  print(f" Intercept: {model.intercept_:.4f}")
190
+
191
  # Statistical significance using statsmodels
192
  X_with_const = sm.add_constant(X_train)
193
  model_sm = sm.OLS(y_train, X_with_const).fit()
194
+
195
  print(f"\nStatistical Significance:")
196
  print(model_sm.summary().tables[1])
197
+
198
  # Assumption tests
199
  print(f"\n" + "=" * 30)
200
  print("REGRESSION ASSUMPTIONS")
201
  print("=" * 30)
202
+
203
  # 1. Normality of residuals
204
  residuals = y_train - y_pred_train
205
  _, p_value_norm = stats.normaltest(residuals)
206
  print(f"Normality test (p-value): {p_value_norm:.4f}")
207
+
208
  # 2. Multicollinearity (VIF)
209
  vif_data = []
210
  for i in range(X_train.shape[1]):
 
213
  vif_data.append(vif)
214
  except:
215
  vif_data.append(np.nan)
216
+
217
  print(f"\nVariance Inflation Factors:")
218
  for feature, vif in zip(feature_cols, vif_data):
219
  print(f" {feature}: {vif:.3f}")
220
+
221
  # 3. Homoscedasticity
222
  try:
223
  _, p_value_het = het_breuschpagan(residuals, X_with_const)
 
225
  except:
226
  p_value_het = np.nan
227
  print(f"\nHomoscedasticity test failed")
228
+
229
  # Store results
230
+ self.results["regression"] = {
231
+ "model": model,
232
+ "model_sm": model_sm,
233
+ "performance": {
234
+ "r2_train": r2_train,
235
+ "r2_test": r2_test,
236
+ "rmse_train": rmse_train,
237
+ "rmse_test": rmse_test,
238
+ },
239
+ "coefficients": dict(zip(feature_cols, model.coef_)),
240
+ "assumptions": {
241
+ "normality_p": p_value_norm,
242
+ "homoscedasticity_p": p_value_het,
243
+ "vif": dict(zip(feature_cols, vif_data)),
244
  },
 
 
 
 
 
 
245
  }
246
+
247
+ return self.results["regression"]
248
+
249
  def perform_clustering(self, max_k=10):
250
  """Perform clustering analysis with optimal k selection."""
251
  print("\n" + "=" * 40)
252
  print("CLUSTERING ANALYSIS")
253
  print("=" * 40)
254
+
255
  # Prepare data
256
  df_clean = self.df.dropna()
257
  if df_clean.shape[0] < 10 or df_clean.shape[1] < 2:
258
+ print(
259
+ "Not enough data for clustering (need at least 10 rows and 2 columns after dropna). Skipping."
260
+ )
261
+ self.results["clustering"] = None
262
  return None
263
  try:
264
  scaled_data = self.scaler.fit_transform(df_clean)
265
  except Exception as e:
266
  print(f"Scaling failed: {e}")
267
+ self.results["clustering"] = None
268
  return None
269
  # Find optimal k using elbow method and silhouette score
270
  inertias = []
 
272
  k_range = range(2, min(max_k + 1, len(df_clean) // 10 + 1))
273
  if len(k_range) < 2:
274
  print("Not enough data for multiple clusters. Skipping clustering.")
275
+ self.results["clustering"] = None
276
  return None
277
  try:
278
  for k in k_range:
 
284
  if inertias and silhouette_scores:
285
  plt.figure(figsize=(12, 4))
286
  plt.subplot(1, 2, 1)
287
+ plt.plot(list(k_range), inertias, "bo-")
288
+ plt.xlabel("Number of Clusters (k)")
289
+ plt.ylabel("Inertia")
290
+ plt.title("Elbow Method")
291
  plt.grid(True)
292
  plt.subplot(1, 2, 2)
293
+ plt.plot(list(k_range), silhouette_scores, "ro-")
294
+ plt.xlabel("Number of Clusters (k)")
295
+ plt.ylabel("Silhouette Score")
296
+ plt.title("Silhouette Analysis")
297
  plt.grid(True)
298
  plt.tight_layout()
299
+ plt.savefig(
300
+ "data/exports/clustering_analysis.png", dpi=300, bbox_inches="tight"
301
+ )
302
  plt.show()
303
  # Choose optimal k (highest silhouette score)
304
  optimal_k = list(k_range)[np.argmax(silhouette_scores)]
 
309
  cluster_labels = kmeans_optimal.fit_predict(scaled_data)
310
  # Add cluster labels to data
311
  df_clustered = df_clean.copy()
312
+ df_clustered["Cluster"] = cluster_labels
313
  # Cluster characteristics
314
  print(f"\nCluster Characteristics:")
315
+ cluster_stats = df_clustered.groupby("Cluster").agg(["mean", "std"])
316
  print(cluster_stats.round(3))
317
  # Store results
318
+ self.results["clustering"] = {
319
+ "optimal_k": optimal_k,
320
+ "silhouette_score": max(silhouette_scores),
321
+ "cluster_labels": cluster_labels,
322
+ "clustered_data": df_clustered,
323
+ "cluster_stats": cluster_stats,
324
+ "inertias": inertias,
325
+ "silhouette_scores": silhouette_scores,
326
  }
327
+ return self.results["clustering"]
328
  except Exception as e:
329
  print(f"Clustering failed: {e}")
330
+ self.results["clustering"] = None
331
  return None
332
+
333
+ def perform_time_series_analysis(self, target_var="GDP"):
334
  """Perform comprehensive time series analysis."""
335
  print("\n" + "=" * 40)
336
  print("TIME SERIES ANALYSIS")
337
  print("=" * 40)
338
+
339
  if target_var not in self.df.columns:
340
  print(f"Target variable '{target_var}' not found")
341
+ self.results["time_series"] = None
342
  return None
343
  # Prepare time series data
344
  ts_data = self.df[target_var].dropna()
345
  if len(ts_data) < 50:
346
+ print(
347
+ "Insufficient data for time series analysis (need at least 50 points). Skipping."
348
+ )
349
+ self.results["time_series"] = None
350
  return None
351
  print(f"Time series length: {len(ts_data)} observations")
352
  print(f"Date range: {ts_data.index.min()} to {ts_data.index.max()}")
 
355
  try:
356
  # Resample to monthly data if needed
357
  if ts_data.index.freq is None:
358
+ ts_monthly = ts_data.resample("M").mean()
359
  else:
360
  ts_monthly = ts_data
361
+ decomposition = seasonal_decompose(ts_monthly, model="additive", period=12)
362
  # Plot decomposition
363
  fig, axes = plt.subplots(4, 1, figsize=(12, 10))
364
+ decomposition.observed.plot(ax=axes[0], title="Original Time Series")
365
+ decomposition.trend.plot(ax=axes[1], title="Trend")
366
+ decomposition.seasonal.plot(ax=axes[2], title="Seasonality")
367
+ decomposition.resid.plot(ax=axes[3], title="Residuals")
368
  plt.tight_layout()
369
+ plt.savefig(
370
+ "data/exports/time_series_decomposition.png",
371
+ dpi=300,
372
+ bbox_inches="tight",
373
+ )
374
  plt.show()
375
  except Exception as e:
376
  print(f"Decomposition failed: {e}")
 
388
  conf_int = fitted_model.get_forecast(steps=forecast_steps).conf_int()
389
  # Plot forecast
390
  plt.figure(figsize=(12, 6))
391
+ ts_monthly.plot(label="Historical Data")
392
+ forecast.plot(label="Forecast", color="red")
393
+ plt.fill_between(
394
+ forecast.index,
395
+ conf_int.iloc[:, 0],
396
+ conf_int.iloc[:, 1],
397
+ alpha=0.3,
398
+ color="red",
399
+ label="Confidence Interval",
400
+ )
401
+ plt.title(f"{target_var} - ARIMA Forecast")
402
  plt.legend()
403
  plt.grid(True)
404
  plt.tight_layout()
405
+ plt.savefig(
406
+ "data/exports/time_series_forecast.png", dpi=300, bbox_inches="tight"
407
+ )
408
  plt.show()
409
  # Store results
410
+ self.results["time_series"] = {
411
+ "model": fitted_model,
412
+ "forecast": forecast,
413
+ "confidence_intervals": conf_int,
414
+ "decomposition": decomposition if "decomposition" in locals() else None,
415
  }
416
  except Exception as e:
417
  print(f"ARIMA modeling failed: {e}")
418
+ self.results["time_series"] = None
419
+ return self.results.get("time_series")
420
+
421
  def generate_insights_report(self):
422
  """Generate comprehensive insights report in layman's terms."""
423
  print("\n" + "=" * 60)
424
  print("COMPREHENSIVE INSIGHTS REPORT")
425
  print("=" * 60)
426
+
427
  insights = []
428
  # EDA Insights
429
+ if "eda" in self.results and self.results["eda"] is not None:
430
  insights.append("EXPLORATORY DATA ANALYSIS INSIGHTS:")
431
  insights.append("-" * 40)
432
  # Correlation insights
433
+ pearson_corr = self.results["eda"]["pearson_corr"]
434
  high_corr_pairs = []
435
  for i in range(len(pearson_corr.columns)):
436
+ for j in range(i + 1, len(pearson_corr.columns)):
437
  corr_val = pearson_corr.iloc[i, j]
438
  if abs(corr_val) > 0.7:
439
+ high_corr_pairs.append(
440
+ (pearson_corr.columns[i], pearson_corr.columns[j], corr_val)
441
+ )
442
  if high_corr_pairs:
443
  insights.append("Strong correlations found:")
444
  for var1, var2, corr in high_corr_pairs:
445
  insights.append(f" β€’ {var1} and {var2}: {corr:.3f}")
446
  else:
447
+ insights.append(
448
+ "No strong correlations (>0.7) found between variables."
449
+ )
450
  else:
451
  insights.append("EDA could not be performed or returned no results.")
452
  # Regression Insights
453
+ if "regression" in self.results and self.results["regression"] is not None:
454
  insights.append("\nREGRESSION MODEL INSIGHTS:")
455
  insights.append("-" * 40)
456
+ reg_results = self.results["regression"]
457
+ r2_test = reg_results["performance"]["r2_test"]
458
  insights.append(f"Model Performance:")
459
+ insights.append(
460
+ f" β€’ The model explains {r2_test:.1%} of the variation in the target variable"
461
+ )
462
  if r2_test > 0.7:
463
  insights.append(" β€’ This is considered a good model fit")
464
  elif r2_test > 0.5:
 
466
  else:
467
  insights.append(" β€’ This model has limited predictive power")
468
  # Assumption insights
469
+ assumptions = reg_results["assumptions"]
470
+ if assumptions["normality_p"] > 0.05:
471
+ insights.append(
472
+ " β€’ Residuals are normally distributed (assumption met)"
473
+ )
474
  else:
475
+ insights.append(
476
+ " β€’ Residuals are not normally distributed (assumption violated)"
477
+ )
478
  else:
479
+ insights.append(
480
+ "Regression modeling could not be performed or returned no results."
481
+ )
482
  # Clustering Insights
483
+ if "clustering" in self.results and self.results["clustering"] is not None:
484
  insights.append("\nCLUSTERING INSIGHTS:")
485
  insights.append("-" * 40)
486
+ cluster_results = self.results["clustering"]
487
+ optimal_k = cluster_results["optimal_k"]
488
+ silhouette_score = cluster_results["silhouette_score"]
489
  insights.append(f"Optimal number of clusters: {optimal_k}")
490
  insights.append(f"Cluster quality score: {silhouette_score:.3f}")
491
  if silhouette_score > 0.5:
 
497
  else:
498
  insights.append("Clustering could not be performed or returned no results.")
499
  # Time Series Insights
500
+ if "time_series" in self.results and self.results["time_series"] is not None:
501
  insights.append("\nTIME SERIES INSIGHTS:")
502
  insights.append("-" * 40)
503
+ insights.append(
504
+ " β€’ Time series decomposition shows trend, seasonality, and random components"
505
+ )
506
+ insights.append(
507
+ " β€’ ARIMA model provides future forecasts with confidence intervals"
508
+ )
509
+ insights.append(
510
+ " β€’ Forecasts can be used for planning and decision-making"
511
+ )
512
  else:
513
+ insights.append(
514
+ "Time series analysis could not be performed or returned no results."
515
+ )
516
  # Print insights
517
  for insight in insights:
518
  print(insight)
519
  # Save insights to file
520
+ with open("data/exports/insights_report.txt", "w") as f:
521
+ f.write("\n".join(insights))
522
  return insights
523
+
524
  def run_complete_analysis(self):
525
  """Run the complete advanced analytics workflow."""
526
  print("Starting comprehensive advanced analytics...")
527
+
528
  # 1. EDA
529
  self.perform_eda()
530
+
531
  # 2. Dimensionality reduction
532
  self.perform_dimensionality_reduction()
533
+
534
  # 3. Statistical modeling
535
  self.perform_statistical_modeling()
536
+
537
  # 4. Clustering
538
  self.perform_clustering()
539
+
540
  # 5. Time series analysis
541
  self.perform_time_series_analysis()
542
+
543
  # 6. Generate insights
544
  self.generate_insights_report()
545
+
546
  print("\n" + "=" * 60)
547
  print("ANALYSIS COMPLETE!")
548
  print("=" * 60)
549
  print("Check the following outputs:")
550
  print(" β€’ data/exports/insights_report.txt - Comprehensive insights")
551
  print(" β€’ data/exports/clustering_analysis.png - Clustering results")
552
+ print(
553
+ " β€’ data/exports/time_series_decomposition.png - Time series decomposition"
554
+ )
555
  print(" β€’ data/exports/time_series_forecast.png - Time series forecast")
556
+
557
+ return self.results
src/analysis/economic_analyzer.py CHANGED
@@ -4,198 +4,215 @@ Quick Start Guide for FRED Economic Data Analysis
4
  Demonstrates how to load and analyze the collected data
5
  """
6
 
7
- import pandas as pd
 
 
8
  import matplotlib.pyplot as plt
 
9
  import seaborn as sns
10
- import sys
11
- import os
12
- sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
13
 
14
- from core.fred_client import FREDDataCollectorV2
 
15
  from datetime import datetime, timedelta
16
 
 
 
 
17
  def load_latest_data():
18
  """Load the most recent data file."""
19
- import os
20
  import glob
21
-
 
22
  # Find the most recent data file
23
- data_files = glob.glob('data/fred_economic_data_*.csv')
24
  if not data_files:
25
  print("No data files found. Run the collector first.")
26
  return None
27
-
28
  latest_file = max(data_files, key=os.path.getctime)
29
  print(f"Loading data from: {latest_file}")
30
-
31
  df = pd.read_csv(latest_file, index_col=0, parse_dates=True)
32
  return df
33
 
 
34
  def analyze_gdp_trends(df):
35
  """Analyze GDP trends."""
36
  print("\n=== GDP Analysis ===")
37
-
38
- if 'GDP' not in df.columns:
39
  print("GDP data not available")
40
  return
41
-
42
- gdp_data = df['GDP'].dropna()
43
-
44
  print(f"GDP Data Points: {len(gdp_data)}")
45
  print(f"Date Range: {gdp_data.index.min()} to {gdp_data.index.max()}")
46
  print(f"Latest GDP: ${gdp_data.iloc[-1]:,.2f} billion")
47
- print(f"GDP Growth (last 5 years): {((gdp_data.iloc[-1] / gdp_data.iloc[-20]) - 1) * 100:.2f}%")
48
-
 
 
49
  # Plot GDP trend
50
  plt.figure(figsize=(12, 6))
51
  gdp_data.plot(linewidth=2)
52
- plt.title('US GDP Over Time')
53
- plt.ylabel('GDP (Billions of Dollars)')
54
  plt.grid(True, alpha=0.3)
55
  plt.tight_layout()
56
  plt.show()
57
 
 
58
  def analyze_unemployment(df):
59
  """Analyze unemployment trends."""
60
  print("\n=== Unemployment Analysis ===")
61
-
62
- if 'UNRATE' not in df.columns:
63
  print("Unemployment data not available")
64
  return
65
-
66
- unrate_data = df['UNRATE'].dropna()
67
-
68
  print(f"Unemployment Data Points: {len(unrate_data)}")
69
  print(f"Current Unemployment Rate: {unrate_data.iloc[-1]:.1f}%")
70
  print(f"Average Unemployment Rate: {unrate_data.mean():.1f}%")
71
  print(f"Lowest Rate: {unrate_data.min():.1f}%")
72
  print(f"Highest Rate: {unrate_data.max():.1f}%")
73
-
74
  # Plot unemployment trend
75
  plt.figure(figsize=(12, 6))
76
- unrate_data.plot(linewidth=2, color='red')
77
- plt.title('US Unemployment Rate Over Time')
78
- plt.ylabel('Unemployment Rate (%)')
79
  plt.grid(True, alpha=0.3)
80
  plt.tight_layout()
81
  plt.show()
82
 
 
83
  def analyze_inflation(df):
84
  """Analyze inflation trends using CPI."""
85
  print("\n=== Inflation Analysis (CPI) ===")
86
-
87
- if 'CPIAUCSL' not in df.columns:
88
  print("CPI data not available")
89
  return
90
-
91
- cpi_data = df['CPIAUCSL'].dropna()
92
-
93
  # Calculate year-over-year inflation
94
  cpi_yoy = cpi_data.pct_change(periods=12) * 100
95
-
96
  print(f"CPI Data Points: {len(cpi_data)}")
97
  print(f"Current CPI: {cpi_data.iloc[-1]:.2f}")
98
  print(f"Current YoY Inflation: {cpi_yoy.iloc[-1]:.2f}%")
99
  print(f"Average YoY Inflation: {cpi_yoy.mean():.2f}%")
100
-
101
  # Plot inflation trend
102
  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
103
-
104
- cpi_data.plot(ax=ax1, linewidth=2, color='green')
105
- ax1.set_title('Consumer Price Index (CPI)')
106
- ax1.set_ylabel('CPI')
107
  ax1.grid(True, alpha=0.3)
108
-
109
- cpi_yoy.plot(ax=ax2, linewidth=2, color='orange')
110
- ax2.set_title('Year-over-Year Inflation Rate')
111
- ax2.set_ylabel('Inflation Rate (%)')
112
  ax2.grid(True, alpha=0.3)
113
-
114
  plt.tight_layout()
115
  plt.show()
116
 
 
117
  def analyze_interest_rates(df):
118
  """Analyze interest rate trends."""
119
  print("\n=== Interest Rate Analysis ===")
120
-
121
  rates_data = {}
122
- if 'FEDFUNDS' in df.columns:
123
- rates_data['Federal Funds Rate'] = df['FEDFUNDS'].dropna()
124
- if 'DGS10' in df.columns:
125
- rates_data['10-Year Treasury'] = df['DGS10'].dropna()
126
-
127
  if not rates_data:
128
  print("No interest rate data available")
129
  return
130
-
131
  for name, data in rates_data.items():
132
  print(f"\n{name}:")
133
  print(f" Current Rate: {data.iloc[-1]:.2f}%")
134
  print(f" Average Rate: {data.mean():.2f}%")
135
  print(f" Range: {data.min():.2f}% - {data.max():.2f}%")
136
-
137
  # Plot interest rates
138
  plt.figure(figsize=(12, 6))
139
  for name, data in rates_data.items():
140
  data.plot(linewidth=2, label=name)
141
-
142
- plt.title('Interest Rates Over Time')
143
- plt.ylabel('Interest Rate (%)')
144
  plt.legend()
145
  plt.grid(True, alpha=0.3)
146
  plt.tight_layout()
147
  plt.show()
148
 
 
149
  def correlation_analysis(df):
150
  """Analyze correlations between economic indicators."""
151
  print("\n=== Correlation Analysis ===")
152
-
153
  # Select available indicators
154
- available_cols = [col for col in ['GDP', 'UNRATE', 'CPIAUCSL', 'FEDFUNDS', 'DGS10']
155
- if col in df.columns]
156
-
 
 
 
157
  if len(available_cols) < 2:
158
  print("Need at least 2 indicators for correlation analysis")
159
  return
160
-
161
  # Calculate correlations
162
  corr_data = df[available_cols].corr()
163
-
164
  print("Correlation Matrix:")
165
  print(corr_data.round(3))
166
-
167
  # Plot correlation heatmap
168
  plt.figure(figsize=(8, 6))
169
- sns.heatmap(corr_data, annot=True, cmap='coolwarm', center=0,
170
- square=True, linewidths=0.5)
171
- plt.title('Economic Indicators Correlation Matrix')
 
172
  plt.tight_layout()
173
  plt.show()
174
 
 
175
  def main():
176
  """Run the quick start analysis."""
177
  print("FRED Economic Data - Quick Start Analysis")
178
  print("=" * 50)
179
-
180
  # Load data
181
  df = load_latest_data()
182
  if df is None:
183
  return
184
-
185
  print(f"Data loaded successfully!")
186
  print(f"Shape: {df.shape}")
187
  print(f"Columns: {list(df.columns)}")
188
  print(f"Date range: {df.index.min()} to {df.index.max()}")
189
-
190
  # Run analyses
191
  analyze_gdp_trends(df)
192
  analyze_unemployment(df)
193
  analyze_inflation(df)
194
  analyze_interest_rates(df)
195
  correlation_analysis(df)
196
-
197
  print("\n=== Analysis Complete ===")
198
  print("Check the generated plots for visual insights!")
199
 
 
200
  if __name__ == "__main__":
201
- main()
 
4
  Demonstrates how to load and analyze the collected data
5
  """
6
 
7
+ import os
8
+ import sys
9
+
10
  import matplotlib.pyplot as plt
11
+ import pandas as pd
12
  import seaborn as sns
 
 
 
13
 
14
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
15
+
16
  from datetime import datetime, timedelta
17
 
18
+ from core.fred_client import FREDDataCollectorV2
19
+
20
+
21
  def load_latest_data():
22
  """Load the most recent data file."""
 
23
  import glob
24
+ import os
25
+
26
  # Find the most recent data file
27
+ data_files = glob.glob("data/fred_economic_data_*.csv")
28
  if not data_files:
29
  print("No data files found. Run the collector first.")
30
  return None
31
+
32
  latest_file = max(data_files, key=os.path.getctime)
33
  print(f"Loading data from: {latest_file}")
34
+
35
  df = pd.read_csv(latest_file, index_col=0, parse_dates=True)
36
  return df
37
 
38
+
39
  def analyze_gdp_trends(df):
40
  """Analyze GDP trends."""
41
  print("\n=== GDP Analysis ===")
42
+
43
+ if "GDP" not in df.columns:
44
  print("GDP data not available")
45
  return
46
+
47
+ gdp_data = df["GDP"].dropna()
48
+
49
  print(f"GDP Data Points: {len(gdp_data)}")
50
  print(f"Date Range: {gdp_data.index.min()} to {gdp_data.index.max()}")
51
  print(f"Latest GDP: ${gdp_data.iloc[-1]:,.2f} billion")
52
+ print(
53
+ f"GDP Growth (last 5 years): {((gdp_data.iloc[-1] / gdp_data.iloc[-20]) - 1) * 100:.2f}%"
54
+ )
55
+
56
  # Plot GDP trend
57
  plt.figure(figsize=(12, 6))
58
  gdp_data.plot(linewidth=2)
59
+ plt.title("US GDP Over Time")
60
+ plt.ylabel("GDP (Billions of Dollars)")
61
  plt.grid(True, alpha=0.3)
62
  plt.tight_layout()
63
  plt.show()
64
 
65
+
66
  def analyze_unemployment(df):
67
  """Analyze unemployment trends."""
68
  print("\n=== Unemployment Analysis ===")
69
+
70
+ if "UNRATE" not in df.columns:
71
  print("Unemployment data not available")
72
  return
73
+
74
+ unrate_data = df["UNRATE"].dropna()
75
+
76
  print(f"Unemployment Data Points: {len(unrate_data)}")
77
  print(f"Current Unemployment Rate: {unrate_data.iloc[-1]:.1f}%")
78
  print(f"Average Unemployment Rate: {unrate_data.mean():.1f}%")
79
  print(f"Lowest Rate: {unrate_data.min():.1f}%")
80
  print(f"Highest Rate: {unrate_data.max():.1f}%")
81
+
82
  # Plot unemployment trend
83
  plt.figure(figsize=(12, 6))
84
+ unrate_data.plot(linewidth=2, color="red")
85
+ plt.title("US Unemployment Rate Over Time")
86
+ plt.ylabel("Unemployment Rate (%)")
87
  plt.grid(True, alpha=0.3)
88
  plt.tight_layout()
89
  plt.show()
90
 
91
+
92
  def analyze_inflation(df):
93
  """Analyze inflation trends using CPI."""
94
  print("\n=== Inflation Analysis (CPI) ===")
95
+
96
+ if "CPIAUCSL" not in df.columns:
97
  print("CPI data not available")
98
  return
99
+
100
+ cpi_data = df["CPIAUCSL"].dropna()
101
+
102
  # Calculate year-over-year inflation
103
  cpi_yoy = cpi_data.pct_change(periods=12) * 100
104
+
105
  print(f"CPI Data Points: {len(cpi_data)}")
106
  print(f"Current CPI: {cpi_data.iloc[-1]:.2f}")
107
  print(f"Current YoY Inflation: {cpi_yoy.iloc[-1]:.2f}%")
108
  print(f"Average YoY Inflation: {cpi_yoy.mean():.2f}%")
109
+
110
  # Plot inflation trend
111
  fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))
112
+
113
+ cpi_data.plot(ax=ax1, linewidth=2, color="green")
114
+ ax1.set_title("Consumer Price Index (CPI)")
115
+ ax1.set_ylabel("CPI")
116
  ax1.grid(True, alpha=0.3)
117
+
118
+ cpi_yoy.plot(ax=ax2, linewidth=2, color="orange")
119
+ ax2.set_title("Year-over-Year Inflation Rate")
120
+ ax2.set_ylabel("Inflation Rate (%)")
121
  ax2.grid(True, alpha=0.3)
122
+
123
  plt.tight_layout()
124
  plt.show()
125
 
126
+
127
  def analyze_interest_rates(df):
128
  """Analyze interest rate trends."""
129
  print("\n=== Interest Rate Analysis ===")
130
+
131
  rates_data = {}
132
+ if "FEDFUNDS" in df.columns:
133
+ rates_data["Federal Funds Rate"] = df["FEDFUNDS"].dropna()
134
+ if "DGS10" in df.columns:
135
+ rates_data["10-Year Treasury"] = df["DGS10"].dropna()
136
+
137
  if not rates_data:
138
  print("No interest rate data available")
139
  return
140
+
141
  for name, data in rates_data.items():
142
  print(f"\n{name}:")
143
  print(f" Current Rate: {data.iloc[-1]:.2f}%")
144
  print(f" Average Rate: {data.mean():.2f}%")
145
  print(f" Range: {data.min():.2f}% - {data.max():.2f}%")
146
+
147
  # Plot interest rates
148
  plt.figure(figsize=(12, 6))
149
  for name, data in rates_data.items():
150
  data.plot(linewidth=2, label=name)
151
+
152
+ plt.title("Interest Rates Over Time")
153
+ plt.ylabel("Interest Rate (%)")
154
  plt.legend()
155
  plt.grid(True, alpha=0.3)
156
  plt.tight_layout()
157
  plt.show()
158
 
159
+
160
  def correlation_analysis(df):
161
  """Analyze correlations between economic indicators."""
162
  print("\n=== Correlation Analysis ===")
163
+
164
  # Select available indicators
165
+ available_cols = [
166
+ col
167
+ for col in ["GDP", "UNRATE", "CPIAUCSL", "FEDFUNDS", "DGS10"]
168
+ if col in df.columns
169
+ ]
170
+
171
  if len(available_cols) < 2:
172
  print("Need at least 2 indicators for correlation analysis")
173
  return
174
+
175
  # Calculate correlations
176
  corr_data = df[available_cols].corr()
177
+
178
  print("Correlation Matrix:")
179
  print(corr_data.round(3))
180
+
181
  # Plot correlation heatmap
182
  plt.figure(figsize=(8, 6))
183
+ sns.heatmap(
184
+ corr_data, annot=True, cmap="coolwarm", center=0, square=True, linewidths=0.5
185
+ )
186
+ plt.title("Economic Indicators Correlation Matrix")
187
  plt.tight_layout()
188
  plt.show()
189
 
190
+
191
  def main():
192
  """Run the quick start analysis."""
193
  print("FRED Economic Data - Quick Start Analysis")
194
  print("=" * 50)
195
+
196
  # Load data
197
  df = load_latest_data()
198
  if df is None:
199
  return
200
+
201
  print(f"Data loaded successfully!")
202
  print(f"Shape: {df.shape}")
203
  print(f"Columns: {list(df.columns)}")
204
  print(f"Date range: {df.index.min()} to {df.index.max()}")
205
+
206
  # Run analyses
207
  analyze_gdp_trends(df)
208
  analyze_unemployment(df)
209
  analyze_inflation(df)
210
  analyze_interest_rates(df)
211
  correlation_analysis(df)
212
+
213
  print("\n=== Analysis Complete ===")
214
  print("Check the generated plots for visual insights!")
215
 
216
+
217
  if __name__ == "__main__":
218
+ main()
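The per-indicator helpers also work on any DataFrame whose columns use the FRED series IDs, which makes them easy to smoke-test without a collected CSV. A sketch with synthetic data (module path assumed from this repo layout; run from the repository root):

import numpy as np
import pandas as pd

from src.analysis import economic_analyzer

dates = pd.date_range("2020-01-01", periods=36, freq="MS")
toy = pd.DataFrame(
    {
        "UNRATE": np.random.uniform(3.5, 8.0, len(dates)),
        "FEDFUNDS": np.random.uniform(0.25, 5.5, len(dates)),
    },
    index=dates,
)

economic_analyzer.analyze_unemployment(toy)  # prints stats and plots the series
economic_analyzer.correlation_analysis(toy)  # needs at least two known indicators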
src/core/__init__.py CHANGED
@@ -4,4 +4,4 @@ Core functionality for FRED data collection and processing.
4
 
5
  from .fred_client import FREDDataCollectorV2
6
 
7
- __all__ = ['FREDDataCollectorV2']
 
4
 
5
  from .fred_client import FREDDataCollectorV2
6
 
7
+ __all__ = ["FREDDataCollectorV2"]
src/core/__pycache__/__init__.cpython-39.pyc CHANGED
Binary files a/src/core/__pycache__/__init__.cpython-39.pyc and b/src/core/__pycache__/__init__.cpython-39.pyc differ
 
src/core/__pycache__/fred_client.cpython-39.pyc CHANGED
Binary files a/src/core/__pycache__/fred_client.cpython-39.pyc and b/src/core/__pycache__/fred_client.cpython-39.pyc differ
 
src/core/base_pipeline.py CHANGED
@@ -1,38 +1,38 @@
1
  import abc
2
  import logging
3
- import yaml
4
  import os
5
 
 
 
 
6
  class BasePipeline(abc.ABC):
7
  """
8
  Abstract base class for all data pipelines.
9
  Handles config loading, logging, and pipeline orchestration.
10
  """
 
11
  def __init__(self, config_path: str):
12
  self.config = self.load_config(config_path)
13
  self.logger = self.setup_logger()
14
 
15
  @staticmethod
16
  def load_config(config_path: str):
17
- with open(config_path, 'r') as f:
18
  return yaml.safe_load(f)
19
 
20
  def setup_logger(self):
21
- log_cfg = self.config.get('logging', {})
22
- log_level = getattr(logging, log_cfg.get('level', 'INFO').upper(), logging.INFO)
23
- log_file = log_cfg.get('file', 'pipeline.log')
24
  os.makedirs(os.path.dirname(log_file), exist_ok=True)
25
  logging.basicConfig(
26
  level=log_level,
27
- format='%(asctime)s %(levelname)s %(name)s %(message)s',
28
- handlers=[
29
- logging.FileHandler(log_file),
30
- logging.StreamHandler()
31
- ]
32
  )
33
  return logging.getLogger(self.__class__.__name__)
34
 
35
  @abc.abstractmethod
36
  def run(self):
37
  """Run the pipeline (to be implemented by subclasses)."""
38
- pass
 
1
  import abc
2
  import logging
 
3
  import os
4
 
5
+ import yaml
6
+
7
+
8
  class BasePipeline(abc.ABC):
9
  """
10
  Abstract base class for all data pipelines.
11
  Handles config loading, logging, and pipeline orchestration.
12
  """
13
+
14
  def __init__(self, config_path: str):
15
  self.config = self.load_config(config_path)
16
  self.logger = self.setup_logger()
17
 
18
  @staticmethod
19
  def load_config(config_path: str):
20
+ with open(config_path, "r") as f:
21
  return yaml.safe_load(f)
22
 
23
  def setup_logger(self):
24
+ log_cfg = self.config.get("logging", {})
25
+ log_level = getattr(logging, log_cfg.get("level", "INFO").upper(), logging.INFO)
26
+ log_file = log_cfg.get("file", "pipeline.log")
27
  os.makedirs(os.path.dirname(log_file), exist_ok=True)
28
  logging.basicConfig(
29
  level=log_level,
30
+ format="%(asctime)s %(levelname)s %(name)s %(message)s",
31
+ handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
 
 
 
32
  )
33
  return logging.getLogger(self.__class__.__name__)
34
 
35
  @abc.abstractmethod
36
  def run(self):
37
  """Run the pipeline (to be implemented by subclasses)."""
38
+ pass
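A minimal sketch of a concrete subclass, assuming a YAML config with the logging keys that setup_logger() reads. The log file is given a directory component on purpose, since setup_logger() calls os.makedirs on its dirname.

import os
import tempfile

from src.core.base_pipeline import BasePipeline  # import path assumed from this layout


class HelloPipeline(BasePipeline):
    def run(self):
        self.logger.info("hello from %s", self.__class__.__name__)


cfg = """
logging:
  level: INFO
  file: logs/hello_pipeline.log
"""

with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f:
    f.write(cfg)
    cfg_path = f.name

HelloPipeline(cfg_path).run()
os.remove(cfg_path)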
src/core/fred_client.py CHANGED
@@ -6,283 +6,298 @@ using direct API calls instead of the fredapi library
6
  """
7
 
8
  import os
9
- import pandas as pd
10
- import numpy as np
 
11
  import matplotlib.pyplot as plt
12
- import seaborn as sns
 
13
  import requests
14
- from datetime import datetime, timedelta
15
- import warnings
16
- warnings.filterwarnings('ignore')
17
 
18
- import sys
19
  import os
20
- sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
 
 
 
 
 
21
 
22
- from config.settings import FRED_API_KEY, DEFAULT_START_DATE, DEFAULT_END_DATE, OUTPUT_DIR, PLOTS_DIR
23
 
24
  class FREDDataCollectorV2:
25
  def __init__(self, api_key=None):
26
  """Initialize the FRED data collector with API key."""
27
  self.api_key = api_key or FRED_API_KEY
28
  self.base_url = "https://api.stlouisfed.org/fred"
29
-
30
  # Create output directories
31
  os.makedirs(OUTPUT_DIR, exist_ok=True)
32
  os.makedirs(PLOTS_DIR, exist_ok=True)
33
-
34
  # Common economic indicators
35
  self.indicators = {
36
- 'GDP': 'GDP', # Gross Domestic Product
37
- 'UNRATE': 'UNRATE', # Unemployment Rate
38
- 'CPIAUCSL': 'CPIAUCSL', # Consumer Price Index
39
- 'FEDFUNDS': 'FEDFUNDS', # Federal Funds Rate
40
- 'DGS10': 'DGS10', # 10-Year Treasury Rate
41
- 'DEXUSEU': 'DEXUSEU', # US/Euro Exchange Rate
42
- 'PAYEMS': 'PAYEMS', # Total Nonfarm Payrolls
43
- 'INDPRO': 'INDPRO', # Industrial Production
44
- 'M2SL': 'M2SL', # M2 Money Stock
45
- 'PCE': 'PCE' # Personal Consumption Expenditures
46
  }
47
-
48
  def get_series_info(self, series_id):
49
  """Get information about a FRED series."""
50
  try:
51
  url = f"{self.base_url}/series"
52
  params = {
53
- 'series_id': series_id,
54
- 'api_key': self.api_key,
55
- 'file_type': 'json'
56
  }
57
-
58
  response = requests.get(url, params=params)
59
-
60
  if response.status_code == 200:
61
  data = response.json()
62
- series = data.get('seriess', [])
63
-
64
  if series:
65
  s = series[0]
66
  return {
67
- 'id': s['id'],
68
- 'title': s['title'],
69
- 'units': s.get('units', ''),
70
- 'frequency': s.get('frequency', ''),
71
- 'last_updated': s.get('last_updated', ''),
72
- 'notes': s.get('notes', '')
73
  }
74
-
75
  return None
76
-
77
  except Exception as e:
78
  print(f"Error getting info for {series_id}: {e}")
79
  return None
80
-
81
  def get_economic_data(self, series_ids, start_date=None, end_date=None):
82
  """Fetch economic data for specified series."""
83
  start_date = start_date or DEFAULT_START_DATE
84
  end_date = end_date or DEFAULT_END_DATE
85
-
86
  data = {}
87
-
88
  for series_id in series_ids:
89
  try:
90
  print(f"Fetching data for {series_id}...")
91
-
92
  url = f"{self.base_url}/series/observations"
93
  params = {
94
- 'series_id': series_id,
95
- 'api_key': self.api_key,
96
- 'file_type': 'json',
97
- 'start_date': start_date,
98
- 'end_date': end_date
99
  }
100
-
101
  response = requests.get(url, params=params)
102
-
103
  if response.status_code == 200:
104
  response_data = response.json()
105
- observations = response_data.get('observations', [])
106
-
107
  if observations:
108
  # Convert to pandas Series
109
  dates = []
110
  values = []
111
-
112
  for obs in observations:
113
  try:
114
- date = pd.to_datetime(obs['date'])
115
- value = float(obs['value']) if obs['value'] != '.' else np.nan
 
 
 
 
116
  dates.append(date)
117
  values.append(value)
118
  except (ValueError, KeyError):
119
  continue
120
-
121
  if dates and values:
122
  series_data = pd.Series(values, index=dates, name=series_id)
123
  data[series_id] = series_data
124
- print(f"βœ“ Retrieved {len(series_data)} observations for {series_id}")
 
 
125
  else:
126
  print(f"βœ— No valid data for {series_id}")
127
  else:
128
  print(f"βœ— No observations found for {series_id}")
129
  else:
130
  print(f"βœ— Error fetching {series_id}: HTTP {response.status_code}")
131
-
132
  except Exception as e:
133
  print(f"βœ— Error fetching {series_id}: {e}")
134
-
135
  return data
136
-
137
  def create_dataframe(self, data_dict):
138
  """Convert dictionary of series data to a pandas DataFrame."""
139
  if not data_dict:
140
  return pd.DataFrame()
141
-
142
  # Find the common date range
143
  all_dates = set()
144
  for series in data_dict.values():
145
  all_dates.update(series.index)
146
-
147
  # Create a complete date range
148
  if all_dates:
149
- date_range = pd.date_range(min(all_dates), max(all_dates), freq='D')
150
  df = pd.DataFrame(index=date_range)
151
-
152
  # Add each series
153
  for series_id, series_data in data_dict.items():
154
  df[series_id] = series_data
155
-
156
- df.index.name = 'Date'
157
  return df
158
-
159
  return pd.DataFrame()
160
-
161
  def save_data(self, df, filename):
162
  """Save data to CSV file."""
163
  if df.empty:
164
  print("No data to save")
165
  return None
166
-
167
  filepath = os.path.join(OUTPUT_DIR, filename)
168
  df.to_csv(filepath)
169
  print(f"Data saved to {filepath}")
170
  return filepath
171
-
172
  def plot_economic_indicators(self, df, indicators_to_plot=None):
173
  """Create plots for economic indicators."""
174
  if df.empty:
175
  print("No data to plot")
176
  return
177
-
178
  if indicators_to_plot is None:
179
  indicators_to_plot = [col for col in df.columns if col in df.columns]
180
-
181
  if not indicators_to_plot:
182
  print("No indicators to plot")
183
  return
184
-
185
  # Set up the plotting style
186
- plt.style.use('default')
187
  sns.set_palette("husl")
188
-
189
  # Create subplots
190
  n_indicators = len(indicators_to_plot)
191
- fig, axes = plt.subplots(n_indicators, 1, figsize=(15, 4*n_indicators))
192
-
193
  if n_indicators == 1:
194
  axes = [axes]
195
-
196
  for i, indicator in enumerate(indicators_to_plot):
197
  if indicator in df.columns:
198
  ax = axes[i]
199
  df[indicator].dropna().plot(ax=ax, linewidth=2)
200
-
201
  # Get series info for title
202
  info = self.get_series_info(indicator)
203
  title = f'{indicator} - {info["title"]}' if info else indicator
204
  ax.set_title(title)
205
- ax.set_ylabel('Value')
206
  ax.grid(True, alpha=0.3)
207
-
208
  plt.tight_layout()
209
- plot_path = os.path.join(PLOTS_DIR, 'economic_indicators.png')
210
- plt.savefig(plot_path, dpi=300, bbox_inches='tight')
211
  plt.show()
212
  print(f"Plot saved to {plot_path}")
213
-
214
  def generate_summary_statistics(self, df):
215
  """Generate summary statistics for the economic data."""
216
  if df.empty:
217
  return pd.DataFrame()
218
-
219
  summary = df.describe()
220
-
221
  # Add additional statistics
222
- summary.loc['missing_values'] = df.isnull().sum()
223
- summary.loc['missing_percentage'] = (df.isnull().sum() / len(df)) * 100
224
-
225
  return summary
226
-
227
  def run_analysis(self, series_ids=None, start_date=None, end_date=None):
228
  """Run a complete analysis of economic indicators."""
229
  if series_ids is None:
230
  series_ids = list(self.indicators.values())
231
-
232
  print("=== FRED Economic Data Analysis v2 ===")
233
  print(f"API Key: {self.api_key[:8]}...")
234
- print(f"Date Range: {start_date or DEFAULT_START_DATE} to {end_date or DEFAULT_END_DATE}")
 
 
235
  print(f"Series to analyze: {series_ids}")
236
  print("=" * 50)
237
-
238
  # Fetch data
239
  data = self.get_economic_data(series_ids, start_date, end_date)
240
-
241
  if not data:
242
  print("No data retrieved. Please check your API key and series IDs.")
243
  return None, None
244
-
245
  # Create DataFrame
246
  df = self.create_dataframe(data)
247
-
248
  if df.empty:
249
  print("No data to analyze")
250
  return None, None
251
-
252
  # Save data
253
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
254
- self.save_data(df, f'fred_economic_data_{timestamp}.csv')
255
-
256
  # Generate summary statistics
257
  summary = self.generate_summary_statistics(df)
258
  print("\n=== Summary Statistics ===")
259
  print(summary)
260
-
261
  # Create plots
262
  print("\n=== Creating Visualizations ===")
263
  self.plot_economic_indicators(df)
264
-
265
  return df, summary
266
 
 
267
  def main():
268
  """Main function to run the FRED data analysis."""
269
  collector = FREDDataCollectorV2()
270
-
271
  # Example: Analyze key economic indicators
272
- key_indicators = ['GDP', 'UNRATE', 'CPIAUCSL', 'FEDFUNDS', 'DGS10']
273
-
274
  try:
275
  df, summary = collector.run_analysis(series_ids=key_indicators)
276
-
277
  if df is not None:
278
  print("\n=== Analysis Complete ===")
279
  print(f"Data shape: {df.shape}")
280
  print(f"Date range: {df.index.min()} to {df.index.max()}")
281
  else:
282
  print("\n=== Analysis Failed ===")
283
-
284
  except Exception as e:
285
  print(f"Error during analysis: {e}")
286
 
 
287
  if __name__ == "__main__":
288
- main()
 
6
  """
7
 
8
  import os
9
+ import warnings
10
+ from datetime import datetime, timedelta
11
+
12
  import matplotlib.pyplot as plt
13
+ import numpy as np
14
+ import pandas as pd
15
  import requests
16
+ import seaborn as sns
17
+
18
+ warnings.filterwarnings("ignore")
19
 
 
20
  import os
21
+ import sys
22
+
23
+ sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
24
+
25
+ from config.settings import (DEFAULT_END_DATE, DEFAULT_START_DATE,
26
+ FRED_API_KEY, OUTPUT_DIR, PLOTS_DIR)
27
 
 
28
 
29
  class FREDDataCollectorV2:
30
  def __init__(self, api_key=None):
31
  """Initialize the FRED data collector with API key."""
32
  self.api_key = api_key or FRED_API_KEY
33
  self.base_url = "https://api.stlouisfed.org/fred"
34
+
35
  # Create output directories
36
  os.makedirs(OUTPUT_DIR, exist_ok=True)
37
  os.makedirs(PLOTS_DIR, exist_ok=True)
38
+
39
  # Common economic indicators
40
  self.indicators = {
41
+ "GDP": "GDP", # Gross Domestic Product
42
+ "UNRATE": "UNRATE", # Unemployment Rate
43
+ "CPIAUCSL": "CPIAUCSL", # Consumer Price Index
44
+ "FEDFUNDS": "FEDFUNDS", # Federal Funds Rate
45
+ "DGS10": "DGS10", # 10-Year Treasury Rate
46
+ "DEXUSEU": "DEXUSEU", # US/Euro Exchange Rate
47
+ "PAYEMS": "PAYEMS", # Total Nonfarm Payrolls
48
+ "INDPRO": "INDPRO", # Industrial Production
49
+ "M2SL": "M2SL", # M2 Money Stock
50
+ "PCE": "PCE", # Personal Consumption Expenditures
51
  }
52
+
53
  def get_series_info(self, series_id):
54
  """Get information about a FRED series."""
55
  try:
56
  url = f"{self.base_url}/series"
57
  params = {
58
+ "series_id": series_id,
59
+ "api_key": self.api_key,
60
+ "file_type": "json",
61
  }
62
+
63
  response = requests.get(url, params=params)
64
+
65
  if response.status_code == 200:
66
  data = response.json()
67
+ series = data.get("seriess", [])
68
+
69
  if series:
70
  s = series[0]
71
  return {
72
+ "id": s["id"],
73
+ "title": s["title"],
74
+ "units": s.get("units", ""),
75
+ "frequency": s.get("frequency", ""),
76
+ "last_updated": s.get("last_updated", ""),
77
+ "notes": s.get("notes", ""),
78
  }
79
+
80
  return None
81
+
82
  except Exception as e:
83
  print(f"Error getting info for {series_id}: {e}")
84
  return None
85
+
86
  def get_economic_data(self, series_ids, start_date=None, end_date=None):
87
  """Fetch economic data for specified series."""
88
  start_date = start_date or DEFAULT_START_DATE
89
  end_date = end_date or DEFAULT_END_DATE
90
+
91
  data = {}
92
+
93
  for series_id in series_ids:
94
  try:
95
  print(f"Fetching data for {series_id}...")
96
+
97
  url = f"{self.base_url}/series/observations"
98
  params = {
99
+ "series_id": series_id,
100
+ "api_key": self.api_key,
101
+ "file_type": "json",
102
+ "start_date": start_date,
103
+ "end_date": end_date,
104
  }
105
+
106
  response = requests.get(url, params=params)
107
+
108
  if response.status_code == 200:
109
  response_data = response.json()
110
+ observations = response_data.get("observations", [])
111
+
112
  if observations:
113
  # Convert to pandas Series
114
  dates = []
115
  values = []
116
+
117
  for obs in observations:
118
  try:
119
+ date = pd.to_datetime(obs["date"])
120
+ value = (
121
+ float(obs["value"])
122
+ if obs["value"] != "."
123
+ else np.nan
124
+ )
125
  dates.append(date)
126
  values.append(value)
127
  except (ValueError, KeyError):
128
  continue
129
+
130
  if dates and values:
131
  series_data = pd.Series(values, index=dates, name=series_id)
132
  data[series_id] = series_data
133
+ print(
134
+ f"βœ“ Retrieved {len(series_data)} observations for {series_id}"
135
+ )
136
  else:
137
  print(f"βœ— No valid data for {series_id}")
138
  else:
139
  print(f"βœ— No observations found for {series_id}")
140
  else:
141
  print(f"βœ— Error fetching {series_id}: HTTP {response.status_code}")
142
+
143
  except Exception as e:
144
  print(f"βœ— Error fetching {series_id}: {e}")
145
+
146
  return data
147
+
148
  def create_dataframe(self, data_dict):
149
  """Convert dictionary of series data to a pandas DataFrame."""
150
  if not data_dict:
151
  return pd.DataFrame()
152
+
153
  # Find the common date range
154
  all_dates = set()
155
  for series in data_dict.values():
156
  all_dates.update(series.index)
157
+
158
  # Create a complete date range
159
  if all_dates:
160
+ date_range = pd.date_range(min(all_dates), max(all_dates), freq="D")
161
  df = pd.DataFrame(index=date_range)
162
+
163
  # Add each series
164
  for series_id, series_data in data_dict.items():
165
  df[series_id] = series_data
166
+
167
+ df.index.name = "Date"
168
  return df
169
+
170
  return pd.DataFrame()
171
+
172
  def save_data(self, df, filename):
173
  """Save data to CSV file."""
174
  if df.empty:
175
  print("No data to save")
176
  return None
177
+
178
  filepath = os.path.join(OUTPUT_DIR, filename)
179
  df.to_csv(filepath)
180
  print(f"Data saved to {filepath}")
181
  return filepath
182
+
183
  def plot_economic_indicators(self, df, indicators_to_plot=None):
184
  """Create plots for economic indicators."""
185
  if df.empty:
186
  print("No data to plot")
187
  return
188
+
189
  if indicators_to_plot is None:
190
  indicators_to_plot = [col for col in df.columns if col in df.columns]
191
+
192
  if not indicators_to_plot:
193
  print("No indicators to plot")
194
  return
195
+
196
  # Set up the plotting style
197
+ plt.style.use("default")
198
  sns.set_palette("husl")
199
+
200
  # Create subplots
201
  n_indicators = len(indicators_to_plot)
202
+ fig, axes = plt.subplots(n_indicators, 1, figsize=(15, 4 * n_indicators))
203
+
204
  if n_indicators == 1:
205
  axes = [axes]
206
+
207
  for i, indicator in enumerate(indicators_to_plot):
208
  if indicator in df.columns:
209
  ax = axes[i]
210
  df[indicator].dropna().plot(ax=ax, linewidth=2)
211
+
212
  # Get series info for title
213
  info = self.get_series_info(indicator)
214
  title = f'{indicator} - {info["title"]}' if info else indicator
215
  ax.set_title(title)
216
+ ax.set_ylabel("Value")
217
  ax.grid(True, alpha=0.3)
218
+
219
  plt.tight_layout()
220
+ plot_path = os.path.join(PLOTS_DIR, "economic_indicators.png")
221
+ plt.savefig(plot_path, dpi=300, bbox_inches="tight")
222
  plt.show()
223
  print(f"Plot saved to {plot_path}")
224
+
225
  def generate_summary_statistics(self, df):
226
  """Generate summary statistics for the economic data."""
227
  if df.empty:
228
  return pd.DataFrame()
229
+
230
  summary = df.describe()
231
+
232
  # Add additional statistics
233
+ summary.loc["missing_values"] = df.isnull().sum()
234
+ summary.loc["missing_percentage"] = (df.isnull().sum() / len(df)) * 100
235
+
236
  return summary
237
+
238
  def run_analysis(self, series_ids=None, start_date=None, end_date=None):
239
  """Run a complete analysis of economic indicators."""
240
  if series_ids is None:
241
  series_ids = list(self.indicators.values())
242
+
243
  print("=== FRED Economic Data Analysis v2 ===")
244
  print(f"API Key: {self.api_key[:8]}...")
245
+ print(
246
+ f"Date Range: {start_date or DEFAULT_START_DATE} to {end_date or DEFAULT_END_DATE}"
247
+ )
248
  print(f"Series to analyze: {series_ids}")
249
  print("=" * 50)
250
+
251
  # Fetch data
252
  data = self.get_economic_data(series_ids, start_date, end_date)
253
+
254
  if not data:
255
  print("No data retrieved. Please check your API key and series IDs.")
256
  return None, None
257
+
258
  # Create DataFrame
259
  df = self.create_dataframe(data)
260
+
261
  if df.empty:
262
  print("No data to analyze")
263
  return None, None
264
+
265
  # Save data
266
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
267
+ self.save_data(df, f"fred_economic_data_{timestamp}.csv")
268
+
269
  # Generate summary statistics
270
  summary = self.generate_summary_statistics(df)
271
  print("\n=== Summary Statistics ===")
272
  print(summary)
273
+
274
  # Create plots
275
  print("\n=== Creating Visualizations ===")
276
  self.plot_economic_indicators(df)
277
+
278
  return df, summary
279
 
280
+
281
  def main():
282
  """Main function to run the FRED data analysis."""
283
  collector = FREDDataCollectorV2()
284
+
285
  # Example: Analyze key economic indicators
286
+ key_indicators = ["GDP", "UNRATE", "CPIAUCSL", "FEDFUNDS", "DGS10"]
287
+
288
  try:
289
  df, summary = collector.run_analysis(series_ids=key_indicators)
290
+
291
  if df is not None:
292
  print("\n=== Analysis Complete ===")
293
  print(f"Data shape: {df.shape}")
294
  print(f"Date range: {df.index.min()} to {df.index.max()}")
295
  else:
296
  print("\n=== Analysis Failed ===")
297
+
298
  except Exception as e:
299
  print(f"Error during analysis: {e}")
300
 
301
+
302
  if __name__ == "__main__":
303
+ main()
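Besides main(), the collector can be driven piecemeal with just the methods shown above; a short sketch (the API key is a placeholder):

from src.core.fred_client import FREDDataCollectorV2

collector = FREDDataCollectorV2(api_key="your-fred-api-key")  # placeholder key

raw = collector.get_economic_data(["GDP", "UNRATE"], "2020-01-01", "2023-12-31")
df = collector.create_dataframe(raw)

summary = collector.generate_summary_statistics(df)
print(summary)

collector.save_data(df, "gdp_unrate_2020_2023.csv")        # written under OUTPUT_DIR
collector.plot_economic_indicators(df, ["GDP", "UNRATE"])  # saved under PLOTS_DIR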
src/core/fred_pipeline.py CHANGED
@@ -1,22 +1,26 @@
1
- from .base_pipeline import BasePipeline
2
- import requests
3
- import pandas as pd
4
  import os
5
  from datetime import datetime
6
 
 
 
 
 
 
 
7
  class FREDPipeline(BasePipeline):
8
  """
9
  FRED Data Pipeline: Extracts, transforms, and loads FRED data using config.
10
  """
 
11
  def __init__(self, config_path: str):
12
  super().__init__(config_path)
13
- self.fred_cfg = self.config['fred']
14
- self.api_key = self.fred_cfg['api_key']
15
- self.series = self.fred_cfg['series']
16
- self.start_date = self.fred_cfg['start_date']
17
- self.end_date = self.fred_cfg['end_date']
18
- self.output_dir = self.fred_cfg['output_dir']
19
- self.export_dir = self.fred_cfg['export_dir']
20
  os.makedirs(self.output_dir, exist_ok=True)
21
  os.makedirs(self.export_dir, exist_ok=True)
22
 
@@ -26,21 +30,21 @@ class FREDPipeline(BasePipeline):
26
  data = {}
27
  for series_id in self.series:
28
  params = {
29
- 'series_id': series_id,
30
- 'api_key': self.api_key,
31
- 'file_type': 'json',
32
- 'start_date': self.start_date,
33
- 'end_date': self.end_date
34
  }
35
  try:
36
  resp = requests.get(base_url, params=params)
37
  resp.raise_for_status()
38
- obs = resp.json().get('observations', [])
39
  dates, values = [], []
40
  for o in obs:
41
  try:
42
- dates.append(pd.to_datetime(o['date']))
43
- values.append(float(o['value']) if o['value'] != '.' else None)
44
  except Exception:
45
  continue
46
  data[series_id] = pd.Series(values, index=dates, name=series_id)
@@ -59,11 +63,11 @@ class FREDPipeline(BasePipeline):
59
  all_dates.update(s.index)
60
  if not all_dates:
61
  return pd.DataFrame()
62
- date_range = pd.date_range(min(all_dates), max(all_dates), freq='D')
63
  df = pd.DataFrame(index=date_range)
64
  for k, v in data.items():
65
  df[k] = v
66
- df.index.name = 'Date'
67
  self.logger.info(f"Transformed data to DataFrame with shape {df.shape}")
68
  return df
69
 
@@ -73,8 +77,8 @@ class FREDPipeline(BasePipeline):
73
  self.logger.warning("No data to load.")
74
  return None
75
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
76
- out_path = os.path.join(self.output_dir, f'fred_data_{ts}.csv')
77
- exp_path = os.path.join(self.export_dir, f'fred_data_{ts}.csv')
78
  df.to_csv(out_path)
79
  df.to_csv(exp_path)
80
  self.logger.info(f"Saved data to {out_path} and {exp_path}")
@@ -85,4 +89,4 @@ class FREDPipeline(BasePipeline):
85
  data = self.extract()
86
  df = self.transform(data)
87
  self.load(df)
88
- self.logger.info("FRED data pipeline run complete.")
 
 
 
 
1
  import os
2
  from datetime import datetime
3
 
4
+ import pandas as pd
5
+ import requests
6
+
7
+ from .base_pipeline import BasePipeline
8
+
9
+
10
  class FREDPipeline(BasePipeline):
11
  """
12
  FRED Data Pipeline: Extracts, transforms, and loads FRED data using config.
13
  """
14
+
15
  def __init__(self, config_path: str):
16
  super().__init__(config_path)
17
+ self.fred_cfg = self.config["fred"]
18
+ self.api_key = self.fred_cfg["api_key"]
19
+ self.series = self.fred_cfg["series"]
20
+ self.start_date = self.fred_cfg["start_date"]
21
+ self.end_date = self.fred_cfg["end_date"]
22
+ self.output_dir = self.fred_cfg["output_dir"]
23
+ self.export_dir = self.fred_cfg["export_dir"]
24
  os.makedirs(self.output_dir, exist_ok=True)
25
  os.makedirs(self.export_dir, exist_ok=True)
26
 
 
30
  data = {}
31
  for series_id in self.series:
32
  params = {
33
+ "series_id": series_id,
34
+ "api_key": self.api_key,
35
+ "file_type": "json",
36
+ "start_date": self.start_date,
37
+ "end_date": self.end_date,
38
  }
39
  try:
40
  resp = requests.get(base_url, params=params)
41
  resp.raise_for_status()
42
+ obs = resp.json().get("observations", [])
43
  dates, values = [], []
44
  for o in obs:
45
  try:
46
+ dates.append(pd.to_datetime(o["date"]))
47
+ values.append(float(o["value"]) if o["value"] != "." else None)
48
  except Exception:
49
  continue
50
  data[series_id] = pd.Series(values, index=dates, name=series_id)
 
63
  all_dates.update(s.index)
64
  if not all_dates:
65
  return pd.DataFrame()
66
+ date_range = pd.date_range(min(all_dates), max(all_dates), freq="D")
67
  df = pd.DataFrame(index=date_range)
68
  for k, v in data.items():
69
  df[k] = v
70
+ df.index.name = "Date"
71
  self.logger.info(f"Transformed data to DataFrame with shape {df.shape}")
72
  return df
73
 
 
77
  self.logger.warning("No data to load.")
78
  return None
79
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
80
+ out_path = os.path.join(self.output_dir, f"fred_data_{ts}.csv")
81
+ exp_path = os.path.join(self.export_dir, f"fred_data_{ts}.csv")
82
  df.to_csv(out_path)
83
  df.to_csv(exp_path)
84
  self.logger.info(f"Saved data to {out_path} and {exp_path}")
 
89
  data = self.extract()
90
  df = self.transform(data)
91
  self.load(df)
92
+ self.logger.info("FRED data pipeline run complete.")
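The pipeline is entirely config-driven; a sketch of a throwaway config covering every key __init__ reads (all values are placeholders), plus the one-line invocation:

import os

import yaml

from src.core.fred_pipeline import FREDPipeline  # import path assumed from this layout

# Placeholder config mirroring the keys FREDPipeline.__init__ and setup_logger() read.
config = {
    "fred": {
        "api_key": "your-fred-api-key",
        "series": ["GDP", "UNRATE", "CPIAUCSL"],
        "start_date": "2020-01-01",
        "end_date": "2023-12-31",
        "output_dir": "data/raw",
        "export_dir": "data/exports",
    },
    "logging": {"level": "INFO", "file": "logs/fred_pipeline.log"},
}

os.makedirs("config", exist_ok=True)
with open("config/fred.yaml", "w") as f:
    yaml.safe_dump(config, f)

FREDPipeline("config/fred.yaml").run()  # extract -> transform -> load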
src/main.py ADDED
@@ -0,0 +1,141 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ FRED ML - Main Application Entry Point
4
+ Production-grade FastAPI application for economic data analysis
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ from contextlib import asynccontextmanager
10
+
11
+ import uvicorn
12
+ from fastapi import Depends, FastAPI, HTTPException
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ from fastapi.responses import JSONResponse
15
+
16
+ from config.settings import FRED_API_KEY
17
+ from src.analysis.advanced_analytics import AdvancedAnalytics
18
+ from src.core.fred_client import FREDDataCollectorV2
19
+
20
+ # Configure logging
21
+ logging.basicConfig(
22
+ level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
23
+ )
24
+ logger = logging.getLogger(__name__)
25
+
26
+ # Global variables for application state
27
+ collector = None
28
+ analytics = None
29
+
30
+
31
+ @asynccontextmanager
32
+ async def lifespan(app: FastAPI):
33
+ """Application lifespan manager"""
34
+ # Startup
35
+ global collector, analytics
36
+ logger.info("Starting FRED ML application...")
37
+
38
+ if not FRED_API_KEY:
39
+ logger.error("FRED_API_KEY not configured")
40
+ raise ValueError("FRED_API_KEY environment variable is required")
41
+
42
+ collector = FREDDataCollectorV2(api_key=FRED_API_KEY)
43
+ logger.info("FRED Data Collector initialized")
44
+
45
+ yield
46
+
47
+ # Shutdown
48
+ logger.info("Shutting down FRED ML application...")
49
+
50
+
51
+ # Create FastAPI application
52
+ app = FastAPI(
53
+ title="FRED ML API",
54
+ description="Economic Data Analysis API using Federal Reserve Economic Data",
55
+ version="1.0.0",
56
+ lifespan=lifespan,
57
+ )
58
+
59
+ # Add CORS middleware
60
+ app.add_middleware(
61
+ CORSMiddleware,
62
+ allow_origins=["*"],
63
+ allow_credentials=True,
64
+ allow_methods=["*"],
65
+ allow_headers=["*"],
66
+ )
67
+
68
+
69
+ @app.get("/")
70
+ async def root():
71
+ """Root endpoint"""
72
+ return {"message": "FRED ML API", "version": "1.0.0", "status": "running"}
73
+
74
+
75
+ @app.get("/health")
76
+ async def health_check():
77
+ """Health check endpoint"""
78
+ return {"status": "healthy"}
79
+
80
+
81
+ @app.get("/ready")
82
+ async def readiness_check():
83
+ """Readiness check endpoint"""
84
+ if collector is None:
85
+ raise HTTPException(status_code=503, detail="Service not ready")
86
+ return {"status": "ready"}
87
+
88
+
89
+ @app.get("/api/v1/indicators")
90
+ async def get_indicators():
91
+ """Get available economic indicators"""
92
+ if collector is None:
93
+ raise HTTPException(status_code=503, detail="Service not ready")
94
+
95
+ return {
96
+ "indicators": list(collector.indicators.keys()),
97
+ "descriptions": collector.indicators,
98
+ }
99
+
100
+
101
+ @app.post("/api/v1/analyze")
102
+ async def analyze_data(
103
+ series_ids: list[str], start_date: str = None, end_date: str = None
104
+ ):
105
+ """Analyze economic data for specified series"""
106
+ if collector is None:
107
+ raise HTTPException(status_code=503, detail="Service not ready")
108
+
109
+ try:
110
+ df, summary = collector.run_analysis(
111
+ series_ids=series_ids, start_date=start_date, end_date=end_date
112
+ )
113
+
114
+ return {
115
+ "status": "success",
116
+ "data_shape": df.shape if df is not None else None,
117
+ "summary": summary.to_dict() if summary is not None else None,
118
+ }
119
+ except Exception as e:
120
+ logger.error(f"Analysis failed: {e}")
121
+ raise HTTPException(status_code=500, detail=str(e))
122
+
123
+
124
+ @app.get("/api/v1/status")
125
+ async def get_status():
126
+ """Get application status"""
127
+ return {
128
+ "api_key_configured": bool(FRED_API_KEY),
129
+ "collector_initialized": collector is not None,
130
+ "environment": os.getenv("ENVIRONMENT", "development"),
131
+ }
132
+
133
+
134
+ if __name__ == "__main__":
135
+ port = int(os.getenv("PORT", 8000))
136
+ uvicorn.run(
137
+ "src.main:app",
138
+ host="0.0.0.0",
139
+ port=port,
140
+ reload=os.getenv("ENVIRONMENT") == "development",
141
+ )
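Once the service is up (uvicorn locally or the Docker image), the endpoints can be exercised with any HTTP client. A sketch with requests, assuming the default port 8000; the bare list[str] parameter of /api/v1/analyze is read from the JSON body, while start_date and end_date arrive as query parameters:

import requests

base = "http://localhost:8000"

print(requests.get(f"{base}/health").json())             # {"status": "healthy"}
print(requests.get(f"{base}/api/v1/indicators").json())  # available series IDs

resp = requests.post(
    f"{base}/api/v1/analyze",
    params={"start_date": "2022-01-01", "end_date": "2023-12-31"},
    json=["GDP", "UNRATE"],  # body for the series_ids list parameter
)
print(resp.status_code, resp.json())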
src/utils/__init__.py CHANGED
@@ -4,4 +4,4 @@ Utility functions and helper modules.
4
 
5
  from .examples import *
6
 
7
- __all__ = ['examples']
 
4
 
5
  from .examples import *
6
 
7
+ __all__ = ["examples"]
src/utils/examples.py CHANGED
@@ -4,98 +4,105 @@ Example usage of the FRED Data Collector
4
  Demonstrates various ways to use the tool for economic data analysis
5
  """
6
 
7
- import sys
8
  import os
9
- sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 
 
10
 
11
- from core.fred_client import FREDDataCollectorV2
12
- import pandas as pd
13
  from datetime import datetime, timedelta
14
 
 
 
 
 
 
15
  def example_basic_usage():
16
  """Basic usage example."""
17
  print("=== Basic Usage Example ===")
18
-
19
  collector = FREDDataCollectorV2()
20
-
21
  # Get data for a single indicator
22
- gdp_data = collector.get_economic_data(['GDP'], '2020-01-01', '2024-01-01')
23
  df = collector.create_dataframe(gdp_data)
24
-
25
  print(f"GDP data shape: {df.shape}")
26
  print(f"Date range: {df.index.min()} to {df.index.max()}")
27
  print(f"Latest GDP value: ${df['GDP'].iloc[-1]:,.2f} billion")
28
-
29
  return df
30
 
 
31
  def example_multiple_indicators():
32
  """Example with multiple economic indicators."""
33
  print("\n=== Multiple Indicators Example ===")
34
-
35
  collector = FREDDataCollectorV2()
36
-
37
  # Define indicators of interest
38
- indicators = ['UNRATE', 'CPIAUCSL', 'FEDFUNDS']
39
-
40
  # Get data for the last 5 years
41
- end_date = datetime.now().strftime('%Y-%m-%d')
42
- start_date = (datetime.now() - timedelta(days=5*365)).strftime('%Y-%m-%d')
43
-
44
  data = collector.get_economic_data(indicators, start_date, end_date)
45
  df = collector.create_dataframe(data)
46
-
47
  # Generate summary statistics
48
  summary = collector.generate_summary_statistics(df)
49
  print("\nSummary Statistics:")
50
  print(summary)
51
-
52
  # Save data
53
- collector.save_data(df, 'example_multiple_indicators.csv')
54
-
55
  return df
56
 
 
57
  def example_custom_analysis():
58
  """Example of custom analysis."""
59
  print("\n=== Custom Analysis Example ===")
60
-
61
  collector = FREDDataCollectorV2()
62
-
63
  # Focus on monetary policy indicators
64
- monetary_indicators = ['FEDFUNDS', 'DGS10', 'M2SL']
65
-
66
  # Get data for the last 10 years
67
- end_date = datetime.now().strftime('%Y-%m-%d')
68
- start_date = (datetime.now() - timedelta(days=10*365)).strftime('%Y-%m-%d')
69
-
70
  data = collector.get_economic_data(monetary_indicators, start_date, end_date)
71
  df = collector.create_dataframe(data)
72
-
73
  # Calculate some custom metrics
74
- if 'FEDFUNDS' in df.columns and 'DGS10' in df.columns:
75
  # Calculate yield curve spread (10Y - Fed Funds)
76
- df['YIELD_SPREAD'] = df['DGS10'] - df['FEDFUNDS']
77
-
78
  print(f"\nYield Curve Analysis:")
79
  print(f"Current Fed Funds Rate: {df['FEDFUNDS'].iloc[-1]:.2f}%")
80
  print(f"Current 10Y Treasury Rate: {df['DGS10'].iloc[-1]:.2f}%")
81
  print(f"Current Yield Spread: {df['YIELD_SPREAD'].iloc[-1]:.2f}%")
82
-
83
  # Check for inverted yield curve (negative spread)
84
- inverted_periods = df[df['YIELD_SPREAD'] < 0]
85
  if not inverted_periods.empty:
86
  print(f"Yield curve inverted for {len(inverted_periods)} periods")
87
-
88
  return df
89
 
 
90
  def example_series_info():
91
  """Example of getting series information."""
92
  print("\n=== Series Information Example ===")
93
-
94
  collector = FREDDataCollectorV2()
95
-
96
  # Get information about different series
97
- series_to_check = ['GDP', 'UNRATE', 'CPIAUCSL']
98
-
99
  for series_id in series_to_check:
100
  info = collector.get_series_info(series_id)
101
  if info:
@@ -105,23 +112,25 @@ def example_series_info():
105
  print(f" Frequency: {info['frequency']}")
106
  print(f" Last Updated: {info['last_updated']}")
107
 
 
108
  def example_error_handling():
109
  """Example showing error handling."""
110
  print("\n=== Error Handling Example ===")
111
-
112
  collector = FREDDataCollectorV2()
113
-
114
  # Try to get data for an invalid series ID
115
- invalid_series = ['INVALID_SERIES_ID']
116
-
117
  data = collector.get_economic_data(invalid_series)
118
  print("Attempted to fetch invalid series - handled gracefully")
119
 
 
120
  def main():
121
  """Run all examples."""
122
  print("FRED Data Collector - Example Usage")
123
  print("=" * 50)
124
-
125
  try:
126
  # Run examples
127
  example_basic_usage()
@@ -129,11 +138,12 @@ def main():
129
  example_custom_analysis()
130
  example_series_info()
131
  example_error_handling()
132
-
133
  print("\n=== All Examples Completed Successfully ===")
134
-
135
  except Exception as e:
136
  print(f"Error running examples: {e}")
137
 
 
138
  if __name__ == "__main__":
139
- main()
 
4
  Demonstrates various ways to use the tool for economic data analysis
5
  """
6
 
 
7
  import os
8
+ import sys
9
+
10
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
11
 
 
 
12
  from datetime import datetime, timedelta
13
 
14
+ import pandas as pd
15
+
16
+ from core.fred_client import FREDDataCollectorV2
17
+
18
+
19
  def example_basic_usage():
20
  """Basic usage example."""
21
  print("=== Basic Usage Example ===")
22
+
23
  collector = FREDDataCollectorV2()
24
+
25
  # Get data for a single indicator
26
+ gdp_data = collector.get_economic_data(["GDP"], "2020-01-01", "2024-01-01")
27
  df = collector.create_dataframe(gdp_data)
28
+
29
  print(f"GDP data shape: {df.shape}")
30
  print(f"Date range: {df.index.min()} to {df.index.max()}")
31
  print(f"Latest GDP value: ${df['GDP'].iloc[-1]:,.2f} billion")
32
+
33
  return df
34
 
35
+
36
  def example_multiple_indicators():
37
  """Example with multiple economic indicators."""
38
  print("\n=== Multiple Indicators Example ===")
39
+
40
  collector = FREDDataCollectorV2()
41
+
42
  # Define indicators of interest
43
+ indicators = ["UNRATE", "CPIAUCSL", "FEDFUNDS"]
44
+
45
  # Get data for the last 5 years
46
+ end_date = datetime.now().strftime("%Y-%m-%d")
47
+ start_date = (datetime.now() - timedelta(days=5 * 365)).strftime("%Y-%m-%d")
48
+
49
  data = collector.get_economic_data(indicators, start_date, end_date)
50
  df = collector.create_dataframe(data)
51
+
52
  # Generate summary statistics
53
  summary = collector.generate_summary_statistics(df)
54
  print("\nSummary Statistics:")
55
  print(summary)
56
+
57
  # Save data
58
+ collector.save_data(df, "example_multiple_indicators.csv")
59
+
60
  return df
61
 
62
+
63
  def example_custom_analysis():
64
  """Example of custom analysis."""
65
  print("\n=== Custom Analysis Example ===")
66
+
67
  collector = FREDDataCollectorV2()
68
+
69
  # Focus on monetary policy indicators
70
+ monetary_indicators = ["FEDFUNDS", "DGS10", "M2SL"]
71
+
72
  # Get data for the last 10 years
73
+ end_date = datetime.now().strftime("%Y-%m-%d")
74
+ start_date = (datetime.now() - timedelta(days=10 * 365)).strftime("%Y-%m-%d")
75
+
76
  data = collector.get_economic_data(monetary_indicators, start_date, end_date)
77
  df = collector.create_dataframe(data)
78
+
79
  # Calculate some custom metrics
80
+ if "FEDFUNDS" in df.columns and "DGS10" in df.columns:
81
  # Calculate yield curve spread (10Y - Fed Funds)
82
+ df["YIELD_SPREAD"] = df["DGS10"] - df["FEDFUNDS"]
83
+
84
  print(f"\nYield Curve Analysis:")
85
  print(f"Current Fed Funds Rate: {df['FEDFUNDS'].iloc[-1]:.2f}%")
86
  print(f"Current 10Y Treasury Rate: {df['DGS10'].iloc[-1]:.2f}%")
87
  print(f"Current Yield Spread: {df['YIELD_SPREAD'].iloc[-1]:.2f}%")
88
+
89
  # Check for inverted yield curve (negative spread)
90
+ inverted_periods = df[df["YIELD_SPREAD"] < 0]
91
  if not inverted_periods.empty:
92
  print(f"Yield curve inverted for {len(inverted_periods)} periods")
93
+
94
  return df
95
 
96
+
97
  def example_series_info():
98
  """Example of getting series information."""
99
  print("\n=== Series Information Example ===")
100
+
101
  collector = FREDDataCollectorV2()
102
+
103
  # Get information about different series
104
+ series_to_check = ["GDP", "UNRATE", "CPIAUCSL"]
105
+
106
  for series_id in series_to_check:
107
  info = collector.get_series_info(series_id)
108
  if info:
 
112
  print(f" Frequency: {info['frequency']}")
113
  print(f" Last Updated: {info['last_updated']}")
114
 
115
+
116
  def example_error_handling():
117
  """Example showing error handling."""
118
  print("\n=== Error Handling Example ===")
119
+
120
  collector = FREDDataCollectorV2()
121
+
122
  # Try to get data for an invalid series ID
123
+ invalid_series = ["INVALID_SERIES_ID"]
124
+
125
  data = collector.get_economic_data(invalid_series)
126
  print("Attempted to fetch invalid series - handled gracefully")
127
 
128
+
129
  def main():
130
  """Run all examples."""
131
  print("FRED Data Collector - Example Usage")
132
  print("=" * 50)
133
+
134
  try:
135
  # Run examples
136
  example_basic_usage()
 
138
  example_custom_analysis()
139
  example_series_info()
140
  example_error_handling()
141
+
142
  print("\n=== All Examples Completed Successfully ===")
143
+
144
  except Exception as e:
145
  print(f"Error running examples: {e}")
146
 
147
+
148
  if __name__ == "__main__":
149
+ main()
src/visualization/__init__.py CHANGED
@@ -2,4 +2,4 @@
2
  Data visualization and plotting utilities.
3
  """
4
 
5
- __all__ = []
 
2
  Data visualization and plotting utilities.
3
  """
4
 
5
+ __all__ = []
tests/__pycache__/test_fred_api.cpython-39-pytest-7.4.0.pyc CHANGED
Binary files a/tests/__pycache__/test_fred_api.cpython-39-pytest-7.4.0.pyc and b/tests/__pycache__/test_fred_api.cpython-39-pytest-7.4.0.pyc differ
 
tests/__pycache__/test_fredapi_library.cpython-39-pytest-7.4.0.pyc CHANGED
Binary files a/tests/__pycache__/test_fredapi_library.cpython-39-pytest-7.4.0.pyc and b/tests/__pycache__/test_fredapi_library.cpython-39-pytest-7.4.0.pyc differ
 
tests/test_fred_api.py CHANGED
@@ -3,38 +3,41 @@
3
  Simple FRED API test
4
  """
5
 
6
- import requests
7
- import sys
8
  import os
9
- sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 
 
 
 
10
 
11
  from config.settings import FRED_API_KEY
12
 
 
13
  def test_fred_api_direct():
14
  """Test FRED API directly using requests."""
15
  print("Testing FRED API directly...")
16
-
17
  # Test URL for GDP series
18
  url = f"https://api.stlouisfed.org/fred/series/observations"
19
  params = {
20
- 'series_id': 'GDP',
21
- 'api_key': FRED_API_KEY,
22
- 'file_type': 'json',
23
- 'start_date': '2023-01-01',
24
- 'end_date': '2023-12-31'
25
  }
26
-
27
  try:
28
  response = requests.get(url, params=params)
29
-
30
  if response.status_code == 200:
31
  data = response.json()
32
- observations = data.get('observations', [])
33
-
34
  if observations:
35
  print("βœ“ API connection successful!")
36
  print(f"βœ“ Retrieved {len(observations)} GDP observations")
37
-
38
  # Get the latest observation
39
  latest = observations[-1]
40
  print(f"βœ“ Latest GDP value: ${float(latest['value']):,.2f} billion")
@@ -47,33 +50,30 @@ def test_fred_api_direct():
47
  print(f"βœ— API request failed with status code: {response.status_code}")
48
  print(f"Response: {response.text}")
49
  return False
50
-
51
  except Exception as e:
52
  print(f"βœ— API connection failed: {e}")
53
  return False
54
 
 
55
  def test_series_search():
56
  """Test searching for series."""
57
  print("\nTesting series search...")
58
-
59
  url = "https://api.stlouisfed.org/fred/series/search"
60
- params = {
61
- 'search_text': 'GDP',
62
- 'api_key': FRED_API_KEY,
63
- 'file_type': 'json'
64
- }
65
-
66
  try:
67
  response = requests.get(url, params=params)
68
-
69
  if response.status_code == 200:
70
  data = response.json()
71
- series = data.get('seriess', [])
72
-
73
  if series:
74
  print("βœ“ Series search successful!")
75
  print(f"βœ“ Found {len(series)} series matching 'GDP'")
76
-
77
  # Show first few results
78
  for i, s in enumerate(series[:3]):
79
  print(f" {i+1}. {s['id']}: {s['title']}")
@@ -84,32 +84,34 @@ def test_series_search():
84
  else:
85
  print(f"βœ— Search request failed: {response.status_code}")
86
  return False
87
-
88
  except Exception as e:
89
  print(f"βœ— Search failed: {e}")
90
  return False
91
 
 
92
  def main():
93
  """Run simple API tests."""
94
  print("Simple FRED API Test")
95
  print("=" * 30)
96
  print(f"API Key: {FRED_API_KEY[:8]}...")
97
  print()
98
-
99
  # Test direct API access
100
  api_ok = test_fred_api_direct()
101
-
102
  # Test series search
103
  search_ok = test_series_search()
104
-
105
  print("\n" + "=" * 30)
106
  if api_ok and search_ok:
107
  print("βœ“ All tests passed! Your API key is working correctly.")
108
  print("The issue is with the fredapi library, not your API key.")
109
  else:
110
  print("βœ— Some tests failed. Please check your API key.")
111
-
112
  return api_ok and search_ok
113
 
 
114
  if __name__ == "__main__":
115
- main()
 
3
  Simple FRED API test
4
  """
5
 
 
 
6
  import os
7
+ import sys
8
+
9
+ import requests
10
+
11
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
12
 
13
  from config.settings import FRED_API_KEY
14
 
15
+
16
  def test_fred_api_direct():
17
  """Test FRED API directly using requests."""
18
  print("Testing FRED API directly...")
19
+
20
  # Test URL for GDP series
21
  url = f"https://api.stlouisfed.org/fred/series/observations"
22
  params = {
23
+ "series_id": "GDP",
24
+ "api_key": FRED_API_KEY,
25
+ "file_type": "json",
26
+ "start_date": "2023-01-01",
27
+ "end_date": "2023-12-31",
28
  }
29
+
30
  try:
31
  response = requests.get(url, params=params)
32
+
33
  if response.status_code == 200:
34
  data = response.json()
35
+ observations = data.get("observations", [])
36
+
37
  if observations:
38
  print("βœ“ API connection successful!")
39
  print(f"βœ“ Retrieved {len(observations)} GDP observations")
40
+
41
  # Get the latest observation
42
  latest = observations[-1]
43
  print(f"βœ“ Latest GDP value: ${float(latest['value']):,.2f} billion")
 
50
  print(f"βœ— API request failed with status code: {response.status_code}")
51
  print(f"Response: {response.text}")
52
  return False
53
+
54
  except Exception as e:
55
  print(f"βœ— API connection failed: {e}")
56
  return False
57
 
58
+
59
  def test_series_search():
60
  """Test searching for series."""
61
  print("\nTesting series search...")
62
+
63
  url = "https://api.stlouisfed.org/fred/series/search"
64
+ params = {"search_text": "GDP", "api_key": FRED_API_KEY, "file_type": "json"}
65
+
 
 
 
 
66
  try:
67
  response = requests.get(url, params=params)
68
+
69
  if response.status_code == 200:
70
  data = response.json()
71
+ series = data.get("seriess", [])
72
+
73
  if series:
74
  print("βœ“ Series search successful!")
75
  print(f"βœ“ Found {len(series)} series matching 'GDP'")
76
+
77
  # Show first few results
78
  for i, s in enumerate(series[:3]):
79
  print(f" {i+1}. {s['id']}: {s['title']}")
 
84
  else:
85
  print(f"βœ— Search request failed: {response.status_code}")
86
  return False
87
+
88
  except Exception as e:
89
  print(f"βœ— Search failed: {e}")
90
  return False
91
 
92
+
93
  def main():
94
  """Run simple API tests."""
95
  print("Simple FRED API Test")
96
  print("=" * 30)
97
  print(f"API Key: {FRED_API_KEY[:8]}...")
98
  print()
99
+
100
  # Test direct API access
101
  api_ok = test_fred_api_direct()
102
+
103
  # Test series search
104
  search_ok = test_series_search()
105
+
106
  print("\n" + "=" * 30)
107
  if api_ok and search_ok:
108
  print("βœ“ All tests passed! Your API key is working correctly.")
109
  print("The issue is with the fredapi library, not your API key.")
110
  else:
111
  print("βœ— Some tests failed. Please check your API key.")
112
+
113
  return api_ok and search_ok
114
 
115
+
116
  if __name__ == "__main__":
117
+ main()
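
A minimal sketch of the direct FRED call that test_fred_api_direct() exercises, kept outside the test harness for reference. The helper name fetch_gdp_observations and the environment-variable fallback for the key are illustrative assumptions; observation_start and observation_end are FRED's documented date filters for the series/observations endpoint.

# Minimal sketch of the direct FRED call exercised by test_fred_api_direct().
# Assumptions: helper name, env-var fallback for the API key.
import os

import requests

FRED_API_KEY = os.environ.get("FRED_API_KEY", "")


def fetch_gdp_observations(start="2023-01-01", end="2023-12-31"):
    url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": "GDP",
        "api_key": FRED_API_KEY,
        "file_type": "json",
        "observation_start": start,
        "observation_end": end,
    }
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()  # surface HTTP errors instead of returning False
    return response.json().get("observations", [])


if __name__ == "__main__":
    obs = fetch_gdp_observations()
    print(f"Retrieved {len(obs)} observations; latest value: {obs[-1]['value']}")
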
tests/test_fredapi_library.py CHANGED
@@ -3,25 +3,28 @@
3
  Test script to verify FRED API key functionality
4
  """
5
 
6
- from fredapi import Fred
7
- import sys
8
  import os
9
- sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
 
 
 
 
10
 
11
  from config.settings import FRED_API_KEY
12
 
 
13
  def test_api_connection():
14
  """Test the FRED API connection with the provided key."""
15
  print("Testing FRED API connection...")
16
-
17
  try:
18
  # Initialize FRED client
19
  fred = Fred(api_key=FRED_API_KEY)
20
-
21
  # Test with a simple series (GDP)
22
  print("Fetching GDP data as a test...")
23
- gdp_data = fred.get_series('GDP', start='2023-01-01', end='2023-12-31')
24
-
25
  if not gdp_data.empty:
26
  print("βœ“ API connection successful!")
27
  print(f"βœ“ Retrieved {len(gdp_data)} GDP observations")
@@ -31,54 +34,57 @@ def test_api_connection():
31
  else:
32
  print("βœ— No data retrieved")
33
  return False
34
-
35
  except Exception as e:
36
  print(f"βœ— API connection failed: {e}")
37
  return False
38
 
 
39
  def test_series_info():
40
  """Test getting series information."""
41
  print("\nTesting series information retrieval...")
42
-
43
  try:
44
  fred = Fred(api_key=FRED_API_KEY)
45
-
46
  # Test getting info for GDP
47
- series_info = fred.get_series_info('GDP')
48
-
49
  print("βœ“ Series information retrieved successfully!")
50
  print(f" Title: {series_info.title}")
51
  print(f" Units: {series_info.units}")
52
  print(f" Frequency: {series_info.frequency}")
53
  print(f" Last Updated: {series_info.last_updated}")
54
-
55
  return True
56
-
57
  except Exception as e:
58
  print(f"βœ— Failed to get series info: {e}")
59
  return False
60
 
 
61
  def main():
62
  """Run API tests."""
63
  print("FRED API Key Test")
64
  print("=" * 30)
65
  print(f"API Key: {FRED_API_KEY[:8]}...")
66
  print()
67
-
68
  # Test connection
69
  connection_ok = test_api_connection()
70
-
71
  # Test series info
72
  info_ok = test_series_info()
73
-
74
  print("\n" + "=" * 30)
75
  if connection_ok and info_ok:
76
  print("βœ“ All tests passed! Your API key is working correctly.")
77
  print("You can now use the FRED data collector tool.")
78
  else:
79
  print("βœ— Some tests failed. Please check your API key.")
80
-
81
  return connection_ok and info_ok
82
 
 
83
  if __name__ == "__main__":
84
- main()
 
3
  Test script to verify FRED API key functionality
4
  """
5
 
 
 
6
  import os
7
+ import sys
8
+
9
+ from fredapi import Fred
10
+
11
+ sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
12
 
13
  from config.settings import FRED_API_KEY
14
 
15
+
16
  def test_api_connection():
17
  """Test the FRED API connection with the provided key."""
18
  print("Testing FRED API connection...")
19
+
20
  try:
21
  # Initialize FRED client
22
  fred = Fred(api_key=FRED_API_KEY)
23
+
24
  # Test with a simple series (GDP)
25
  print("Fetching GDP data as a test...")
26
+ gdp_data = fred.get_series("GDP", observation_start="2023-01-01", observation_end="2023-12-31")
27
+
28
  if not gdp_data.empty:
29
  print("βœ“ API connection successful!")
30
  print(f"βœ“ Retrieved {len(gdp_data)} GDP observations")
 
34
  else:
35
  print("βœ— No data retrieved")
36
  return False
37
+
38
  except Exception as e:
39
  print(f"βœ— API connection failed: {e}")
40
  return False
41
 
42
+
43
  def test_series_info():
44
  """Test getting series information."""
45
  print("\nTesting series information retrieval...")
46
+
47
  try:
48
  fred = Fred(api_key=FRED_API_KEY)
49
+
50
  # Test getting info for GDP
51
+ series_info = fred.get_series_info("GDP")
52
+
53
  print("βœ“ Series information retrieved successfully!")
54
  print(f" Title: {series_info.title}")
55
  print(f" Units: {series_info.units}")
56
  print(f" Frequency: {series_info.frequency}")
57
  print(f" Last Updated: {series_info.last_updated}")
58
+
59
  return True
60
+
61
  except Exception as e:
62
  print(f"βœ— Failed to get series info: {e}")
63
  return False
64
 
65
+
66
  def main():
67
  """Run API tests."""
68
  print("FRED API Key Test")
69
  print("=" * 30)
70
  print(f"API Key: {FRED_API_KEY[:8]}...")
71
  print()
72
+
73
  # Test connection
74
  connection_ok = test_api_connection()
75
+
76
  # Test series info
77
  info_ok = test_series_info()
78
+
79
  print("\n" + "=" * 30)
80
  if connection_ok and info_ok:
81
  print("βœ“ All tests passed! Your API key is working correctly.")
82
  print("You can now use the FRED data collector tool.")
83
  else:
84
  print("βœ— Some tests failed. Please check your API key.")
85
+
86
  return connection_ok and info_ok
87
 
88
+
89
  if __name__ == "__main__":
90
+ main()
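
For comparison with the raw-requests test, a minimal sketch of the fredapi path that test_api_connection() wraps is shown below. The environment-variable fallback for the key is an illustrative assumption; observation_start and observation_end are the keyword names documented for fredapi's get_series.

# Minimal sketch of the fredapi-based path exercised by test_api_connection().
# Assumption: env-var fallback for the API key.
import os

from fredapi import Fred

fred = Fred(api_key=os.environ.get("FRED_API_KEY", ""))
gdp = fred.get_series(
    "GDP",
    observation_start="2023-01-01",
    observation_end="2023-12-31",
)
print(f"Retrieved {len(gdp)} GDP observations; latest: {float(gdp.iloc[-1]):,.2f}")
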