import pandas as pd
import numpy as np

from sklearn.svm import NuSVR
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

import joblib


class NuSVRInsuranceModel:
    """
    Bundle of a feature transformer, a target scaler and a NuSVR regressor.

    This class encapsulates:
      1. Preprocessing: column transformations, scaling
      2. Prediction: using NuSVR
      3. Postprocessing: inverse-transform predictions to original scale

    The constructor only builds *unfitted* components. ``ct``,
    ``target_scaler`` and ``model`` have to be fitted by the caller before
    ``preprocessing``, ``predict`` or ``postprocessing`` can be used.
    """

    # --- Custom Transformer defined INSIDE the class ---
    class MultiplyScaler(BaseEstimator, TransformerMixin):
        """Stateless transformer that multiplies its input by a constant."""

        def __init__(self, factor=2):
            # Stored under the same name as the parameter so that
            # BaseEstimator.get_params / clone can round-trip it.
            self.factor = factor

        def fit(self, X, y=None):
            """Nothing is learned from the data; return ``self`` unchanged."""
            return self

        def transform(self, X):
            """Return ``X`` multiplied element-wise by ``self.factor``."""
            return X * self.factor

    def __init__(self):
        """
        Build the per-column pipelines, the ColumnTransformer that combines
        them, the target scaler and the NuSVR model. Nothing is fitted here.
        """

        # Example pipelines (adjust as needed)
        text_pipeline = Pipeline([
            ('one-hot', OneHotEncoder())
        ])

        nums_pipeline = Pipeline([
            ('normalize', StandardScaler(with_mean=True)),
        ])

        # Same standardisation as above, then doubled -- presumably so these
        # columns carry more weight in the RBF kernel distance.
        nums_pipeline_strong = Pipeline([
            ('normalize', StandardScaler(with_mean=True)),
            # Note we reference the nested class here
            ('scalarMultiply', NuSVRInsuranceModel.MultiplyScaler(factor=2))
        ])

        # OneHotEncoder emits a sparse matrix by default, so the scaler must
        # not centre the data (with_mean=False); the result is then
        # multiplied by 5.
        smoke_pipeline = Pipeline([
            ('one-hot', OneHotEncoder()),
            ('normalize', StandardScaler(with_mean=False)),
            ('scalar-multiply', NuSVRInsuranceModel.MultiplyScaler(factor=5))
        ])

        # Create ColumnTransformer
        # Adjust columns to match your dataset's actual column names.
        # Columns not listed here are dropped (ColumnTransformer's default
        # remainder='drop'); register an extra pipeline to use any of them.
        self.ct = ColumnTransformer([
            ('str_handler', text_pipeline, ['diabetic', 'gender']),
            ('smoke_handle', smoke_pipeline, ['smoker']),
            ('floats_ints_weak', nums_pipeline, ['children', 'age']),
            ('floats_ints_strong', nums_pipeline_strong, ['bmi', 'bloodpressure']),
        ])

        # Target scaler (for the 'claim' column): maps the fitted target
        # range onto [-0.5, 0.5].
        self.target_scaler = MinMaxScaler(feature_range=(-0.5, 0.5))

        # NuSVR model with desired hyperparameters
        self.model = NuSVR(C=10, gamma='scale', kernel='rbf', nu=0.80)

    def preprocessing(self, df: pd.DataFrame):
        """
        Apply the already-fitted ColumnTransformer to a raw dataframe.

        ``df`` must contain the columns named in the ColumnTransformer
        ('diabetic', 'gender', 'smoker', 'children', 'age', 'bmi',
        'bloodpressure'). Returns the transformed feature matrix.
        """
        return self.ct.transform(df)

    def predict(self, preprocessed_data):
        """
        Predict from an already-preprocessed feature matrix (the output of
        ``preprocessing``) and return the predictions in the original
        target scale as a 1-D array.
        """
        y_pred_scaled = self.model.predict(preprocessed_data)
        return self.postprocessing(y_pred_scaled)

    def postprocessing(self, y_pred_scaled) -> np.ndarray:
        """
        Map scaled predictions (in the target_scaler domain) back to the
        original target domain.

        Accepts any array-like (NumPy array, list, scalar) and returns a
        1-D NumPy array.
        """
        # np.asarray lets callers pass lists or scalars, which have no
        # .reshape; an ndarray passes through unchanged.
        scaled_column = np.asarray(y_pred_scaled).reshape(-1, 1)
        y_pred_original = self.target_scaler.inverse_transform(scaled_column)
        return y_pred_original.ravel()


if __name__ == "__main__":
    # Training script: fit every component on a train split, report hold-out
    # metrics in the original target scale, then persist the fitted wrapper.

    # --- Load the data and separate inputs from the target ----------------
    raw = pd.read_csv('cleaned_insurance_data.csv')
    y_all = raw['claim']
    # The target plus the 'PatientID' and 'index' columns are excluded from
    # the model inputs.
    X_all = raw.drop(columns=['claim', 'PatientID', 'index'])

    # --- Hold out 25% of the rows for testing (fixed seed) ----------------
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        X_all, y_all, test_size=0.25, random_state=42
    )

    # --- Fit transformer, target scaler and model on the TRAIN split only -
    wrapper = NuSVRInsuranceModel()
    train_matrix = wrapper.ct.fit_transform(X_fit)
    # MinMaxScaler expects a 2-D column; flatten again for the regressor.
    y_fit_column = y_fit.values.reshape(-1, 1)
    y_fit_scaled = wrapper.target_scaler.fit_transform(y_fit_column).ravel()
    wrapper.model.fit(train_matrix, y_fit_scaled)

    # --- Evaluate on the hold-out split -----------------------------------
    # Test features go through the transformer fitted above; predict()
    # already returns values in the original scale.
    holdout_matrix = wrapper.preprocessing(X_holdout)
    holdout_pred = wrapper.predict(holdout_matrix)

    holdout_mae = mean_absolute_error(y_holdout, holdout_pred)
    holdout_r2 = r2_score(y_holdout, holdout_pred)

    print(f"Test MAE (original scale): {holdout_mae:.3f}")
    print(f"Test R^2 (original scale): {holdout_r2:.3f}")

    # --- Persist the fitted wrapper ---------------------------------------
    # NOTE: pickling stores a reference to the class, not its code. Because
    # this file runs as a script, that reference is
    # ``__main__.NuSVRInsuranceModel``; whatever loads the file must expose
    # the class under that name, or the class should be moved to an
    # importable module.
    joblib.dump(wrapper, "nusvr_insurance_model.joblib")
    print("Fitted NuSVRInsuranceModel saved to nusvr_insurance_model.joblib")