import os

# Set OpenMP thread placement before importing scikit-learn, so the settings
# are picked up when its OpenMP-based numerical backends initialize
os.environ["OMP_PROC_BIND"] = "spread"
os.environ["OMP_PLACES"] = "threads"
from pathlib import Path
import yaml
import joblib
from datasets import load_dataset
from datasets.utils.logging import disable_progress_bar
from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import KFold, GridSearchCV
from plaid.bridges.huggingface_bridge import huggingface_dataset_to_plaid
from plaid.pipelines.sklearn_block_wrappers import (
    WrappedPlaidSklearnTransformer,
    WrappedPlaidSklearnRegressor,
)
from plaid.pipelines.plaid_blocks import (
    PlaidTransformedTargetRegressor,
    PlaidColumnTransformer,
)
from mmgp.pipelines.mmgp_blocks import MMGPPreparer, MMGPTransformer
from mmgp_tensile2d.utils import length_scale_init, morphing
disable_progress_bar()  # silence Hugging Face datasets progress bars
n_processes = min(os.cpu_count() or 1, 24)  # use at most 24 worker processes

# Load the Tensile2d dataset from the Hugging Face hub and convert the
# training split to a PLAID dataset (the second return value is not needed here)
hf_dataset = load_dataset("PLAID-datasets/Tensile2d", split="all_samples")
ids_train = hf_dataset.description["split"]["train_500"]
dataset_train, _ = huggingface_dataset_to_plaid(
    hf_dataset, ids=ids_train, processes_number=n_processes, verbose=True
)
# Load the pipeline configuration and restrict the dataset to the features
# the pipeline actually uses
with open("config_pipeline.yml", "r") as f:
    config = yaml.safe_load(f)
all_feature_id = (
    config["input_scalar_scaler"]["in_features_identifiers"]
    + config["pca_nodes"]["in_features_identifiers"]
    + config["pca_u1"]["in_features_identifiers"]
)
dataset_train = dataset_train.from_features_identifier(all_feature_id)
# Define the learning pipeline: the preparator morphs every sample onto a
# common reference mesh (here, the mesh of sample 1), on which reduced
# representations of the fields can be computed
preparator = MMGPPreparer(common_mesh_id=1, morphing=morphing)
input_scalar_scaler = WrappedPlaidSklearnTransformer(MinMaxScaler(), **config["input_scalar_scaler"])
nodes_preprocessor = Pipeline(
    steps=[
        ("mmgp_nodes_transf", MMGPTransformer(**config["mmgp_nodes_transf"])),
        ("pca_nodes", WrappedPlaidSklearnTransformer(PCA(), **config["pca_nodes"])),
    ]
)
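# Together, these two steps produce a fixed-size, PCA-compressed embedding of
# each sample's geometry (node coordinates expressed on the common mesh)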
column_preprocessor = PlaidColumnTransformer(
    [
        ("input_scalar_scaler", input_scalar_scaler),
        ("nodes_preprocessor", nodes_preprocessor),
    ]
)
preprocessor = Pipeline(
    steps=[
        ("preparator", preparator),
        ("column_preprocessor", column_preprocessor),
    ]
)
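# After preprocessing, each sample is described by low-dimensional inputs:
# the scaled input scalars and the PCA coefficients of the geometry, which
# the Gaussian process below regresses against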
kernel = Matern(length_scale_bounds=(1e-8, 1e8), nu=2.5)
gpr = GaussianProcessRegressor(
    kernel=kernel,
    optimizer="fmin_l_bfgs_b",
    n_restarts_optimizer=2,
    random_state=42,
)
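# MultiOutputRegressor fits one independent GP per output dimension (one per
# PCA coefficient of the target field); the dynamics_params_factory entry
# lets length_scale_init supply a data-dependent initial kernel length scale
# at fit time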
reg = MultiOutputRegressor(gpr)
dynamics_params_factory = {"estimator__kernel__length_scale": length_scale_init}
regressor = WrappedPlaidSklearnRegressor(
    reg, **config["regressor_mach"], dynamics_params_factory=dynamics_params_factory
)
postprocessor = Pipeline(
    steps=[
        ("mmgp_u1_transf", MMGPTransformer(**config["mmgp_u1_transf"])),
        ("pca_u1", WrappedPlaidSklearnTransformer(PCA(), **config["pca_u1"])),
    ]
)
target_regressor = PlaidTransformedTargetRegressor(
regressor=regressor,
transformer=postprocessor,
)
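# Following scikit-learn's TransformedTargetRegressor pattern, the target
# field u1 is compressed by the postprocessor before fitting, and predictions
# are mapped back (inverse PCA, then transfer to each sample's own mesh)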
pipeline = Pipeline(
steps=[
("preprocessor", preprocessor),
("regressor", target_regressor),
]
)
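# Sketch (disabled by default): how the two n_components values set below
# could be re-selected by cross-validated grid search. This assumes the PLAID
# pipeline exposes a scikit-learn-compatible score on the dataset; the grid
# values are illustrative
RUN_GRID_SEARCH = False
if RUN_GRID_SEARCH:
    pca_nodes_key = (
        "preprocessor__column_preprocessor__nodes_preprocessor"
        "__pca_nodes__sklearn_block__n_components"
    )
    pca_u1_key = "regressor__transformer__pca_u1__sklearn_block__n_components"
    param_grid = {pca_nodes_key: [8, 16, 32], pca_u1_key: [16, 32, 64]}
    search = GridSearchCV(clone(pipeline), param_grid, cv=KFold(n_splits=5))
    search.fit(dataset_train)
    print(search.best_params_)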
# Set hyperparameters that have been optimized by cross-validation on the training set
optimized_pipeline = clone(pipeline).set_params(
    preprocessor__column_preprocessor__nodes_preprocessor__pca_nodes__sklearn_block__n_components=16,
    regressor__transformer__pca_u1__sklearn_block__n_components=32,
)
# Train the model
optimized_pipeline.fit(dataset_train)
# Save model
joblib.dump(optimized_pipeline, "pipeline.joblib")
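# Sketch (disabled by default): reload the saved pipeline and predict on the
# test split. Assumes the dataset description contains a "test" split; the
# predict call mirrors the scikit-learn API of the fitted pipeline
RUN_TEST_PREDICTION = False
if RUN_TEST_PREDICTION:
    loaded_pipeline = joblib.load("pipeline.joblib")
    ids_test = hf_dataset.description["split"]["test"]
    dataset_test, _ = huggingface_dataset_to_plaid(
        hf_dataset, ids=ids_test, processes_number=n_processes, verbose=True
    )
    dataset_pred = loaded_pipeline.predict(dataset_test)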