Spaces (status: Sleeping)
LLH committed on 2024/03/07 16:46
Commit: 8d94a86
Parent(s): 4a491db
Browse files
- .gitignore +4 -0
- README.md +1 -1
- __init__.py +0 -0
- analysis/model_train/__init__.py +0 -0
- analysis/model_train/bayes_model.py +93 -0
- analysis/model_train/distance_model.py +128 -0
- analysis/model_train/gradient_model.py +74 -0
- analysis/model_train/kernel_model.py +134 -0
- analysis/model_train/linear_model.py +246 -0
- analysis/model_train/tree_model.py +329 -0
- analysis/others/hyperparam_optimize.py +30 -0
- analysis/others/shap_model.py +55 -0
- app.py +0 -0
- classes/__init__.py +0 -0
- classes/static_custom_class.py +248 -0
- data/notes.md +194 -8
- design/__init__.py +0 -0
- design/custom.css +5 -0
- design/welcome.js +31 -0
- functions/__init__.py +0 -0
- functions/process.py +38 -0
- visualization/draw_boxplot.py +0 -3
- visualization/draw_data_fit_total.py +1 -4
- visualization/draw_heat_map.py +3 -5
- visualization/draw_histogram.py +1 -6
- visualization/draw_histogram_line_subgraph.py +3 -3
- visualization/draw_learning_curve.py +5 -6
- visualization/draw_learning_curve_total.py +1 -3
- visualization/draw_line_graph.py +0 -3
- visualization/draw_momentum.py +0 -5
- visualization/draw_parallel_coordinates.py +1 -3
- visualization/draw_play_flow.py +0 -5
- visualization/draw_pred_total.py +0 -2
- visualization/draw_roc_auc_curve_total.py +3 -5
- visualization/draw_scatter.py +1 -3
- visualization/draw_scatter_line_graph.py +5 -5
- visualization/draw_swings_and_positives.py +0 -5
.gitignore
ADDED
@@ -0,0 +1,4 @@
+/programmer.md
+/venv
+/test
+/old
README.md
CHANGED
@@ -1,5 +1,5 @@
 ---
-title: EasyMachineLearning
+title: EasyMachineLearning
 emoji: 🔥
 colorFrom: red
 colorTo: red
__init__.py
ADDED
File without changes

analysis/model_train/__init__.py
ADDED
File without changes
analysis/model_train/bayes_model.py
ADDED
@@ -0,0 +1,93 @@
import numpy as np
from sklearn.model_selection import learning_curve
from sklearn.naive_bayes import *
from analysis.others.hyperparam_optimize import *
from classes.static_custom_class import StaticValue
from functions.process import transform_params_list, get_values_from_container_class

from metrics.calculate_classification_metrics import calculate_classification_metrics


class NaiveBayesClassifierParams:
    @classmethod
    def get_params_type(cls, sort):
        if sort == "MultinomialNB":
            return {
                "alpha": StaticValue.FLOAT
            }
        elif sort == "GaussianNB":
            return {}
        elif sort == "ComplementNB":
            return {
                "alpha": StaticValue.FLOAT,
                "fit_prior": StaticValue.BOOL,
                "norm": StaticValue.BOOL
            }

    @classmethod
    def get_params(cls, sort):
        if sort == "MultinomialNB":
            return {
                "alpha": [0.1, 0.5, 1.0, 2.0]
            }
        elif sort == "GaussianNB":
            return {}
        elif sort == "ComplementNB":
            return {
                "alpha": [0.1, 0.5, 1, 10],
                "fit_prior": [True, False],
                "norm": [True, False]
            }


# Naive Bayes classification
def naive_bayes_classifier(container, params_list, model=None):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(NaiveBayesClassifierParams, params_list, model)

    if model == "MultinomialNB":
        naive_bayes_model = MultinomialNB()
        params = params_list
    elif model == "GaussianNB":
        naive_bayes_model = GaussianNB()
        params = params_list
    elif model == "ComplementNB":
        naive_bayes_model = ComplementNB()
        params = params_list
    else:
        naive_bayes_model = GaussianNB()
        params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, naive_bayes_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, naive_bayes_model, x_train, y_train)
    else:
        best_model = naive_bayes_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
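Every trainer added in this commit follows the same shape: optionally run a hyperparameter search, fit, record the parameters, predict on the test split, and compute a 5-fold learning curve that is stashed on the container. A minimal self-contained sketch of the learning-curve step, using only scikit-learn (the container class itself lives in app.py, whose diff is not rendered here):

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import learning_curve
from sklearn.naive_bayes import GaussianNB

# Toy data standing in for the container's training split.
x_train, y_train = load_iris(return_X_y=True)
model = GaussianNB().fit(x_train, y_train)

# The same call every trainer makes: 5-fold cross-validated learning curve.
train_sizes, train_scores, test_scores = learning_curve(model, x_train, y_train, cv=5)

# Fold means/stds, i.e. what set_learning_curve_values() receives.
train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
test_mean, test_std = test_scores.mean(axis=1), test_scores.std(axis=1)
print(train_sizes, train_mean.round(3), test_mean.round(3))
```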
analysis/model_train/distance_model.py
ADDED
@@ -0,0 +1,128 @@
from sklearn.model_selection import learning_curve
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

from analysis.others.shap_model import *
from classes.static_custom_class import StaticValue
from functions.process import get_values_from_container_class, transform_params_list
from metrics.calculate_classification_metrics import calculate_classification_metrics
from metrics.calculate_regression_metrics import calculate_regression_metrics
from analysis.others.hyperparam_optimize import *


class KNNClassifierParams:
    @classmethod
    def get_params_type(cls):
        return {
            "n_neighbors": StaticValue.INT,
            "weights": StaticValue.STR,
            "p": StaticValue.INT
        }

    @classmethod
    def get_params(cls):
        return {
            "n_neighbors": [3, 5, 7, 9],
            "weights": ['uniform', 'distance'],
            "p": [1, 2]
        }


# KNN classification
def knn_classifier(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(KNNClassifierParams, params_list)

    knn_classifier_model = KNeighborsClassifier()
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, knn_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, knn_classifier_model, x_train, y_train)
    else:
        best_model = knn_classifier_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class KNNRegressionParams:
    @classmethod
    def get_params_type(cls):
        return {
            "n_neighbors": StaticValue.INT,
            "weights": StaticValue.STR,
            "p": StaticValue.INT
        }

    @classmethod
    def get_params(cls):
        return {
            "n_neighbors": [3, 5, 7, 9],
            "weights": ['uniform', 'distance'],
            "p": [1, 2]
        }


# KNN regression
def knn_regressor(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(KNNRegressionParams, params_list)

    knn_regression_model = KNeighborsRegressor()
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, knn_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, knn_regression_model, x_train, y_train)
    else:
        best_model = knn_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
analysis/model_train/gradient_model.py
ADDED
@@ -0,0 +1,74 @@
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import learning_curve

from functions.process import transform_params_list, get_values_from_container_class
from metrics.calculate_regression_metrics import calculate_regression_metrics
from analysis.others.hyperparam_optimize import *
from classes.static_custom_class import StaticValue


class GradientBoostingParams:
    @classmethod
    def get_params_type(cls):
        return {
            'n_estimators': StaticValue.INT,
            'learning_rate': StaticValue.FLOAT,
            'max_depth': StaticValue.INT,
            'min_samples_split': StaticValue.INT,
            'min_samples_leaf': StaticValue.INT,
            'random_state': StaticValue.INT
        }

    @classmethod
    def get_params(cls):
        return {
            'n_estimators': [50, 100, 150],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'random_state': [StaticValue.RANDOM_STATE]
        }


# Gradient boosting regression
def gradient_boosting_regressor(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(GradientBoostingParams, params_list)

    gradient_boosting_regression_model = GradientBoostingRegressor(random_state=StaticValue.RANDOM_STATE)
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, gradient_boosting_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, gradient_boosting_regression_model, x_train, y_train)
    else:
        best_model = gradient_boosting_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
analysis/model_train/kernel_model.py
ADDED
@@ -0,0 +1,134 @@
import numpy as np
from sklearn.model_selection import learning_curve
from sklearn.svm import SVC
from sklearn.svm import SVR

from classes.static_custom_class import StaticValue
from functions.process import get_values_from_container_class, transform_params_list
from metrics.calculate_classification_metrics import calculate_classification_metrics
from metrics.calculate_regression_metrics import calculate_regression_metrics
from analysis.others.hyperparam_optimize import *


class SVMRegressionParams:
    @classmethod
    def get_params_type(cls):
        return {
            'kernel': StaticValue.STR,
            'C': StaticValue.FLOAT,
            'gamma': StaticValue.FLOAT,
            'epsilon': StaticValue.FLOAT
        }

    @classmethod
    def get_params(cls):
        return {
            'kernel': ['linear', 'rbf'],
            'C': [0.1, 1, 10, 100],
            'gamma': [0.01, 0.1, 1, 10],
            'epsilon': [0.01, 0.1, 1]
        }


# Support vector machine regression
def svm_regressor(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(SVMRegressionParams, params_list)

    svm_regression_model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, svm_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, svm_regression_model, x_train, y_train)
    else:
        best_model = svm_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class SVMClassifierParams:
    @classmethod
    def get_params_type(cls):
        return {
            "C": StaticValue.FLOAT,
            "kernel": StaticValue.STR,
            "gamma": StaticValue.FLOAT,
            'random_state': StaticValue.INT
        }

    @classmethod
    def get_params(cls):
        return {
            "C": [0.1, 1, 10, 100],
            "kernel": ['linear', 'rbf', 'poly'],
            "gamma": [0.1, 1, 10],
            'random_state': [StaticValue.RANDOM_STATE]
        }


# Support vector machine classification
def svm_classifier(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(SVMClassifierParams, params_list)

    svm_classifier_model = SVC(kernel="rbf", random_state=StaticValue.RANDOM_STATE)
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, svm_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, svm_classifier_model, x_train, y_train)
    else:
        best_model = svm_classifier_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
analysis/model_train/linear_model.py
ADDED
@@ -0,0 +1,246 @@
import numpy as np
import gradio as gr
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import learning_curve
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

from functions.process import get_values_from_container_class, transform_params_list
from metrics.calculate_classification_metrics import calculate_classification_metrics
from metrics.calculate_regression_metrics import calculate_regression_metrics
from analysis.others.hyperparam_optimize import *
from classes.static_custom_class import StaticValue


class LinearRegressionParams:
    @classmethod
    def get_params_type(cls, sort):
        if sort in ["Lasso", "Ridge", "ElasticNet"]:
            return {
                "fit_intercept": StaticValue.BOOL,
                "alpha": StaticValue.FLOAT,
                "random_state": StaticValue.INT
            }
        else:
            return {
                "fit_intercept": StaticValue.BOOL
            }

    @classmethod
    def get_params(cls, sort):
        if sort in ["Lasso", "Ridge", "ElasticNet"]:
            return {
                "fit_intercept": [True, False],
                "alpha": [0.001, 0.01, 0.1, 1.0, 10.0],
                "random_state": [StaticValue.RANDOM_STATE]
            }
        else:
            return {
                "fit_intercept": [True, False]
            }


# Linear regression
def linear_regressor(container, params_list, model=None):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    input_params = transform_params_list(LinearRegressionParams, params_list, model)

    if model == "Lasso":
        linear_regression_model = Lasso(alpha=0.1, random_state=StaticValue.RANDOM_STATE)
        params = input_params
    elif model == "Ridge":
        linear_regression_model = Ridge(alpha=0.1, random_state=StaticValue.RANDOM_STATE)
        params = input_params
    elif model == "ElasticNet":
        linear_regression_model = ElasticNet(alpha=0.1, random_state=StaticValue.RANDOM_STATE)
        params = input_params
    elif model == "LinearRegression":
        linear_regression_model = LinearRegression()
        params = input_params
    else:
        linear_regression_model = LinearRegression()
        params = input_params

    try:
        if hyper_params_optimize == "grid_search":
            best_model = grid_search(params, linear_regression_model, x_train, y_train)
        elif hyper_params_optimize == "bayes_search":
            best_model = bayes_search(params, linear_regression_model, x_train, y_train)
        else:
            best_model = linear_regression_model
            best_model.fit(x_train, y_train)
    except Exception:
        # warning text: "hyperparameters are invalid; training with the default model instead"
        gr.Warning("超参数设置有误,将按照默认模型训练")
        best_model = LinearRegression()
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    # lr_intercept = best_model.intercept_
    # info["Intercept of linear regression equation"] = lr_intercept
    #
    # lr_coef = best_model.coef_
    # info["Coefficients of linear regression equation"] = lr_coef

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class PolynomialRegressionParams:
    @classmethod
    def get_params_type(cls):
        return {
            "polynomial_features__degree": StaticValue.INT,
            "linear_regression_model__fit_intercept": StaticValue.BOOL
        }

    @classmethod
    def get_params(cls):
        return {
            "polynomial_features__degree": [2, 3],
            "linear_regression_model__fit_intercept": [True, False]
        }


# Polynomial regression
def polynomial_regressor(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(PolynomialRegressionParams, params_list)

    polynomial_features = PolynomialFeatures(degree=2)
    linear_regression_model = LinearRegression()

    polynomial_regression_model = Pipeline([("polynomial_features", polynomial_features),
                                            ("linear_regression_model", linear_regression_model)])
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, polynomial_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, polynomial_regression_model, x_train, y_train)
    else:
        best_model = polynomial_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    # feature_names = best_model["polynomial_features"].get_feature_names_out()
    # info["Feature names of polynomial regression"] = feature_names
    #
    # lr_intercept = best_model["linear_regression_model"].intercept_
    # info["Intercept of polynomial regression equation"] = lr_intercept
    #
    # lr_coef = best_model["linear_regression_model"].coef_
    # info["Coefficients of polynomial regression equation"] = lr_coef

    x_test_ = best_model["polynomial_features"].transform(x_test)  # fixed: transform, not fit_transform (the step is already fitted)
    y_pred = best_model["linear_regression_model"].predict(x_test_)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class LogisticRegressionParams:
    @classmethod
    def get_params_type(cls):
        return {
            "C": StaticValue.FLOAT,
            "max_iter": StaticValue.INT,
            "solver": StaticValue.STR,
            "random_state": StaticValue.INT
        }

    @classmethod
    def get_params(cls):
        return {
            "C": [0.001, 0.01, 0.1, 1.0, 10.0],
            "max_iter": [100, 200, 300],
            "solver": ["liblinear", "lbfgs", "newton-cg", "sag", "saga"],
            "random_state": [StaticValue.RANDOM_STATE]
        }


# Logistic regression classification
def logistic_classifier(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(LogisticRegressionParams, params_list)

    logistic_regression_model = LogisticRegression(random_state=StaticValue.RANDOM_STATE)
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, logistic_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, logistic_regression_model, x_train, y_train)
    else:
        best_model = logistic_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    # lr_intercept = best_model.intercept_
    # info["Intercept of logistic regression equation"] = lr_intercept.tolist()
    #
    # lr_coef = best_model.coef_
    # info["Coefficients of logistic regression equation"] = lr_coef.tolist()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
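Since polynomial_regressor wraps its two steps in a Pipeline, prediction can also go through the pipeline itself; the reconstruction above keeps the author's two-step path but uses transform rather than fit_transform, because refitting the feature expander on x_test would silently change the feature mapping. A small scikit-learn-only sketch showing the two paths agree:

```python
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

x = np.arange(10, dtype=float).reshape(-1, 1)
y = 1.0 + 2.0 * x.ravel() + 0.5 * x.ravel() ** 2

pipe = Pipeline([("polynomial_features", PolynomialFeatures(degree=2)),
                 ("linear_regression_model", LinearRegression())])
pipe.fit(x, y)

# The pipeline reuses the transform fitted on x, so both routes match.
direct = pipe.predict(x)
manual = pipe["linear_regression_model"].predict(
    pipe["polynomial_features"].transform(x))
assert np.allclose(direct, manual)
```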
analysis/model_train/tree_model.py
ADDED
@@ -0,0 +1,329 @@
from lightgbm import LGBMClassifier  # fixed: the original imported the lightgbm module under the name lightGBMClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import learning_curve
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

from analysis.others.shap_model import *
from functions.process import get_values_from_container_class, transform_params_list
from metrics.calculate_classification_metrics import calculate_classification_metrics
from metrics.calculate_regression_metrics import calculate_regression_metrics
from analysis.others.hyperparam_optimize import *
from classes.static_custom_class import StaticValue


class RandomForestRegressionParams:
    @classmethod
    def get_params_type(cls):
        return {
            'n_estimators': StaticValue.INT,
            'max_depth': StaticValue.INT,
            'min_samples_split': StaticValue.INT,
            'min_samples_leaf': StaticValue.INT,
            'random_state': StaticValue.INT
        }

    @classmethod
    def get_params(cls):
        return {
            'n_estimators': [10, 50, 100, 200],
            'max_depth': [None, 10, 20, 30],  # fixed: scikit-learn rejects max_depth=0; None lets trees grow fully
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'random_state': [StaticValue.RANDOM_STATE]
        }


# Random forest regression
def random_forest_regressor(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(RandomForestRegressionParams, params_list)

    random_forest_regression_model = RandomForestRegressor(n_estimators=5, random_state=StaticValue.RANDOM_STATE)
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, random_forest_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, random_forest_regression_model, x_train, y_train)
    else:
        best_model = random_forest_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class DecisionTreeClassifierParams:
    @classmethod
    def get_params_type(cls):
        return {
            "criterion": StaticValue.STR,
            "splitter": StaticValue.STR,
            "max_depth": StaticValue.INT,
            "min_samples_split": StaticValue.INT,
            "min_samples_leaf": StaticValue.INT,
            'random_state': StaticValue.INT
        }

    @classmethod
    def get_params(cls):
        return {
            "criterion": ["gini", "entropy"],
            "splitter": ["best", "random"],
            "max_depth": [None, 5, 10, 15],  # fixed: scikit-learn rejects max_depth=0
            "min_samples_split": [2, 5, 10],
            "min_samples_leaf": [1, 2, 4],
            'random_state': [StaticValue.RANDOM_STATE]
        }


# Decision tree classification
def decision_tree_classifier(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(DecisionTreeClassifierParams, params_list)

    # renamed from "random_forest_regression_model": this estimator is a decision-tree classifier
    decision_tree_classifier_model = DecisionTreeClassifier(random_state=StaticValue.RANDOM_STATE)
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, decision_tree_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, decision_tree_classifier_model, x_train, y_train)
    else:
        best_model = decision_tree_classifier_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class RandomForestClassifierParams:
    @classmethod
    def get_params_type(cls):
        return {
            "criterion": StaticValue.STR,
            "n_estimators": StaticValue.INT,
            "max_depth": StaticValue.INT,
            "min_samples_split": StaticValue.INT,
            "min_samples_leaf": StaticValue.INT,
            "random_state": StaticValue.INT
        }

    @classmethod
    def get_params(cls):
        return {
            "criterion": ["gini", "entropy"],
            "n_estimators": [50, 100, 150],
            "max_depth": [None, 5, 10, 15],  # fixed: scikit-learn rejects max_depth=0
            "min_samples_split": [2, 5, 10],
            "min_samples_leaf": [1, 2, 4],
            "random_state": [StaticValue.RANDOM_STATE]
        }


# Random forest classification
def random_forest_classifier(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(RandomForestClassifierParams, params_list)

    random_forest_classifier_model = RandomForestClassifier(n_estimators=5, random_state=StaticValue.RANDOM_STATE)
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, random_forest_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, random_forest_classifier_model, x_train, y_train)
    else:
        best_model = random_forest_classifier_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class XgboostClassifierParams:
    @classmethod
    def get_params_type(cls):
        return {
            "n_estimators": StaticValue.INT,
            "learning_rate": StaticValue.FLOAT,
            "max_depth": StaticValue.INT,
            "min_child_weight": StaticValue.INT,
            "gamma": StaticValue.FLOAT,
            "subsample": StaticValue.FLOAT,
            "colsample_bytree": StaticValue.FLOAT,
            "random_state": StaticValue.INT
        }

    @classmethod
    def get_params(cls):
        return {
            "n_estimators": [50, 100, 150],
            "learning_rate": [0.01, 0.1, 0.2],
            "max_depth": [3, 4, 5],
            "min_child_weight": [1, 2, 3],
            "gamma": [0, 0.1, 0.2],
            "subsample": [0.5, 0.8, 0.9, 1.0],
            "colsample_bytree": [0.8, 0.9, 1.0],
            "random_state": [StaticValue.RANDOM_STATE]
        }


# xgboost classification
def xgboost_classifier(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(XgboostClassifierParams, params_list)

    xgboost_classifier_model = XGBClassifier(random_state=StaticValue.RANDOM_STATE)
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, xgboost_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, xgboost_classifier_model, x_train, y_train)
    else:
        best_model = xgboost_classifier_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class LightGBMClassifierParams:
    @classmethod
    def get_params(cls):
        return {}  # fixed: originally returned None, which the search helpers cannot consume


# lightGBM classification
def lightGBM_classifier(container, params_list):
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params_list = transform_params_list(LightGBMClassifierParams, params_list)

    # fixed: the original assigned the lightgbm module itself here and later called .train() on it
    lightgbm_classifier_model = LGBMClassifier(random_state=StaticValue.RANDOM_STATE)
    params = params_list

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, lightgbm_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, lightgbm_classifier_model, x_train, y_train)
    else:
        best_model = lightgbm_classifier_model
        best_model.fit(x_train, y_train)  # fixed: was best_model.train(...), which does not exist on the estimator

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
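In the original commit, lightGBM_classifier assigned the lightgbm module itself to lightgbm_classifier_model and called .train() on it, which would fail at runtime; the reconstruction above swaps in LightGBM's scikit-learn wrapper, LGBMClassifier, so that fit/predict/get_params and learning_curve behave like the other trainers. A minimal sketch of that wrapper (assumes the lightgbm package is installed):

```python
from lightgbm import LGBMClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

x, y = make_classification(n_samples=200, n_features=8, random_state=123)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=123)

# scikit-learn-compatible estimator: also works with GridSearchCV and learning_curve.
model = LGBMClassifier(random_state=123).fit(x_train, y_train)
print(model.predict(x_test)[:5])
```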
analysis/others/hyperparam_optimize.py
ADDED
@@ -0,0 +1,30 @@
from sklearn.model_selection import GridSearchCV
from skopt import BayesSearchCV


def grid_search(params, model, x_train, y_train, scoring=None):
    info = {}

    grid_search_model = GridSearchCV(model, params, cv=3, n_jobs=-1)

    grid_search_model.fit(x_train, y_train.ravel())

    info["Optimal hyperparameters"] = grid_search_model.best_params_

    best_model = grid_search_model.best_estimator_

    return best_model


def bayes_search(params, model, x_train, y_train, scoring=None):
    info = {}

    bayes_search_model = BayesSearchCV(model, params, cv=3, n_iter=50, n_jobs=-1)

    bayes_search_model.fit(x_train, y_train)

    info["Optimal hyperparameters"] = bayes_search_model.best_params_

    best_model = bayes_search_model.best_estimator_

    return best_model
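Both helpers discard the info dict they build and return only best_estimator_, so callers treat them as drop-in model factories. A standalone sketch of what grid_search() does internally, with a real estimator and the same cv=3 setting:

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

x_train, y_train = load_iris(return_X_y=True)
params = {"C": [0.1, 1, 10], "kernel": ["linear", "rbf"]}

# Exhaustive search over the grid with 3-fold CV, parallelized across cores.
search = GridSearchCV(SVC(), params, cv=3, n_jobs=-1)
search.fit(x_train, y_train)

best_model = search.best_estimator_  # the only value grid_search() returns
print(search.best_params_)
```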
analysis/others/shap_model.py
ADDED
@@ -0,0 +1,55 @@
import matplotlib.pyplot as plt
import numpy as np
import shap


def draw_shap_beeswarm(model, x, feature_names, type, paint_object):
    explainer = shap.KernelExplainer(model.predict, x)
    shap_values = explainer(x)

    shap.summary_plot(shap_values, x, feature_names=feature_names, plot_type=type, show=False)

    plt.title(paint_object.get_name())
    plt.tight_layout()

    return plt, paint_object


def draw_waterfall(model, x, feature_names, number, paint_object):
    explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
    shap_values = explainer(x)

    shap.waterfall_plot(shap_values[number], show=False)

    plt.title(paint_object.get_name())
    plt.tight_layout()

    return plt, paint_object


def draw_force(model, x, feature_names, number, paint_object):
    explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
    shap_values = explainer(x[number])

    shap.force_plot(explainer.expected_value, shap_values.values, feature_names=feature_names, show=False, matplotlib=True)

    plt.title(paint_object.get_name())
    plt.tight_layout()

    return plt, paint_object


def draw_dependence(model, x, feature_names, col, paint_object):
    explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
    shap_values = explainer(x)

    shap.dependence_plot(feature_names.index(col), shap_values.values, x, feature_names=feature_names, show=False)

    plt.title(paint_object.get_name())
    plt.tight_layout()

    return plt, paint_object
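All four helpers build a shap.KernelExplainer over model.predict, which is model-agnostic but evaluates the model many times per explained row, so it gets slow as x grows. A minimal beeswarm sketch using the classic shap_values() API (the helpers above instead call the explainer object directly, which newer shap releases support):

```python
import shap
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression

x, y = load_diabetes(return_X_y=True)
x, y = x[:50], y[:50]  # keep the background set small; KernelExplainer is expensive

model = LinearRegression().fit(x, y)

# Model-agnostic explainer over the model's predict function.
explainer = shap.KernelExplainer(model.predict, x)
shap_values = explainer.shap_values(x)

shap.summary_plot(shap_values, x, show=False)
```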
app.py
CHANGED
The diff for this file is too large to render. See raw diff.
classes/__init__.py
ADDED
File without changes
classes/static_custom_class.py
ADDED
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# 全局静态变量值存储类
|
2 |
+
class StaticValue:
|
3 |
+
# 超参数文本框的最大组件数量
|
4 |
+
MAX_PARAMS_NUM = 60
|
5 |
+
# 颜色和标签显示的最大组件数量
|
6 |
+
MAX_NUM = 20
|
7 |
+
# 随机种子 (数据集切分+模型训练)
|
8 |
+
RANDOM_STATE = 123
|
9 |
+
|
10 |
+
# 参数类型
|
11 |
+
INT = "int"
|
12 |
+
FLOAT = "float"
|
13 |
+
BOOL = "bool"
|
14 |
+
STR = "str"
|
15 |
+
|
16 |
+
# 画图颜色组重复次数
|
17 |
+
COLOR_ITER_NUM = 3
|
18 |
+
|
19 |
+
# 颜色组
|
20 |
+
COLORS = [
|
21 |
+
"#ca5353",
|
22 |
+
"#c874a5",
|
23 |
+
"#b674c8",
|
24 |
+
"#8274c8",
|
25 |
+
"#748dc8",
|
26 |
+
"#74acc8",
|
27 |
+
"#74c8b7",
|
28 |
+
"#74c88d",
|
29 |
+
"#a6c874",
|
30 |
+
"#e0e27e",
|
31 |
+
"#df9b77",
|
32 |
+
"#404040",
|
33 |
+
"#999999",
|
34 |
+
"#d4d4d4"
|
35 |
+
] * COLOR_ITER_NUM
|
36 |
+
|
37 |
+
COLORS_0 = [
|
38 |
+
"#8074C8",
|
39 |
+
"#7895C1",
|
40 |
+
"#A8CBDF",
|
41 |
+
"#992224",
|
42 |
+
"#B54764",
|
43 |
+
"#E3625D",
|
44 |
+
"#EF8B67",
|
45 |
+
"#F0C284"
|
46 |
+
] * COLOR_ITER_NUM
|
47 |
+
|
48 |
+
COLORS_1 = [
|
49 |
+
"#4A5F7E",
|
50 |
+
"#719AAC",
|
51 |
+
"#72B063",
|
52 |
+
"#94C6CD",
|
53 |
+
"#B8DBB3",
|
54 |
+
"#E29135"
|
55 |
+
] * COLOR_ITER_NUM
|
56 |
+
|
57 |
+
COLORS_2 = [
|
58 |
+
"#4485C7",
|
59 |
+
"#D4562E",
|
60 |
+
"#DBB428",
|
61 |
+
"#682487",
|
62 |
+
"#84BA42",
|
63 |
+
"#7ABBDB",
|
64 |
+
"#A51C36"
|
65 |
+
] * COLOR_ITER_NUM
|
66 |
+
|
67 |
+
COLORS_3 = [
|
68 |
+
"#8074C8",
|
69 |
+
"#7895C1",
|
70 |
+
"#A8CBDF",
|
71 |
+
"#F5EBAE",
|
72 |
+
"#F0C284",
|
73 |
+
"#EF8B67",
|
74 |
+
"#E3625D",
|
75 |
+
"#B54764"
|
76 |
+
] * COLOR_ITER_NUM
|
77 |
+
|
78 |
+
COLORS_4 = [
|
79 |
+
"#979998",
|
80 |
+
"#C69287",
|
81 |
+
"#E79A90",
|
82 |
+
"#EFBC91",
|
83 |
+
"#E4CD87",
|
84 |
+
"#FAE5BB",
|
85 |
+
"#DDDDDF"
|
86 |
+
] * COLOR_ITER_NUM
|
87 |
+
|
88 |
+
COLORS_5 = [
|
89 |
+
"#91CCC0",
|
90 |
+
"#7FABD1",
|
91 |
+
"#F7AC53",
|
92 |
+
"#EC6E66",
|
93 |
+
"#B5CE4E",
|
94 |
+
"#BD7795",
|
95 |
+
"#7C7979"
|
96 |
+
] * COLOR_ITER_NUM
|
97 |
+
|
98 |
+
COLORS_6 = [
|
99 |
+
"#E9687A",
|
100 |
+
"#F58F7A",
|
101 |
+
"#FDE2D8",
|
102 |
+
"#CFCFD0",
|
103 |
+
"#B6B3D6"
|
104 |
+
] * COLOR_ITER_NUM
|
105 |
+
|
106 |
+
|
107 |
+
# 文件路径相关静态变量存储类
|
108 |
+
class FilePath:
|
109 |
+
png_base = "./buffer/{}.png"
|
110 |
+
excel_base = "./buffer/{}.xlsx"
|
111 |
+
|
112 |
+
# [绘图]
|
113 |
+
display_dataset = "current_excel_data"
|
114 |
+
|
115 |
+
data_distribution_plot = "data_distribution_plot"
|
116 |
+
descriptive_indicators_plot = "descriptive_indicators_plot"
|
117 |
+
heatmap_plot = "heatmap_plot"
|
118 |
+
learning_curve_plot = "learning_curve_plot"
|
119 |
+
shap_beeswarm_plot = "shap_beeswarm_plot"
|
120 |
+
data_fit_plot = "data_fit_plot"
|
121 |
+
waterfall_plot = "waterfall_plot"
|
122 |
+
force_plot = "force_plot"
|
123 |
+
    dependence_plot = "dependence_plot"
    # Plot Step 15: add the name of any new plot method here


# Static storage class for model names
class MN:  # ModelName
    classification = "classification"
    regression = "regression"

    # [Models]
    linear_regressor = "linear regressor"
    polynomial_regressor = "polynomial regressor"
    logistic_classifier = "logistic classifier"
    decision_tree_classifier = "decision tree classifier"
    random_forest_classifier = "random forest classifier"
    random_forest_regressor = "random forest regressor"
    xgboost_classifier = "xgboost classifier"
    lightGBM_classifier = "lightGBM classifier"
    gradient_boosting_regressor = "gradient boosting regressor"
    svm_classifier = "svm classifier"
    svm_regressor = "svm regressor"
    knn_classifier = "knn classifier"
    knn_regressor = "knn regressor"
    naive_bayes_classifier = "naive bayes classifier"
    # Model Step 4: add the name of any new model here

    # [Plots]
    data_distribution = "data_distribution"
    descriptive_indicators = "descriptive_indicators"
    heatmap = "heatmap"
    learning_curve = "learning_curve"
    shap_beeswarm = "shap_beeswarm"
    data_fit = "data_fit"
    waterfall = "waterfall"
    force = "force"
    dependence = "dependence"
    # Plot Step 4: add the name of any new plot method here


# Static storage class for component label names
class LN:  # LabelName
    choose_dataset_radio = "Select the desired data source [required]"
    display_total_col_num_text = "Total number of columns"
    display_total_row_num_text = "Total number of rows"
    display_na_list_text = "Columns with missing values"
    del_all_na_col_button = "Delete all columns with missing values [optional]"
    display_duplicate_num_text = "Number of duplicate rows"
    del_col_checkboxgroup = "Select the columns to delete"
    del_col_button = "Delete [optional]"
    remain_row_slider = "Number of rows to keep"
    remain_row_button = "Keep [optional]"
    del_duplicate_button = "Delete all duplicate rows [optional]"
    encode_label_checkboxgroup = "Select the string-valued columns to label-encode"
    display_encode_label_dataframe = "Label-encoding information"
    encode_label_button = "Convert string values to numeric [optional]"
    change_data_type_to_float_button = "Force-convert all data to float (except column 1) [required]"
    standardize_data_checkboxgroup = "Select the columns to standardize"
    standardize_data_button = "Standardize [optional]"
    select_as_y_radio = "Select the dependent variable [required]"
    choose_assign_radio = "Select the task type (column 1 is force-converted according to the task type) [required]"
    train_size_textbox = "Proportion of data in the training set after the split"
    model_optimize_radio = "Select a hyperparameter optimization method"
    model_train_input_params_dataframe = "Hyperparameter list"
    model_train_button = "Train"
    model_train_params_dataframe = "Parameters of the trained model"
    model_train_metrics_dataframe = "Metrics of the trained model"
    select_as_model_radio = "Select the model to train"

    # [Models]
    linear_regression_model_radio = "Select the linear regression model"
    naive_bayes_classification_model_radio = "Select the naive Bayes classification model"
    # Model Step 5: add the names of any extra components for a new model here

    title_name_textbox = "Title"
    x_label_textbox = "x-axis label"
    y_label_textbox = "y-axis label"
    colors = ["Color {}".format(i) for i in range(StaticValue.MAX_NUM)]
    labels = ["Legend {}".format(i) for i in range(StaticValue.MAX_NUM)]

    # [Plots]
    heatmap_is_rotate = "Rotate x-axis labels?"
    heatmap_checkboxgroup = "Select the columns for the correlation heatmap"
    heatmap_button = "Draw correlation heatmap"
    data_distribution_radio = "Select the column for the data distribution plot"
    data_distribution_is_rotate = "Rotate x-axis labels?"
    data_distribution_button = "Draw data distribution plot"
    descriptive_indicators_checkboxgroup = "Select the columns for the box plot"
    descriptive_indicators_is_rotate = "Rotate x-axis labels?"
    descriptive_indicators_button = "Draw box plot"
    learning_curve_checkboxgroup = "Select the models for the learning curve plot"
    learning_curve_button = "Draw learning curve plot"
    shap_beeswarm_radio = "Select the model for the feature beeswarm plot"
    shap_beeswarm_type = "Select the image type"
    shap_beeswarm_button = "Draw feature beeswarm plot"
    data_fit_checkboxgroup = "Select the models for the data fit plot"
    data_fit_button = "Draw data fit plot"
    waterfall_radio = "Select the model for the feature waterfall plot"
    waterfall_number = "Enter the variable index of the relevant feature"
    waterfall_button = "Draw feature waterfall plot"
    force_radio = "Select the model for the feature force plot"
    force_number = "Enter the variable index of the relevant feature"
    force_button = "Draw feature force plot"
    dependence_radio = "Select the model for the feature dependence plot"
    dependence_col = "Select the corresponding column"
    dependence_button = "Draw feature dependence plot"
    # Plot Step 5: add the names of any components for a new plot method here

    data_distribution_plot = "Data distribution plot"
    descriptive_indicators_plot = "Box plot"
    heatmap_plot = "Correlation heatmap"
    learning_curve_plot = "Learning curve plot"
    shap_beeswarm_plot = "Feature beeswarm plot"
    data_fit_plot = "Data fit plot"
    waterfall_plot = "Feature waterfall plot"
    force_plot = "Feature force plot"
    dependence_plot = "Feature dependence plot"
    # Plot Step 6: add the name of any new plot method here
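For orientation, a minimal sketch of how these name constants can be consumed elsewhere in the app; the `get_model_names` helper below is hypothetical, and only illustrates the dispatch-by-constant pattern that the `# Step` extension comments support:

```python
# Hypothetical usage sketch, not part of this commit: callbacks can branch on
# MN constants instead of hard-coded strings, so adding a model only touches
# the "Step" extension points marked above.
from classes.static_custom_class import MN

def get_model_names(task_type):
    # Offer only the models that match the selected task type.
    if task_type == MN.classification:
        return [MN.logistic_classifier, MN.decision_tree_classifier,
                MN.random_forest_classifier, MN.naive_bayes_classifier]
    return [MN.linear_regressor, MN.random_forest_regressor,
            MN.gradient_boosting_regressor]
```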
data/notes.md
CHANGED
@@ -1,12 +1,198 @@
# EasyMachineLearning
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## Introduction
- Version: v1.0
- Author: 李凌浩
- If you run into any problem, please send a private message or leave feedback on GitHub ~
- If you have an idea for a new feature, please add the author on WeChat ~
- For cooperation, please contact the author ~
*( WX: llh13857750421 )*
- ଘ(੭ˊ꒳ˋ)੭

## Notes
- Model training and visualization do not show a progress bar yet; this feature may be added in a later version

## Explanations
### 1. Data source
##### *i. Select the desired data source [required]*
```
·Custom: upload your own Excel spreadsheet as the data source
·Iris Dataset: iris dataset (for classification tasks) [built into sklearn]
·Wine Dataset: wine dataset (for classification tasks) [built into sklearn]
·Breast Cancer Dataset: breast cancer dataset (for classification tasks) [built into sklearn]
·Diabetes Dataset: diabetes dataset (for regression tasks) [built into sklearn]
·California Housing Dataset: California housing-price dataset (for regression tasks) [bundled with the program]

·This is the first step of every machine learning task
·Changing the data source automatically clears all trained-model caches and parts of the other caches
```
### 2. Current data information
```
·Total number of columns: total column count of the data currently being worked on
·Total number of rows: total row count of the data currently being worked on
·Number of rows to keep [optional]: drag the slider to choose how many rows of the current data to keep; all other rows are deleted
·Columns with missing values: shows the names of columns that contain missing values such as NaN
·Delete all columns with missing values [optional]: deletes the columns of the current data that contain missing values such as NaN
·Number of duplicate rows: the number of identical rows in the current data
·Delete all duplicate rows [optional]: deletes the identical rows in the current data

·The current data is displayed in real time (view only, not editable)
·The [*index] column is the index of the current data, not one of its columns (it is added automatically to make the data easier to inspect)
·The leftmost column is always the dependent variable; the remaining columns are the independent variables
·The current data can be downloaded to your machine at any time (Excel format)
```
### 3. Data processing
##### *i. Select the dependent variable [required]*
```
·All column names of the current data

·Choose the column that serves as the dependent variable of the task (the selected column is automatically moved to the first column of the current data)
```
##### *ii. Force-convert all data to float (except column 1) [required]*
```
·The "column name - data type" table lists the data type of every column of the current data (string columns show as "object")

·Column 1, the dependent variable, is not converted in this step
```
##### *iii. Select the task type (column 1 is force-converted according to the task type) [required]*
```
·Classification
·Regression

·Choosing classification force-converts column 1 to string data
·Choosing regression force-converts column 1 to float data
```
##### *iv. Select the columns to delete*
```
·All column names of the current data

·Delete [optional]: removes the selected columns from the current data
```
##### *v. Select the string-valued columns to label-encode*
```
·All columns of the current data that hold string data

·Convert string values to numeric [optional]: force-converts the selected columns to float
```
##### *vi. Select the columns to standardize*
```
·All columns of the current data that have not been standardized yet

·Standardize [optional]: standardizes each selected column on a per-column basis (scaled into the 0-1 range)
```
### 4. Models (shown only after all the [required] steps above have been completed!)
##### *i. Select the model to train*
```
·All currently available model names

·Selecting a model shows the information and options for that model
```
##### *ii. Proportion of data in the training set after the split*
```
·The share of all data that goes into the training set
·Defaults to 0.8
```
##### *iii. Select a hyperparameter optimization method*
```
·None: train directly (fast) (choose this to get a quick result when the other methods are too slow)
·Grid search: moderately fast (should be the first choice for model tuning)
·Bayesian optimization: very slow

·Prefer grid search
·For tree models such as decision tree, random forest, and XGBoost, hyperparameter optimization is very slow on large datasets; decide for yourself whether the wait is worth it
·The more parameter values, the slower the search
```
##### *iv. Hyperparameter list (shown after a hyperparameter optimization method is selected)*
```
·Textboxes with an orange border hold hyperparameter names (editable)
·The borderless textboxes in each row hold the candidate values for that hyperparameter (editable)

·Deleting the value in a textbox (leaving an empty string) removes that candidate value
·Values can only be modified or removed, not added
·On every load, the default parameter dictionary of each model is shown
```
##### *v. Whether the model has finished training*
```
·The checkbox is ticked once the selected model has finished training
```
### 5. Visualization
##### *i. Data distribution plot*
```
·Requires selecting the column to visualize
·A bar chart of the value counts of each column
```
##### *ii. Box plot*
```
·Requires selecting the columns to visualize
·A box plot of the common summary statistics of each column
```
##### *iii. Correlation heatmap*
```
·Requires selecting the columns to visualize
·Pearson correlation coefficients between the columns
```
##### *iv. Learning curve plot*
```
·Requires selecting the names of trained models
·Fit on the training and validation data (two curves per model)
```
##### *v. Data fit plot*
```
·Requires selecting the names of trained models
·The true-value curve of the test set against each model's predicted-value curve
```
##### *vi. Feature beeswarm plot*
```
·Requires selecting the name of a trained model
·Requires selecting the image type
·How strongly each feature contributes to the model overall
```
##### *vii. Feature waterfall plot*
```
·Requires selecting the name of a trained model
·Requires selecting the variable index of the relevant feature
·How strongly each feature contributes to the model overall
```
##### *viii. Feature force plot*
```
·Requires selecting the name of a trained model
·Requires selecting the variable index of the relevant feature
·How strongly each feature contributes to the model overall
```
##### *ix. Feature dependence plot*
```
·Requires selecting the name of a trained model
·Requires selecting the corresponding column
·How strongly each feature contributes to the model overall
```
##### *Legends*
```
·The name of each legend entry in the plot

·Shown only when the plot has a legend (Chinese is not supported)
```
##### *Axes*
```
·Title
·x-axis label
·y-axis label

·Chinese is not supported
```
##### *Colors*
```
·A color picker and hex value for each color in the plot

·Shown only when the plot has changeable colors
```
##### *Plot*
```
·The current plot can be downloaded to your machine at any time (PNG format)
```
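To make the recommended grid-search option concrete, here is a reference sketch of what that path boils down to in scikit-learn; the app's own implementation lives in analysis/others/hyperparam_optimize.py and may differ, and the dataset, model, and alpha grid below are illustrative only:

```python
# Reference sketch of grid search over a candidate dictionary shaped like the
# app's hyperparameter list. Illustrative values, not the app's exact call.
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.naive_bayes import MultinomialNB

x, y = load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8)

param_grid = {"alpha": [0.1, 0.5, 1.0, 2.0]}  # hyperparameter -> candidate values

search = GridSearchCV(MultinomialNB(), param_grid, cv=5)
search.fit(x_train, y_train)
print(search.best_params_, search.best_score_)
```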
design/__init__.py
ADDED
File without changes
design/custom.css
ADDED
@@ -0,0 +1,5 @@
.params_name textarea {
    font-weight: bold;
    font-style: oblique;
    border: solid #ee9900;
}
design/welcome.js
ADDED
@@ -0,0 +1,31 @@
function createGradioAnimation() {
    var container = document.createElement('div');
    container.id = 'gradio-animation';
    container.style.fontSize = '2em';
    container.style.fontWeight = 'bold';
    container.style.textAlign = 'center';
    container.style.marginBottom = '20px';

    var text = 'Welcome to EasyMachineLearning!';
    for (var i = 0; i < text.length; i++) {
        (function(i){
            setTimeout(function(){
                var letter = document.createElement('span');
                letter.style.opacity = '0';
                letter.style.transition = 'opacity 0.5s';
                letter.innerText = text[i];

                container.appendChild(letter);

                setTimeout(function() {
                    letter.style.opacity = '1';
                }, 50);
            }, i * 250);
        })(i);
    }

    var gradioContainer = document.querySelector('.gradio-container');
    gradioContainer.insertBefore(container, gradioContainer.firstChild);

    return 'Animation created';
}
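How custom.css and welcome.js get attached to the app is not visible in this diff (the app.py entry shows no content changes here). As a hedged sketch only, under the assumption that the app passes them to `gr.Blocks` the way recent Gradio versions support:

```python
# Sketch of typical Gradio wiring; the actual hookup in app.py may differ.
import gradio as gr

with open("design/custom.css") as f:
    css = f.read()
with open("design/welcome.js") as f:
    js = f.read()  # defines createGradioAnimation()

# Recent Gradio versions accept raw CSS plus a JS function run on page load.
with gr.Blocks(css=css, js=js) as demo:
    ...

demo.launch()
```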
functions/__init__.py
ADDED
File without changes
functions/process.py
ADDED
@@ -0,0 +1,38 @@
def get_values_from_container_class(container):
    return container.x_train, container.y_train, container.x_test, container.y_test, container.hyper_params_optimize


def transform_params_list(params_class, params_list, model=None):
    input_params_keys = []
    input_params_values = []
    inner_value_list = []

    keys = params_class.get_params(model).keys() if model else params_class.get_params().keys()
    for i, param in enumerate(params_list):
        if param in keys:  # a hyperparameter name starts a new candidate group
            input_params_keys.append(param)
            if i != 0:
                input_params_values.append(inner_value_list)
                inner_value_list = []
        else:  # anything else is a candidate value for the current hyperparameter
            inner_value_list.append(param)
    else:  # for-else: flush the candidates of the last hyperparameter
        input_params_values.append(inner_value_list)
    input_params = dict(zip(input_params_keys, input_params_values))

    for k, v in input_params.items():
        if k in keys:
            value_type = params_class.get_params_type(model)[k] if model else params_class.get_params_type()[k]
            try:
                if value_type == "int":
                    input_params[k] = [int(x) for x in input_params[k]]
                elif value_type == "float":
                    input_params[k] = [float(x) for x in input_params[k]]
                elif value_type == "bool":
                    input_params[k] = [x == "True" for x in input_params[k]]
                elif value_type == "str":
                    input_params[k] = [str(x) for x in input_params[k]]
            except Exception:  # fall back to strings if a cast fails
                input_params[k] = [str(x) for x in input_params[k]]

    return input_params
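A small worked example of `transform_params_list`, to make the data flow concrete. `DemoParams` below is made up for illustration (the real params classes live under analysis/model_train/), and the sketch assumes the `StaticValue` type constants compared against in the function are the plain strings "int"/"float"/"bool"/"str":

```python
from functions.process import transform_params_list

# Illustrative stand-in for the real *Params classes.
class DemoParams:
    @classmethod
    def get_params_type(cls):
        return {"alpha": "float", "fit_prior": "bool"}

    @classmethod
    def get_params(cls):
        return {"alpha": [0.1, 1.0], "fit_prior": [True, False]}

# Flat list as it comes out of the hyperparameter dataframe:
flat = ["alpha", "0.1", "0.5", "fit_prior", "True"]
print(transform_params_list(DemoParams, flat))
# -> {'alpha': [0.1, 0.5], 'fit_prior': [True]}
```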
visualization/draw_boxplot.py
CHANGED
@@ -1,7 +1,4 @@
 import matplotlib.pyplot as plt
-import numpy as np
-
-from static.config import Config
 
 
 def draw_boxplot(x_data, paint_object, will_rotate=False):
visualization/draw_data_fit_total.py
CHANGED
@@ -1,11 +1,8 @@
 import numpy as np
 from matplotlib import pyplot as plt
 
-from static.new_class import PaintObject
-from static.config import Config
 
-
-def draw_data_fit_total(input_dict, paint_object: PaintObject):
+def draw_data_fit_total(input_dict, paint_object):
     plt.figure(figsize=(10, 6), dpi=300)
 
     for i, input_dict_items in enumerate(input_dict.items()):
visualization/draw_heat_map.py
CHANGED
@@ -1,9 +1,7 @@
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd
 
-from static.config import Config
-
 
 def draw_heat_map(x_data, col_list, paint_object, will_rotate=False):
     plt.rcParams.update({'figure.autolayout': True})
@@ -28,8 +26,8 @@ def draw_heat_map(x_data, col_list, paint_object, will_rotate=False):
 
     plt.yticks(np.arange(len(col_list)), col_list)
     plt.imshow(np_data)
-    plt.colorbar(
-    plt.tight_layout()
+    plt.colorbar()
+    # plt.tight_layout()
 
     plt.title(paint_object.get_name())
 
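Per data/notes.md, this heatmap shows Pearson correlation coefficients between columns. As a sketch (not this file's exact call site), the matrix it visualizes can be produced with pandas:

```python
# DataFrame.corr computes pairwise Pearson correlations by default.
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 4, 6, 8], "c": [4, 3, 2, 1]})
corr = df.corr()           # method="pearson" is the default
print(corr.loc["a", "b"])  # 1.0: a and b are perfectly linearly related
```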
visualization/draw_histogram.py
CHANGED
@@ -1,10 +1,5 @@
-import random
-
-import numpy as np
 import matplotlib.pyplot as plt
-
-from static.config import Config
-from static.new_class import PaintObject
+import numpy as np
 
 
 def draw_histogram(nums, labels, paint_object, will_rotate=False, will_show_text=True):
visualization/draw_histogram_line_subgraph.py
CHANGED
@@ -1,7 +1,7 @@
 import numpy as np
 from matplotlib import pyplot as plt
 
-from
+from classes.static_custom_class import *
 
 
 def draw_histogram_line_subgraph(total_data_for_plot):
@@ -22,7 +22,7 @@ def draw_histogram_line_subgraph(total_data_for_plot):
             data[1],
             data[2],
             "-o",
-            color=
+            color=StaticValue.COLORS[0],
             markersize=4
         )
         ax[str(chr(i+65))].set_title(data[3])
@@ -33,7 +33,7 @@ def draw_histogram_line_subgraph(total_data_for_plot):
             data[1],
             align="center",
             alpha=1,
-            color=
+            color=StaticValue.COLORS,
             tick_label=data[2]
         )
 
visualization/draw_learning_curve.py
CHANGED
@@ -1,7 +1,6 @@
-import numpy as np
 from matplotlib import pyplot as plt
 
-from
+from classes.static_custom_class import *
 
 
 def draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std):
@@ -12,13 +11,13 @@ def draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std):
         train_scores_mean - train_scores_std,
         train_scores_mean + train_scores_std,
         alpha=0.1,
-        color=
+        color=StaticValue.COLORS[0]
     )
     plt.plot(
         train_sizes,
         train_scores_mean,
         "o-",
-        color=
+        color=StaticValue.COLORS[0],
         label="Training score"
     )
 
@@ -27,13 +26,13 @@ def draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std):
         test_scores_mean - test_scores_std,
         test_scores_mean + test_scores_std,
         alpha=0.1,
-        color=
+        color=StaticValue.COLORS[1]
     )
     plt.plot(
         train_sizes,
         test_scores_mean,
         "o-",
-        color=
+        color=StaticValue.COLORS[1],
         label="Cross-validation score"
     )
 
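For context on where the five arguments of `draw_learning_curve` come from: scikit-learn's `learning_curve` returns per-fold score matrices whose row-wise means and standard deviations match these parameters. A sketch under illustrative dataset and model choices, not the repo's exact call:

```python
# Sketch of producing draw_learning_curve's inputs with scikit-learn.
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import learning_curve

from visualization.draw_learning_curve import draw_learning_curve

x, y = load_iris(return_X_y=True)
train_sizes, train_scores, test_scores = learning_curve(
    LogisticRegression(max_iter=1000), x, y, cv=5
)

draw_learning_curve(
    train_sizes,
    train_scores.mean(axis=1), train_scores.std(axis=1),
    test_scores.mean(axis=1), test_scores.std(axis=1),
)
```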
visualization/draw_learning_curve_total.py
CHANGED
@@ -1,9 +1,7 @@
 from matplotlib import pyplot as plt
 
-from static.new_class import PaintObject
-
 
-def draw_learning_curve_total(input_dict, paint_object: PaintObject):
+def draw_learning_curve_total(input_dict, paint_object):
     plt.figure(figsize=(10, 8), dpi=300)
 
     for i, values in enumerate(input_dict.values()):
visualization/draw_line_graph.py
CHANGED
@@ -1,8 +1,5 @@
-import numpy as np
 import matplotlib.pyplot as plt
 
-from static.config import Config
-
 
 def draw_line_graph(nums, labels, paint_object):
     plt.figure(figsize=(10, 8), dpi=300)
visualization/draw_momentum.py
CHANGED
@@ -1,9 +1,4 @@
-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.metrics import *
-from sklearn.preprocessing import label_binarize
-
-from coding.llh.static.config import Config
 
 
 def draw_momentum(df, p1_name, p2_name):
visualization/draw_parallel_coordinates.py
CHANGED
@@ -1,7 +1,5 @@
-import pandas as pd
 import matplotlib.pyplot as plt
-
-from coding.llh.static.config import Config
+import pandas as pd
 
 
 def draw_parallel_coordinates(df):
visualization/draw_play_flow.py
CHANGED
@@ -1,9 +1,4 @@
-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.metrics import *
-from sklearn.preprocessing import label_binarize
-
-from coding.llh.static.config import Config
 
 
 def draw_play_flow(df, p1_name, p2_name, p1_ace, p2_ace, p1_net_pt_won, p2_net_pt_won, p1_break_pt_won, p2_break_pt_won):
visualization/draw_pred_total.py
CHANGED
@@ -1,8 +1,6 @@
 import numpy as np
 from matplotlib import pyplot as plt
 
-from coding.llh.static.config import Config
-
 
 def draw_pred_total(input_dict):
     plt.figure(figsize=(10, 6))
visualization/draw_roc_auc_curve_total.py
CHANGED
@@ -1,9 +1,7 @@
-import numpy as np
 import matplotlib.pyplot as plt
 from sklearn.metrics import *
-from sklearn.preprocessing import label_binarize
 
-from
+from classes.static_custom_class import *
 
 
 def draw_roc_auc_curve_total(input_dict, type):
@@ -20,7 +18,7 @@ def draw_roc_auc_curve_total(input_dict, type):
         fpr,
         tpr,
         "o-",
-        color=
+        color=StaticValue.COLORS[i],
         label=label_name+str(round(auc(fpr, tpr), 2))
     )
 
@@ -40,7 +38,7 @@ def draw_roc_auc_curve_total(input_dict, type):
         fpr,
         tpr,
         "o-",
-        color=
+        color=StaticValue.COLORS[i],
         label=label_name + str(round(auc(fpr, tpr), 2))
     )
 
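The fpr/tpr pairs plotted above are of the kind produced by scikit-learn's `roc_curve`; a toy sketch of building one such pair (the exact structure of `input_dict` is not shown in this hunk):

```python
from sklearn.metrics import auc, roc_curve

y_true = [0, 0, 1, 1]
y_score = [0.1, 0.4, 0.35, 0.8]  # predicted probability of the positive class

fpr, tpr, _ = roc_curve(y_true, y_score)
print(auc(fpr, tpr))  # 0.75 for this toy input
```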
visualization/draw_scatter.py
CHANGED
@@ -1,9 +1,7 @@
-import numpy as np
 import matplotlib.pyplot as plt
+import numpy as np
 from mpl_toolkits.mplot3d import Axes3D
 
-from coding.llh.static.config import Config
-
 
 # Draw scatter
 def draw_scatter_2D(x_data, y_data, centers, title):
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
-
import numpy as np
|
2 |
import matplotlib.pyplot as plt
|
|
|
3 |
|
4 |
-
from
|
5 |
|
6 |
|
7 |
# draw scatter line graph
|
@@ -15,9 +15,9 @@ def draw_scatter_line_graph(x_data, y_pred_data, y_real_data, coef, intercept, l
|
|
15 |
fig, ax = plt.subplot_mosaic(layout, figsize=(16, 16))
|
16 |
|
17 |
for i in range(np.size(x_data, 1)):
|
18 |
-
ax[str(chr(i+65))].scatter(x_data[:, i], y_pred_data.T, color=
|
19 |
-
ax[str(chr(i+65))].scatter(x_data[:, i], y_real_data, color=
|
20 |
-
ax[str(chr(i+65))].plot(x_data[:, i], x_data[:, i] * coef[i] + intercept, color=
|
21 |
ax[str(chr(i + 65))].legend()
|
22 |
|
23 |
plt.suptitle(title)
|
|
|
|
|
1 |
import matplotlib.pyplot as plt
|
2 |
+
import numpy as np
|
3 |
|
4 |
+
from classes.static_custom_class import *
|
5 |
|
6 |
|
7 |
# draw scatter line graph
|
|
|
15 |
fig, ax = plt.subplot_mosaic(layout, figsize=(16, 16))
|
16 |
|
17 |
for i in range(np.size(x_data, 1)):
|
18 |
+
ax[str(chr(i+65))].scatter(x_data[:, i], y_pred_data.T, color=StaticValue.COLORS[0], s=4, label=labels[0])
|
19 |
+
ax[str(chr(i+65))].scatter(x_data[:, i], y_real_data, color=StaticValue.COLORS[1], s=4, label=labels[1])
|
20 |
+
ax[str(chr(i+65))].plot(x_data[:, i], x_data[:, i] * coef[i] + intercept, color=StaticValue.COLORS[2], markersize=4)
|
21 |
ax[str(chr(i + 65))].legend()
|
22 |
|
23 |
plt.suptitle(title)
|
visualization/draw_swings_and_positives.py
CHANGED
@@ -1,9 +1,4 @@
-import numpy as np
 import matplotlib.pyplot as plt
-from sklearn.metrics import *
-from sklearn.preprocessing import label_binarize
-
-from coding.llh.static.config import Config
 
 
 def draw_swings_and_positives(df, p1_name, p2_name):