Commit 10c7c36
LLH committed on 2024/02/20 14:15
1 parent: 11b81b9
Changed files:
- analysis/bayes_model.py +70 -16
- analysis/descriptive_analysis.py +1 -2
- analysis/distance_model.py +115 -0
- analysis/gradient_model.py +45 -52
- analysis/kernel_model.py +84 -62
- analysis/linear_model.py +79 -56
- analysis/others/__init__.py +0 -0
- analysis/others/evaluation_model.py +99 -0
- analysis/others/gaussian_model.py +28 -0
- analysis/others/markov_model.py +98 -0
- analysis/others/poly_model.py +12 -0
- analysis/shap_model.py +41 -4
- analysis/tree_model.py +208 -126
- app.py +903 -151
- data/__init__.py +0 -0
- data/fetch_california_housing.csv +0 -0
- data/notes.md +12 -0
- metrics/calculate_classification_metrics.py +14 -11
- metrics/calculate_regression_metrics.py +11 -27
- requirements.txt +5 -3
- static/config.py +109 -27
- static/new_class.py +195 -0
- static/process.py +26 -16
- visualization/draw_boxplot.py +18 -11
- visualization/draw_data_fit_total.py +48 -0
- visualization/draw_heat_map.py +16 -14
- visualization/draw_histogram.py +19 -14
- visualization/draw_histogram_line_subgraph.py +1 -1
- visualization/draw_learning_curve_total.py +45 -59
- visualization/draw_line_graph.py +10 -23
- visualization/draw_pred_total.py +10 -12
- visualization/draw_scatter_line_graph.py +1 -1
analysis/bayes_model.py
CHANGED
@@ -1,28 +1,82 @@
+from sklearn.model_selection import learning_curve
 from sklearn.naive_bayes import *
+import numpy as np

-from …
-from …
-from …
-from …
+from static.new_class import Container
+from static.process import grid_search, bayes_search
+from visualization.draw_line_graph import draw_line_graph
+from visualization.draw_scatter_line_graph import draw_scatter_line_graph
+from metrics.calculate_classification_metrics import calculate_classification_metrics
+from metrics.calculate_regression_metrics import calculate_regression_metrics


+class NaiveBayesClassifierParams:
+    @classmethod
+    def get_params(cls, sort):
+        if sort == "MultinomialNB":
+            return {
+                "alpha": [0.1, 0.5, 1.0, 2.0]
+            }
+        elif sort == "GaussianNB":
+            return {}
+        elif sort == "ComplementNB":
+            return {
+                "alpha": [0.1, 0.5, 1, 10],
+                "fit_prior": [True, False],
+                "norm": [True, False]
+            }
+
+
+# Naive Bayes classification
+def naive_bayes_classification(container: Container, model=None):
+    x_train = container.x_train
+    y_train = container.y_train
+    x_test = container.x_test
+    y_test = container.y_test
+    hyper_params_optimize = container.hyper_params_optimize
     info = {}

+    if model == "MultinomialNB":
+        naive_bayes_model = MultinomialNB()
+        params = NaiveBayesClassifierParams.get_params(model)
+    elif model == "GaussianNB":
+        naive_bayes_model = GaussianNB()
+        params = NaiveBayesClassifierParams.get_params(model)
+    elif model == "ComplementNB":
+        naive_bayes_model = ComplementNB()
+        params = NaiveBayesClassifierParams.get_params(model)
+    else:
+        naive_bayes_model = GaussianNB()
+        params = NaiveBayesClassifierParams.get_params(model)
+
+    if hyper_params_optimize == "grid_search":
+        best_model = grid_search(params, naive_bayes_model, x_train, y_train)
+    elif hyper_params_optimize == "bayes_search":
+        best_model = bayes_search(params, naive_bayes_model, x_train, y_train)
+    else:
+        best_model = naive_bayes_model
+        best_model.fit(x_train, y_train)
+
+    info["参数"] = best_model.get_params()
+
+    y_pred = best_model.predict(x_test)
+    # y_pred = best_model.predict(x_test).reshape(-1, 1)
+    container.set_y_pred(y_pred)

+    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

+    train_scores_mean = np.mean(train_scores, axis=1)
+    train_scores_std = np.std(train_scores, axis=1)
+    test_scores_mean = np.mean(test_scores, axis=1)
+    test_scores_std = np.std(test_scores, axis=1)
+    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                        test_scores_std)

+    info["指标"] = calculate_classification_metrics(y_pred, y_test)

+    container.set_info(info)
+    container.set_status("trained")
+    container.set_model(best_model)

-    return …
+    return container
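Note: the new code depends on grid_search and bayes_search from static.process, whose definitions are not part of this diff. A minimal sketch of what such helpers could look like, assuming they wrap scikit-learn's GridSearchCV and scikit-optimize's BayesSearchCV and return the refitted best estimator:

# Hypothetical sketch; the actual static/process.py is not shown in this commit.
from sklearn.model_selection import GridSearchCV
from skopt import BayesSearchCV  # assumes scikit-optimize is installed


def grid_search(params, model, x_train, y_train, cv=5):
    # Exhaustive search over the parameter grid; refit on the best setting.
    searcher = GridSearchCV(model, params, cv=cv)
    searcher.fit(x_train, y_train)
    return searcher.best_estimator_


def bayes_search(params, model, x_train, y_train, cv=5, n_iter=32):
    # Sequential Bayesian optimization over the same search space.
    searcher = BayesSearchCV(model, params, cv=cv, n_iter=n_iter)
    searcher.fit(x_train, y_train)
    return searcher.best_estimator_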
analysis/descriptive_analysis.py
CHANGED
@@ -236,8 +236,7 @@ def get_descriptive_indicators_related(df):
             descriptive_indicators_df["Upper Quartile"][col]
         descriptive_indicators_df["Kurtosis"][col] = df[col].kurt()
         descriptive_indicators_df["Skewness"][col] = df[col].skew()
-        descriptive_indicators_df["Coefficient of Variation"][col] = descriptive_indicators_df["Standard Deviation"][
-            col] \
+        descriptive_indicators_df["Coefficient of Variation"][col] = descriptive_indicators_df["Standard Deviation"][col] \
             / descriptive_indicators_df["Avg"][col]

     # draw_heat_map(descriptive_indicators_df.to_numpy(), "descriptive indicators", True)
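Note: the reflowed statement still assigns through chained indexing (df[indicator][col] = ...), which pandas flags with SettingWithCopyWarning. A vectorized equivalent of the loop body, assuming the same column layout with "Standard Deviation" and "Avg" already filled in:

# Single-pass equivalent of the per-column computation shown above.
descriptive_indicators_df["Coefficient of Variation"] = (
    descriptive_indicators_df["Standard Deviation"] / descriptive_indicators_df["Avg"]
)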
analysis/distance_model.py
ADDED
@@ -0,0 +1,115 @@
+from sklearn.model_selection import learning_curve
+
+from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
+from analysis.shap_model import *
+from metrics.calculate_classification_metrics import calculate_classification_metrics
+from metrics.calculate_regression_metrics import calculate_regression_metrics
+from static.new_class import *
+from static.process import grid_search, bayes_search
+
+
+class KNNClassifierParams:
+    @classmethod
+    def get_params(cls):
+        return {
+            "n_neighbors": [3, 5, 7, 9],
+            "weights": ['uniform', 'distance'],
+            "p": [1, 2]
+        }
+
+
+# KNN classification
+def knn_classifier(container: Container):
+    x_train = container.x_train
+    y_train = container.y_train
+    x_test = container.x_test
+    y_test = container.y_test
+    hyper_params_optimize = container.hyper_params_optimize
+    info = {}
+
+    knn_classifier_model = KNeighborsClassifier()
+    params = KNNClassifierParams.get_params()
+
+    if hyper_params_optimize == "grid_search":
+        best_model = grid_search(params, knn_classifier_model, x_train, y_train)
+    elif hyper_params_optimize == "bayes_search":
+        best_model = bayes_search(params, knn_classifier_model, x_train, y_train)
+    else:
+        best_model = knn_classifier_model
+        best_model.fit(x_train, y_train)
+
+    info["参数"] = best_model.get_params()
+
+    y_pred = best_model.predict(x_test)
+    container.set_y_pred(y_pred)
+
+    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
+
+    train_scores_mean = np.mean(train_scores, axis=1)
+    train_scores_std = np.std(train_scores, axis=1)
+    test_scores_mean = np.mean(test_scores, axis=1)
+    test_scores_std = np.std(test_scores, axis=1)
+    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                        test_scores_std)
+
+    info["指标"] = calculate_classification_metrics(y_pred, y_test)
+
+    container.set_info(info)
+    container.set_status("trained")
+    container.set_model(best_model)
+
+    return container
+
+
+class KNNRegressionParams:
+    @classmethod
+    def get_params(cls):
+        return {
+            "n_neighbors": [3, 5, 7, 9],
+            "weights": ['uniform', 'distance'],
+            "p": [1, 2]
+        }
+
+
+# KNN regression
+def knn_regression(container: Container):
+    x_train = container.x_train
+    y_train = container.y_train
+    x_test = container.x_test
+    y_test = container.y_test
+    hyper_params_optimize = container.hyper_params_optimize
+    info = {}
+
+    knn_regression_model = KNeighborsRegressor()
+    params = KNNRegressionParams.get_params()
+
+    if hyper_params_optimize == "grid_search":
+        best_model = grid_search(params, knn_regression_model, x_train, y_train)
+    elif hyper_params_optimize == "bayes_search":
+        best_model = bayes_search(params, knn_regression_model, x_train, y_train)
+    else:
+        best_model = knn_regression_model
+        best_model.fit(x_train, y_train)
+
+    info["参数"] = best_model.get_params()
+
+    y_pred = best_model.predict(x_test)
+    # y_pred = best_model.predict(x_test).reshape(-1, 1)
+    container.set_y_pred(y_pred)
+
+    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
+
+    train_scores_mean = np.mean(train_scores, axis=1)
+    train_scores_std = np.std(train_scores, axis=1)
+    test_scores_mean = np.mean(test_scores, axis=1)
+    test_scores_std = np.std(test_scores, axis=1)
+    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                        test_scores_std)
+
+    info["指标"] = calculate_regression_metrics(y_pred, y_test)
+
+    container.set_info(info)
+    container.set_status("trained")
+    container.set_model(best_model)
+
+    return container
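Note: a hypothetical call site for the new KNN entry points. Container's constructor and attribute names are inferred from the accesses above (static/new_class.py is added in this commit but not fully visible here), so they are assumptions:

# Illustrative usage sketch only.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

x, y = load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

container = Container()                      # assumed no-arg constructor
container.x_train, container.y_train = x_train, y_train
container.x_test, container.y_test = x_test, y_test
container.hyper_params_optimize = "grid_search"

container = knn_classifier(container)
print(container.info["指标"])                # assumed backing attribute for set_info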
analysis/gradient_model.py
CHANGED
@@ -1,72 +1,65 @@
+import numpy as np
 from sklearn.ensemble import GradientBoostingRegressor
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.ensemble import RandomForestClassifier
-from xgboost import XGBClassifier
 from sklearn.model_selection import learning_curve
-import numpy as np
-
-from analysis.shap_model import shap_calculate
-from coding.llh.static.config import Config
-from coding.llh.static.process import grid_search, bayes_search
-from coding.llh.visualization.draw_learning_curve import draw_learning_curve
-from coding.llh.visualization.draw_line_graph import draw_line_graph
-from coding.llh.visualization.draw_scatter_line_graph import draw_scatter_line_graph
-from coding.llh.metrics.calculate_classification_metrics import calculate_classification_metrics
-from coding.llh.metrics.calculate_regression_metrics import calculate_regression_metrics
-from sklearn.ensemble import RandomForestRegressor

+from analysis.shap_model import draw_shap_beeswarm
+from metrics.calculate_regression_metrics import calculate_regression_metrics
+from static.config import Config
+from static.new_class import Container
+from static.process import grid_search, bayes_search
+
+
+class GradientBoostingParams:
+    @classmethod
+    def get_params(cls):
+        return {
+            'n_estimators': [50, 100, 150],
+            'learning_rate': [0.01, 0.1, 0.2],
+            'max_depth': [3, 5, 7],
+            'min_samples_split': [2, 5, 10],
+            'min_samples_leaf': [1, 2, 4]
+        }
+
+
+# Gradient boosting regression
+def gradient_boosting_regression(container: Container):
+    x_train = container.x_train
+    y_train = container.y_train
+    x_test = container.x_test
+    y_test = container.y_test
+    hyper_params_optimize = container.hyper_params_optimize
     info = {}
-    model_name = "Double Exponential Smoothing Plus"

-    params = {
-        'n_estimators': [50, 100, 150],
-        'learning_rate': [0.01, 0.1, 0.2],
-        'max_depth': [3, 5, 7],
-        'min_samples_split': [2, 5, 10],
-        'min_samples_leaf': [1, 2, 4]
-    }
+    gradient_boosting_regression_model = GradientBoostingRegressor(random_state=Config.RANDOM_STATE)
+    params = GradientBoostingParams.get_params()

     if hyper_params_optimize == "grid_search":
-        best_model = grid_search(params, …)
+        best_model = grid_search(params, gradient_boosting_regression_model, x_train, y_train)
     elif hyper_params_optimize == "bayes_search":
-        best_model = bayes_search(params, …)
+        best_model = bayes_search(params, gradient_boosting_regression_model, x_train, y_train)
     else:
-        best_model = …
-        best_model.fit(…)
+        best_model = gradient_boosting_regression_model
+        best_model.fit(x_train, y_train)

-    info["{} Params".format(model_name)] = best_model.get_params()
+    info["参数"] = best_model.get_params()

+    y_pred = best_model.predict(x_test)
+    # y_pred = best_model.predict(x_test).reshape(-1, 1)
+    container.set_y_pred(y_pred)

-    train_sizes, train_scores, test_scores = learning_curve(best_model, …)
+    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

     train_scores_mean = np.mean(train_scores, axis=1)
     train_scores_std = np.std(train_scores, axis=1)
     test_scores_mean = np.mean(test_scores, axis=1)
     test_scores_std = np.std(test_scores, axis=1)
-
-    train_scores_mean[0] = 0.984
-    test_scores_mean[1] = 0.89
-    test_scores_mean[2] = 0.93
-    test_scores_mean[3] = 0.97
-    test_scores_mean[4] = 0.98
-
-    # draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
-
-    # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "logistic regression model residual plot")
+    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                        test_scores_std)

-    info.update(calculate_regression_metrics(y_pred, y_test, model_name))
-    # info.update(calculate_classification_metrics(y_pred, y_test, "logistic regression"))
-    # mae, mse, rsme, r2, ar2 = calculate_regression_metrics(y_pred, y_test, model_name)
+    info["指标"] = calculate_regression_metrics(y_pred, y_test)

-    return y_pred, info, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std
+    container.set_info(info)
+    container.set_status("trained")
+    container.set_model(best_model)
+
+    return container
analysis/kernel_model.py
CHANGED
@@ -1,97 +1,119 @@
+import numpy as np
 from sklearn.model_selection import learning_curve
 from sklearn.svm import SVC
 from sklearn.svm import SVR
-import numpy as np
-
-from coding.llh.analysis.my_learning_curve import my_learning_curve
-from coding.llh.analysis.shap_model import shap_calculate
-from coding.llh.static.process import grid_search, bayes_search
-from coding.llh.visualization.draw_line_graph import draw_line_graph
-from coding.llh.visualization.draw_scatter_line_graph import draw_scatter_line_graph
-from coding.llh.metrics.calculate_classification_metrics import calculate_classification_metrics
-from coding.llh.metrics.calculate_regression_metrics import calculate_regression_metrics

+from metrics.calculate_classification_metrics import calculate_classification_metrics
+from metrics.calculate_regression_metrics import calculate_regression_metrics
+from static.config import Config
+from static.new_class import Container
+from static.process import grid_search, bayes_search
+
+
+class SVMRegressionParams:
+    @classmethod
+    def get_params(cls):
+        return {
+            'kernel': ['linear', 'rbf'],
+            'C': [0.1, 1, 10, 100],
+            'gamma': [0.01, 0.1, 1, 10],
+            'epsilon': [0.01, 0.1, 1]
+        }
+
+
+# SVM regression
+def svm_regression(container: Container):
+    x_train = container.x_train
+    y_train = container.y_train
+    x_test = container.x_test
+    y_test = container.y_test
+    hyper_params_optimize = container.hyper_params_optimize
     info = {}
-    model_name = "Support Vector Regression"

-    params = {
-        'kernel': ['linear', 'rbf'],
-        'C': [0.1, 1, 10, 100],
-        'gamma': [0.01, 0.1, 1, 10],
-        'epsilon': [0.01, 0.1, 1]
-    }
+    svm_regression_model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
+    params = SVMRegressionParams.get_params()

     if hyper_params_optimize == "grid_search":
-        best_model = grid_search(params, …)
+        best_model = grid_search(params, svm_regression_model, x_train, y_train)
     elif hyper_params_optimize == "bayes_search":
-        best_model = bayes_search(params, …)
+        best_model = bayes_search(params, svm_regression_model, x_train, y_train)
     else:
-        best_model = …
-        best_model.fit(…)
+        best_model = svm_regression_model
+        best_model.fit(x_train, y_train)

-    info["…
+    info["参数"] = best_model.get_params()

     y_pred = best_model.predict(x_test)
+    # y_pred = best_model.predict(x_test).reshape(-1, 1)
+    container.set_y_pred(y_pred)

-    # train_sizes, train_scores, test_scores = my_learning_curve(best_model, x[:300], y[:300], cv=5)
-    train_sizes, train_scores, test_scores = learning_curve(best_model, x, y, cv=5, scoring="r2")
+    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

     train_scores_mean = np.mean(train_scores, axis=1)
     train_scores_std = np.std(train_scores, axis=1)
     test_scores_mean = np.mean(test_scores, axis=1)
     test_scores_std = np.std(test_scores, axis=1)
-
-    train_scores_mean[0] = 0.99
-    test_scores_mean[0] = 0.02
-
-    # draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
+    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                        test_scores_std)

-    # info.update(calculate_classification_metrics(y_pred, y_test, "logistic regression"))
-    # mae, mse, rsme, r2, ar2 = calculate_regression_metrics(y_pred, y_test, model_name)
-
-    # shap_calculate(best_model, x_test, feature_names)
+    info["指标"] = calculate_regression_metrics(y_pred, y_test)

+    container.set_info(info)
+    container.set_status("trained")
+    container.set_model(best_model)
+
+    return container
+
+
+class SVMClassifierParams:
+    @classmethod
+    def get_params(cls):
+        return {
+            "C": [0.1, 1, 10, 100],
+            "kernel": ['linear', 'rbf', 'poly'],
+            "gamma": [0.1, 1, 10]
+        }

-# …
-def …
+
+# SVM classification
+def svm_classifier(container: Container):
+    x_train = container.x_train
+    y_train = container.y_train
+    x_test = container.x_test
+    y_test = container.y_test
+    hyper_params_optimize = container.hyper_params_optimize
     info = {}

-    #
-    # # Polynomial kernel SVM
-    # svm_classification_model = SVC(kernel="poly")
-    #
-    # Radial base kernel SVM
-    svm_classification_model = SVC(kernel="rbf")
+    svm_classifier_model = SVC(kernel="rbf")
+    params = SVMClassifierParams.get_params()

+    if hyper_params_optimize == "grid_search":
+        best_model = grid_search(params, svm_classifier_model, x_train, y_train)
+    elif hyper_params_optimize == "bayes_search":
+        best_model = bayes_search(params, svm_classifier_model, x_train, y_train)
+    else:
+        best_model = svm_classifier_model
+        best_model.fit(x_train, y_train)

-    info["Coefficients of linear regression equation"] = lr_coef
+    info["参数"] = best_model.get_params()

+    y_pred = best_model.predict(x_test)
+    # y_pred = best_model.predict(x_test).reshape(-1, 1)
+    container.set_y_pred(y_pred)

+    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

+    train_scores_mean = np.mean(train_scores, axis=1)
+    train_scores_std = np.std(train_scores, axis=1)
+    test_scores_mean = np.mean(test_scores, axis=1)
+    test_scores_std = np.std(test_scores, axis=1)
+    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                        test_scores_std)

+    info["指标"] = calculate_classification_metrics(y_pred, y_test)

-    return …
+    container.set_info(info)
+    container.set_status("trained")
+    container.set_model(best_model)
+
+    return container
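Note: every trainer in this commit stores the same five learning-curve arrays on the container. A short sketch of how such values are conventionally plotted (mirroring scikit-learn's learning-curve examples; the actual drawing code lives in visualization/ and is not shown here):

import matplotlib.pyplot as plt


def plot_learning_curve(train_sizes, train_mean, train_std, test_mean, test_std):
    # Shaded bands show +/- one standard deviation across CV folds.
    plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1)
    plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, alpha=0.1)
    plt.plot(train_sizes, train_mean, "o-", label="Training score")
    plt.plot(train_sizes, test_mean, "o-", label="Cross-validation score")
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    plt.legend(loc="best")
    return plt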
analysis/linear_model.py
CHANGED
@@ -11,7 +11,23 @@ from sklearn.model_selection import learning_curve
 from static.process import grid_search, bayes_search
 from metrics.calculate_classification_metrics import calculate_classification_metrics
 from metrics.calculate_regression_metrics import calculate_regression_metrics
-from …
+from static.new_class import *
+from static.config import Config
+
+
+class LinearRegressionParams:
+    @classmethod
+    def get_params(cls, sort):
+        if sort in ["Lasso", "Ridge", "ElasticNet"]:
+            return {
+                "fit_intercept": [True, False],
+                "alpha": [0.001, 0.01, 0.1, 1.0, 10.0],
+                "random_state": [Config.RANDOM_STATE]
+            }
+        else:
+            return {
+                "fit_intercept": [True, False]
+            }


 # Linear regression
@@ -24,28 +40,20 @@ def linear_regression(container: Container, model=None):
     info = {}

     if model == "Lasso":
-        linear_regression_model = Lasso(alpha=0.1)
-        params = {
-            "fit_intercept": [True, False],
-            "alpha": [0.001, 0.01, 0.1, 1.0, 10.0]
-        }
+        linear_regression_model = Lasso(alpha=0.1, random_state=Config.RANDOM_STATE)
+        params = LinearRegressionParams.get_params(model)
     elif model == "Ridge":
-        linear_regression_model = Ridge(alpha=0.1)
-        params = {
-            "fit_intercept": [True, False],
-            "alpha": [0.001, 0.01, 0.1, 1.0, 10.0]
-        }
+        linear_regression_model = Ridge(alpha=0.1, random_state=Config.RANDOM_STATE)
+        params = LinearRegressionParams.get_params(model)
     elif model == "ElasticNet":
-        linear_regression_model = ElasticNet(alpha=0.1)
-        params = {
-            …
-        }
+        linear_regression_model = ElasticNet(alpha=0.1, random_state=Config.RANDOM_STATE)
+        params = LinearRegressionParams.get_params(model)
+    elif model == "LinearRegression":
+        linear_regression_model = LinearRegression()
+        params = LinearRegressionParams.get_params(model)
     else:
         linear_regression_model = LinearRegression()
-        params = {
-            "fit_intercept": [True, False]
-        }
+        params = LinearRegressionParams.get_params(model)

     if hyper_params_optimize == "grid_search":
         best_model = grid_search(params, linear_regression_model, x_train, y_train)
@@ -55,13 +63,13 @@ def linear_regression(container: Container, model=None):
         best_model = linear_regression_model
         best_model.fit(x_train, y_train)

-    info["…
-
-    lr_intercept = best_model.intercept_
-    info["Intercept of linear regression equation"] = lr_intercept
-
-    info["…
+    info["参数"] = best_model.get_params()
+
+    # lr_intercept = best_model.intercept_
+    # info["Intercept of linear regression equation"] = lr_intercept
+    #
+    # lr_coef = best_model.coef_
+    # info["Coefficients of linear regression equation"] = lr_coef

     y_pred = best_model.predict(x_test)
     container.set_y_pred(y_pred)
@@ -72,9 +80,10 @@ def linear_regression(container: Container, model=None):
     train_scores_std = np.std(train_scores, axis=1)
     test_scores_mean = np.mean(test_scores, axis=1)
     test_scores_std = np.std(test_scores, axis=1)
-    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, …)
+    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                        test_scores_std)

-    info…
+    info["参数"] = calculate_regression_metrics(y_pred, y_test)

     container.set_info(info)
     container.set_status("trained")
@@ -83,6 +92,15 @@ def linear_regression(container: Container, model=None):
     return container


+class PolynomialRegressionParams:
+    @classmethod
+    def get_params(cls):
+        return {
+            "polynomial_features__degree": [2, 3],
+            "linear_regression_model__fit_intercept": [True, False]
+        }
+
+
 # Polynomial regression
 def polynomial_regression(container: Container):
     x_train = container.x_train
@@ -97,10 +115,7 @@ def polynomial_regression(container: Container):

     polynomial_regression_model = Pipeline([("polynomial_features", polynomial_features),
                                             ("linear_regression_model", linear_regression_model)])
-    params = {
-        "polynomial_features__degree": [2, 3],
-        "linear_regression_model__fit_intercept": [True, False]
-    }
+    params = PolynomialRegressionParams.get_params()

     if hyper_params_optimize == "grid_search":
         best_model = grid_search(params, polynomial_regression_model, x_train, y_train)
@@ -110,16 +125,16 @@ def polynomial_regression(container: Container):
         best_model = polynomial_regression_model
         best_model.fit(x_train, y_train)

-    info["…
-
-    feature_names = best_model["polynomial_features"].get_feature_names_out()
-    info["Feature names of polynomial regression"] = feature_names
-
-    info["…
-
-    info["…
+    info["参数"] = best_model.get_params()
+
+    # feature_names = best_model["polynomial_features"].get_feature_names_out()
+    # info["Feature names of polynomial regression"] = feature_names
+    #
+    # lr_intercept = best_model["linear_regression_model"].intercept_
+    # info["Intercept of polynomial regression equation"] = lr_intercept
+    #
+    # lr_coef = best_model["linear_regression_model"].coef_
+    # info["Coefficients of polynomial regression equation"] = lr_coef

     x_test_ = best_model["polynomial_features"].fit_transform(x_test)
     y_pred = best_model["linear_regression_model"].predict(x_test_)
@@ -133,7 +148,7 @@ def polynomial_regression(container: Container):
     test_scores_std = np.std(test_scores, axis=1)
     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)

-    info…
+    info["指标"] = calculate_regression_metrics(y_pred, y_test)

     container.set_info(info)
     container.set_status("trained")
@@ -142,7 +157,18 @@ def polynomial_regression(container: Container):
     return container


-
+class LogisticRegressionParams:
+    @classmethod
+    def get_params(cls):
+        return {
+            "C": [0.001, 0.01, 0.1, 1.0, 10.0],
+            "max_iter": [100, 200, 300],
+            "solver": ["liblinear", "lbfgs", "newton-cg", "sag", "saga"],
+            "random_state": [Config.RANDOM_STATE]
+        }
+
+
+# Logistic regression classification
 def logistic_regression(container: Container):
     x_train = container.x_train
     y_train = container.y_train
@@ -151,12 +177,8 @@ def logistic_regression(container: Container):
     hyper_params_optimize = container.hyper_params_optimize
     info = {}

-    logistic_regression_model = LogisticRegression()
-    params = {
-        "C": [0.001, 0.01, 0.1, 1.0, 10.0],
-        "max_iter": [100, 200, 300],
-        "solver": ["liblinear", "lbfgs", "newton-cg", "sag", "saga"]
-    }
+    logistic_regression_model = LogisticRegression(random_state=Config.RANDOM_STATE)
+    params = LogisticRegressionParams.get_params()

     if hyper_params_optimize == "grid_search":
         best_model = grid_search(params, logistic_regression_model, x_train, y_train)
@@ -166,13 +188,13 @@ def logistic_regression(container: Container):
         best_model = logistic_regression_model
         best_model.fit(x_train, y_train)

-    info["…
-
-    lr_intercept = best_model.intercept_
-    info["Intercept of logistic regression equation"] = lr_intercept.tolist()
-
-    lr_coef = best_model.coef_
-    info["Coefficients of logistic regression equation"] = lr_coef.tolist()
+    info["参数"] = best_model.get_params()
+
+    # lr_intercept = best_model.intercept_
+    # info["Intercept of logistic regression equation"] = lr_intercept.tolist()
+    #
+    # lr_coef = best_model.coef_
+    # info["Coefficients of logistic regression equation"] = lr_coef.tolist()

     y_pred = best_model.predict(x_test)
     container.set_y_pred(y_pred)
@@ -183,9 +205,10 @@ def logistic_regression(container: Container):
     train_scores_std = np.std(train_scores, axis=1)
     test_scores_mean = np.mean(test_scores, axis=1)
     test_scores_std = np.std(test_scores, axis=1)
-    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, …)
+    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                        test_scores_std)

-    info…
+    info["指标"] = calculate_classification_metrics(y_pred, y_test)

     container.set_info(info)
     container.set_status("trained")
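Note: with the grids moved into LinearRegressionParams, the search space lives in one place for all four estimators. A purely illustrative sanity check of the dispatch, useful when wiring the model strings through app.py:

for name in ["Lasso", "Ridge", "ElasticNet", "LinearRegression"]:
    print(name, LinearRegressionParams.get_params(name))
# Lasso/Ridge/ElasticNet share the alpha grid; LinearRegression only
# searches fit_intercept, since it has no regularization strength.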
analysis/others/__init__.py
ADDED
File without changes
analysis/others/evaluation_model.py
ADDED
@@ -0,0 +1,99 @@
+import numpy as np
+import skfuzzy as fuzz
+from skfuzzy import control as ctrl
+import matplotlib.pyplot as plt
+
+
+def fuzzy_comprehensive_evaluation_model():
+    # Create the fuzzy variables and fuzzy sets
+    technical_skill = ctrl.Antecedent(np.arange(0, 101, 1), 'technical_skill')
+    physical_condition = ctrl.Antecedent(np.arange(0, 101, 1), 'physical_condition')
+    mental_toughness = ctrl.Antecedent(np.arange(0, 101, 1), 'mental_toughness')
+    opponent_strength = ctrl.Antecedent(np.arange(0, 101, 1), 'opponent_strength')
+
+    performance = ctrl.Consequent(np.arange(0, 101, 1), 'performance')
+
+    # Define the triangular membership functions
+    technical_skill['low'] = fuzz.trimf(technical_skill.universe, [0, 0, 50])
+    technical_skill['medium'] = fuzz.trimf(technical_skill.universe, [0, 50, 100])
+    technical_skill['high'] = fuzz.trimf(technical_skill.universe, [50, 100, 100])
+
+    physical_condition['low'] = fuzz.trimf(physical_condition.universe, [0, 0, 50])
+    physical_condition['medium'] = fuzz.trimf(physical_condition.universe, [0, 50, 100])
+    physical_condition['high'] = fuzz.trimf(physical_condition.universe, [50, 100, 100])
+
+    mental_toughness['low'] = fuzz.trimf(mental_toughness.universe, [0, 0, 50])
+    mental_toughness['medium'] = fuzz.trimf(mental_toughness.universe, [0, 50, 100])
+    mental_toughness['high'] = fuzz.trimf(mental_toughness.universe, [50, 100, 100])
+
+    opponent_strength['low'] = fuzz.trimf(opponent_strength.universe, [0, 0, 50])
+    opponent_strength['medium'] = fuzz.trimf(opponent_strength.universe, [0, 50, 100])
+    opponent_strength['high'] = fuzz.trimf(opponent_strength.universe, [50, 100, 100])
+
+    performance['poor'] = fuzz.trimf(performance.universe, [0, 0, 50])
+    performance['average'] = fuzz.trimf(performance.universe, [0, 50, 100])
+    performance['excellent'] = fuzz.trimf(performance.universe, [50, 100, 100])
+
+    # Defuzzify the output with the centroid method
+    performance.defuzzify_method = 'centroid'
+
+    # Define the rules
+    rule1 = ctrl.Rule(
+        technical_skill['low'] | physical_condition['low'] | mental_toughness['low'] | opponent_strength['low'],
+        performance['poor']
+    )
+    rule2 = ctrl.Rule(
+        technical_skill['medium'] | physical_condition['medium'] | mental_toughness['medium'] | opponent_strength['medium'],
+        performance['average']
+    )
+    rule3 = ctrl.Rule(
+        technical_skill['high'] | physical_condition['high'] | mental_toughness['high'] | opponent_strength['high'],
+        performance['excellent']
+    )
+
+    # Build the control system and its simulator
+    performance_evaluation = ctrl.ControlSystem([rule1, rule2, rule3])
+    performance_evaluator = ctrl.ControlSystemSimulation(performance_evaluation)
+
+    # Feed in the input data
+    performance_evaluator.input['technical_skill'] = 75
+    performance_evaluator.input['physical_condition'] = 80
+    performance_evaluator.input['mental_toughness'] = 85
+    performance_evaluator.input['opponent_strength'] = 60
+
+    # Compute the fuzzy comprehensive score
+    performance_evaluator.compute()
+
+    # Report the result
+    print("Fuzzy comprehensive score:", performance_evaluator.output['performance'])
+
+    # Plot the fuzzy sets
+    technical_skill.view("technical_skill", sim=performance_evaluator)
+    physical_condition.view("physical_condition", sim=performance_evaluator)
+    mental_toughness.view("mental_toughness", sim=performance_evaluator)
+    opponent_strength.view("opponent_strength", sim=performance_evaluator)
+    performance.view("performance", sim=performance_evaluator)
+
+    # Perform sensitivity analysis (by varying the input values)
+
+    # input_var_1:
+
+    # input_values = np.arange(0, 11, 1)
+    # output_values = []
+    #
+    # for val in input_values:
+    #     fuzzy_control_sys_simulation.input["input_var_1"] = val
+    #     fuzzy_control_sys_simulation.compute()
+    #     output_values.append(fuzzy_control_sys_simulation.output["output_var"])
+    #
+    # plt.plot(
+    #     input_values,
+    #     output_values,
+    #     label="Sensitivity Analysis"
+    # )
+    # plt.xlabel("Input Variable 1")
+    # plt.ylabel("Output Variable")
+    # plt.legend()
+    # plt.show()
+    #
+    # return fuzzy_control_sys_simulation.output["output_var"]
analysis/others/gaussian_model.py
ADDED
@@ -0,0 +1,28 @@
+import numpy as np
+import matplotlib.pyplot as plt
+from sklearn.mixture import GaussianMixture
+
+
+def gaussian_mix(x):
+    x = x.reshape(-1, 1)
+    n_components = 2000  # adjust the number of mixture components as needed
+    gmm = GaussianMixture(n_components=n_components, covariance_type='full')
+
+    # Fit the model
+    gmm.fit(x)
+
+    # Sample a continuous series of the same length from the fitted mixture
+    continuous_data = gmm.sample(len(x))[0].reshape(-1)
+
+    return continuous_data
+
+    # Fit the data with a Gaussian mixture model
+    # gmm = GaussianMixture(n_components=50)  # choose the number of mixture components
+    # gmm.fit(x.reshape(-1, 1))
+
+    # Generate continuous data
+    # return np.linspace(min(x), max(x), len(x)).flatten()
+
+    # z = np.exp(gmm.score_samples(y.reshape(-1, 1)))
+
+    # return z
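Note: a small usage sketch for gaussian_mix on synthetic data. GaussianMixture needs at least as many samples as components, so with n_components=2000 the input must contain 2000+ points, and the fit will be slow:

import numpy as np

x = np.random.randn(5000)        # toy 1-D sample
smoothed = gaussian_mix(x)       # resampled series of the same length
print(smoothed.shape)            # (5000,)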
analysis/others/markov_model.py
ADDED
@@ -0,0 +1,98 @@
+import numpy as np
+import pandas as pd
+from hmmlearn import hmm
+
+
+def train_and_predict_hidden_markov_model(df):
+    window_size = 10
+
+    # train_df = df[['point_won', 'point_loss', 'ace', 'winner', 'double_fault', 'unf_err', 'net_point', 'net_point_won', 'break_pt', 'break_pt_won', 'break_pt_miss']]
+
+    train_df = df
+    # "p1_winner",
+    # "p2_winner",
+    # "winner_shot_type",
+    # "p1_double_fault",
+    # "p2_double_fault",
+    # "p1_unf_err",
+    # "p2_unf_err",
+    # "p1_net_pt_won",
+    # "p2_net_pt_won",
+    # "p1_break_pt_won",
+    # "p2_break_pt_won",
+    # "rally_count",
+    # "serve_width",
+    # "serve_depth",
+    # "return_depth"
+    df["observation"] = 0
+
+    # mapping = {}
+    # counter = 0
+    # for i in range(len(train_df)):
+    #     cur_combination = train_df.iloc[i].to_list()
+    #
+    #     if str(cur_combination) not in mapping.keys():
+    #         mapping[str(cur_combination)] = counter
+    #         df.loc[i, "observation"] = counter
+    #         counter += 1
+    #     else:
+    #         df.loc[i, "observation"] = mapping[str(cur_combination)]
+
+    observation_list = df["observation"].to_list()
+
+    # value_separated_observation_list = [observation_list[i - window_size: i] for i in range(window_size, len(observation_list))]
+    # value_separated_observation_list = [[0] * window_size] * window_size + value_separated_observation_list
+
+    observations = np.array([np.sum(np.array([train_df.iloc[j].to_list() for j in range(i - window_size, i)]).astype(int), axis=0) for i in range(window_size, len(train_df))])
+
+    observations = abs(np.min(observations)) + observations
+
+    observations = observations.astype(int)
+
+    m_observations = np.concatenate(
+        (np.array([observations[0].tolist()] * window_size), observations),
+        axis=0
+    )
+
+    df = pd.concat([df, pd.DataFrame({"window_observation": m_observations.tolist()})], axis=1)
+
+    hidden_markov_model = hmm.MultinomialHMM(n_components=5, n_iter=50, tol=0.01)
+
+    hidden_markov_model.fit(observations)
+
+    start_prob = hidden_markov_model.startprob_
+    transition_prob = hidden_markov_model.transmat_
+    emission_prob = hidden_markov_model.emissionprob_
+
+    neg_log_likelihood, pred = calculate_momentum(df, hidden_markov_model, m_observations)
+
+    _, hidden2observation = hidden_markov_model.score_samples(observations)
+
+    state_impacts = np.sum(hidden2observation, axis=0)
+
+    return state_impacts, neg_log_likelihood, pred, start_prob, transition_prob, emission_prob
+
+    # NOTE: the code below is unreachable (it follows the return above) and
+    # references undefined names (num_states, num_obs, forward_prob, backward_prob).
+    state_impacts = np.zeros((num_states, num_obs))
+
+    for t in range(num_obs):
+        for i in range(num_states):
+            state_impacts[i, t] = (forward_prob[t, i] * backward_prob[t, i]) / np.sum(
+                forward_prob[t, :] * backward_prob[t, :])
+
+    return neg_log_likelihood, pred, start_prob, transition_prob, emission_prob
+
+
+def calculate_momentum(df, hidden_markov_model, m_observations):
+    # pred_list = []
+    # neg_log_likelihood_list = []
+    # for i in range(len(df)):
+    #     neg_log_likelihood, pred = hidden_markov_model.decode(np.array([df.loc[i, "window_observation"]]))
+    #     pred_list.append(pred[0])
+    #     neg_log_likelihood_list.append(neg_log_likelihood)
+    #
+    # return pred_list, neg_log_likelihood_list
+
+    neg_log_likelihood, pred = hidden_markov_model.decode(m_observations)
+
+    return neg_log_likelihood, pred
analysis/others/poly_model.py
ADDED
@@ -0,0 +1,12 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def poly_fit(x_values, y_values, degree=60):
+    # Fit a polynomial with numpy's polyfit
+    coefficients = np.polyfit(x_values, y_values, degree)
+
+    # Build the fitted polynomial function
+    fitted_curve = np.poly1d(coefficients)
+
+    return fitted_curve(x_values)
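Note: usage sketch for poly_fit. degree=60 is aggressive: np.polyfit warns that such fits are poorly conditioned, and high-degree polynomials can oscillate near the boundaries (Runge's phenomenon), so lower degrees are usually safer:

import numpy as np

x = np.linspace(0, 1, 200)
y = np.sin(6 * x) + 0.1 * np.random.randn(200)
smooth = poly_fit(x, y, degree=15)   # same shape as x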
analysis/shap_model.py
CHANGED
@@ -1,15 +1,52 @@
 import matplotlib.pyplot as plt
-
+import numpy as np
 import shap


-def …
-    explainer = shap.…
+def draw_shap_beeswarm(model, x, feature_names, type, paint_object):
+    explainer = shap.KernelExplainer(model.predict, x)
     shap_values = explainer(x)

-    shap.…
+    shap.summary_plot(shap_values, x, feature_names=feature_names, plot_type=type, show=False)

     plt.title(paint_object.get_name())
+    plt.tight_layout()

     return plt, paint_object
+
+
+def draw_waterfall(model, x, feature_names, number, paint_object):
+    explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
+    shap_values = explainer(x)
+
+    shap.waterfall_plot(shap_values[number], show=False)
+
+    plt.title(paint_object.get_name())
+    plt.tight_layout()
+
+    return plt, paint_object
+
+
+def draw_force(model, x, feature_names, number, paint_object):
+    explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
+    shap_values = explainer(x[number])
+
+    shap.force_plot(explainer.expected_value, shap_values.values, feature_names=feature_names, show=False, matplotlib=True)
+
+    plt.title(paint_object.get_name())
+    plt.tight_layout()
+
+    return plt, paint_object
+
+
+def draw_dependence(model, x, feature_names, col, paint_object):
+    explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
     shap_values = explainer(x)

+    shap.dependence_plot(feature_names.index(col), shap_values.values, x, feature_names=feature_names, show=False)

     plt.title(paint_object.get_name())
+    plt.tight_layout()

     return plt, paint_object
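Note: a hypothetical call site for draw_shap_beeswarm; the paint object is assumed to expose get_name(), matching the calls above. KernelExplainer is model-agnostic but evaluates the model many times, so a small background sample keeps it tractable:

# Illustrative only; PaintObject is a stand-in for the project's paint object.
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor


class PaintObject:
    def get_name(self):
        return "SHAP beeswarm"


data = load_diabetes()
x, names = data.data[:100], list(data.feature_names)
model = RandomForestRegressor(n_estimators=10, random_state=0).fit(x, data.target[:100])

plt_module, paint = draw_shap_beeswarm(model, x, names, "dot", PaintObject())
plt_module.show()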
analysis/tree_model.py
CHANGED
@@ -1,208 +1,290 @@
|
|
1 |
-
from
|
2 |
from sklearn.ensemble import RandomForestClassifier
|
3 |
-
from xgboost import XGBClassifier
|
4 |
-
from sklearn.model_selection import learning_curve
|
5 |
-
import numpy as np
|
6 |
-
|
7 |
-
from coding.llh.analysis.shap_model import shap_calculate
|
8 |
-
from coding.llh.static.config import Config
|
9 |
-
from coding.llh.static.process import grid_search, bayes_search
|
10 |
-
from coding.llh.visualization.draw_learning_curve import draw_learning_curve
|
11 |
-
from coding.llh.visualization.draw_line_graph import draw_line_graph
|
12 |
-
from coding.llh.visualization.draw_scatter_line_graph import draw_scatter_line_graph
|
13 |
-
from coding.llh.metrics.calculate_classification_metrics import calculate_classification_metrics
|
14 |
-
from coding.llh.metrics.calculate_regression_metrics import calculate_regression_metrics
|
15 |
from sklearn.ensemble import RandomForestRegressor
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
info = {}
|
20 |
-
model_name = "Random Forest Regression"
|
21 |
|
22 |
-
|
23 |
-
params =
|
24 |
-
'n_estimators': [10, 50, 100, 200],
|
25 |
-
'max_depth': [None, 10, 20, 30],
|
26 |
-
'min_samples_split': [2, 5, 10],
|
27 |
-
'min_samples_leaf': [1, 2, 4]
|
28 |
-
}
|
29 |
|
30 |
if hyper_params_optimize == "grid_search":
|
31 |
-
best_model = grid_search(params,
|
32 |
elif hyper_params_optimize == "bayes_search":
|
33 |
-
best_model = bayes_search(params,
|
34 |
else:
|
35 |
-
best_model =
|
36 |
-
best_model.fit(
|
37 |
-
|
38 |
-
info["{} Params".format(model_name)] = best_model.get_params()
|
39 |
|
40 |
-
|
41 |
|
|
|
|
|
|
|
42 |
|
43 |
-
|
44 |
-
# 0202:
|
45 |
-
|
46 |
-
train_sizes, train_scores, test_scores = learning_curve(best_model, x, y, cv=5, scoring="r2")
|
47 |
|
48 |
train_scores_mean = np.mean(train_scores, axis=1)
|
49 |
train_scores_std = np.std(train_scores, axis=1)
|
50 |
test_scores_mean = np.mean(test_scores, axis=1)
|
51 |
test_scores_std = np.std(test_scores, axis=1)
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
train_scores_mean[0] = 0.98
|
55 |
|
56 |
-
|
|
|
|
|
57 |
|
58 |
-
|
59 |
|
60 |
-
info.update(calculate_regression_metrics(y_pred, y_test, model_name))
|
61 |
-
# info.update(calculate_classification_metrics(y_pred, y_test, "logistic regression"))
|
62 |
-
# mae, mse, rsme, r2, ar2 = calculate_regression_metrics(y_pred, y_test, model_name)
|
63 |
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
-
return y_pred, info, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
71 |
info = {}
|
72 |
|
73 |
-
|
74 |
-
params =
|
75 |
-
"criterion": ["gini", "entropy"],
|
76 |
-
"splitter": ["best", "random"],
|
77 |
-
"max_depth": [None, 5, 10, 15],
|
78 |
-
"min_samples_split": [2, 5, 10],
|
79 |
-
"min_samples_leaf": [1, 2, 4]
-         }

          if hyper_params_optimize == "grid_search":
-             best_model = grid_search(params,
          elif hyper_params_optimize == "bayes_search":
-             best_model = bayes_search(params,
          else:
-             best_model =
-
-         # TODO
-         x_train, x_validate, y_train, y_validate = epoch
-
          y_pred = best_model.predict(x_test)
-
-         info
-         info.update(calculate_classification_metrics(y_pred, y_test, "decision tree classifier"))
-
-
-
      info = {}

-     random_forest_classifier_model = RandomForestClassifier(random_state=Config.RANDOM_STATE)
-     params =
-         "criterion": ["gini", "entropy"],
-         "n_estimators": [50, 100, 150],
-         "max_depth": [None, 5, 10, 15],
-         "min_samples_split": [2, 5, 10],
-         "min_samples_leaf": [1, 2, 4],
-         "n_jobs": [-1]
-     }

      if hyper_params_optimize == "grid_search":
-         best_model = grid_search(params, random_forest_classifier_model,
      elif hyper_params_optimize == "bayes_search":
-         best_model = bayes_search(params, random_forest_classifier_model,
      else:
          best_model = random_forest_classifier_model
-
-     # TODO
-     x_train, x_validate, y_train, y_validate = epoch
-
-
-     info["random forest Params"] = best_model.get_params()

      y_pred = best_model.predict(x_test)
-
-
      train_scores_mean = np.mean(train_scores, axis=1)
      train_scores_std = np.std(train_scores, axis=1)
      test_scores_mean = np.mean(test_scores, axis=1)
      test_scores_std = np.std(test_scores, axis=1)
-
-
-
-     # info.update(calculate_classification_metrics(y_pred, y_test, "random forest classifier"))

-     f1_score, fpr, tpr, thresholds = calculate_classification_metrics(y_pred, y_test, "random forest")
-

- #
- def
      info = {}

-
-     params =
-         "n_estimators": [50, 100, 150],
-         "learning_rate": [0.01, 0.1, 0.2],
-         "max_depth": [3, 4, 5],
-         "min_child_weight": [1, 2, 3],
-         "gamma": [0, 0.1, 0.2],
-         "subsample": [0.8, 0.9, 1.0],
-         "colsample_bytree": [0.8, 0.9, 1.0]
-     }

      if hyper_params_optimize == "grid_search":
-         best_model = grid_search(params,
      elif hyper_params_optimize == "bayes_search":
-         best_model = bayes_search(params,
      else:
-         best_model =
-
-     # TODO
-     x_train, x_validate, y_train, y_validate = epoch
-
-
-     info["xgboost Params"] = best_model.get_params()

      y_pred = best_model.predict(x_test)
-
-
-     train_sizes, train_scores, test_scores = learning_curve(best_model, x, y, cv=5, scoring="accuracy")

      train_scores_mean = np.mean(train_scores, axis=1)
      train_scores_std = np.std(train_scores, axis=1)
      test_scores_mean = np.mean(test_scores, axis=1)
      test_scores_std = np.std(test_scores, axis=1)
-
-
-     # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "xgboost classifier model residual plot")
-
-     # info.update(calculate_regression_metrics(y_pred, y_test, "xgboost classifier"))
-     # info.update(calculate_classification_metrics(y_pred, y_test, "xgboost classifier"))
-
-     f1_score, fpr, tpr, thresholds = calculate_classification_metrics(y_pred, y_test, "xgboost")
-
- [... remaining removed lines of the old analysis/tree_model.py are truncated in this view ...]

+ from metrics.calculate_regression_metrics import calculate_regression_metrics
  from sklearn.ensemble import RandomForestClassifier
  from sklearn.ensemble import RandomForestRegressor
+ from sklearn.model_selection import learning_curve
+ from sklearn.tree import DecisionTreeClassifier
+ from xgboost import XGBClassifier
+ import lightgbm as lightGBMClassifier
+
+ from analysis.shap_model import *
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
+ from static.config import Config
+ from static.process import grid_search, bayes_search
+ from static.new_class import *
+
+
+ class RandomForestRegressionParams:
+     @classmethod
+     def get_params(cls):
+         return {
+             'n_estimators': [10, 50, 100, 200],
+             'max_depth': [None, 10, 20, 30],
+             'min_samples_split': [2, 5, 10],
+             'min_samples_leaf': [1, 2, 4]
+         }
+
+
+ # 随机森林回归 (random forest regression)
+ def random_forest_regression(container: Container):
+     x_train = container.x_train
+     y_train = container.y_train
+     x_test = container.x_test
+     y_test = container.y_test
+     hyper_params_optimize = container.hyper_params_optimize
      info = {}
+
+     random_forest_regression_model = RandomForestRegressor(n_estimators=5, random_state=Config.RANDOM_STATE)
+     params = RandomForestRegressionParams.get_params()
+
      if hyper_params_optimize == "grid_search":
+         best_model = grid_search(params, random_forest_regression_model, x_train, y_train)
      elif hyper_params_optimize == "bayes_search":
+         best_model = bayes_search(params, random_forest_regression_model, x_train, y_train)
      else:
+         best_model = random_forest_regression_model
+         best_model.fit(x_train, y_train)
+
+     info["参数"] = best_model.get_params()
+
+     y_pred = best_model.predict(x_test)
+     # y_pred = best_model.predict(x_test).reshape(-1, 1)
+     container.set_y_pred(y_pred)
+
+     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
+
      train_scores_mean = np.mean(train_scores, axis=1)
      train_scores_std = np.std(train_scores, axis=1)
      test_scores_mean = np.mean(test_scores, axis=1)
      test_scores_std = np.std(test_scores, axis=1)
+     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                         test_scores_std)
+
+     info["指标"] = calculate_regression_metrics(y_pred, y_test)
+
+     container.set_info(info)
+     container.set_status("trained")
+     container.set_model(best_model)
+
+     return container
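Every trainer in this file derives its learning-curve data the same way: sklearn's learning_curve returns per-fold score matrices, and the mean/std across the fold axis become the band that draw_learning_curve_total plots. A minimal standalone sketch of just that computation (the dataset and estimator here are illustrative placeholders, not part of this commit):

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import learning_curve

    X, y = load_iris(return_X_y=True)
    model = RandomForestClassifier(n_estimators=5, random_state=0)

    # train_scores/test_scores have shape (n_sizes, cv): one row per
    # training-set size, one column per cross-validation fold.
    train_sizes, train_scores, test_scores = learning_curve(model, X, y, cv=5)

    # Collapsing the fold axis gives the curve and its spread, exactly what
    # the trainers feed into container.set_learning_curve_values(...).
    train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
    test_mean, test_std = test_scores.mean(axis=1), test_scores.std(axis=1)
    print(train_sizes, train_mean, test_mean)
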
+ class DecisionTreeClassifierParams:
+     @classmethod
+     def get_params(cls):
+         return {
+             "criterion": ["gini", "entropy"],
+             "splitter": ["best", "random"],
+             "max_depth": [None, 5, 10, 15],
+             "min_samples_split": [2, 5, 10],
+             "min_samples_leaf": [1, 2, 4]
+         }
+
+
+ # 决策树分类 (decision tree classification)
+ def decision_tree_classifier(container: Container):
+     x_train = container.x_train
+     y_train = container.y_train
+     x_test = container.x_test
+     y_test = container.y_test
+     hyper_params_optimize = container.hyper_params_optimize
      info = {}
+
+     random_forest_regression_model = DecisionTreeClassifier(random_state=Config.RANDOM_STATE)
+     params = DecisionTreeClassifierParams.get_params()
+
      if hyper_params_optimize == "grid_search":
+         best_model = grid_search(params, random_forest_regression_model, x_train, y_train)
      elif hyper_params_optimize == "bayes_search":
+         best_model = bayes_search(params, random_forest_regression_model, x_train, y_train)
      else:
+         best_model = random_forest_regression_model
+         best_model.fit(x_train, y_train)
+
+     info["参数"] = best_model.get_params()
+
      y_pred = best_model.predict(x_test)
+     container.set_y_pred(y_pred)
+
+     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
+
+     train_scores_mean = np.mean(train_scores, axis=1)
+     train_scores_std = np.std(train_scores, axis=1)
+     test_scores_mean = np.mean(test_scores, axis=1)
+     test_scores_std = np.std(test_scores, axis=1)
+     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                         test_scores_std)
+
+     info["指标"] = calculate_classification_metrics(y_pred, y_test)
+
+     container.set_info(info)
+     container.set_status("trained")
+     container.set_model(best_model)
+
+     return container
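grid_search and bayes_search come from static/process.py, which is not shown in this part of the diff; judging only by the call sites (params, model, x_train, y_train), they plausibly wrap sklearn's GridSearchCV and scikit-optimize's BayesSearchCV. A hedged sketch of what such helpers might look like — the bodies below are an assumption, only the call signature is taken from this commit:

    from sklearn.model_selection import GridSearchCV

    def grid_search(params, model, x_train, y_train, cv=5):
        # Exhaustively tries every combination in `params` with
        # cross-validation and returns the refitted best estimator.
        searcher = GridSearchCV(model, params, cv=cv, n_jobs=-1)
        searcher.fit(x_train, y_train)
        return searcher.best_estimator_

    def bayes_search(params, model, x_train, y_train, cv=5, n_iter=32):
        # skopt's BayesSearchCV samples the space with a surrogate model
        # instead of enumerating it; requires `pip install scikit-optimize`.
        from skopt import BayesSearchCV
        searcher = BayesSearchCV(model, params, cv=cv, n_iter=n_iter, n_jobs=-1)
        searcher.fit(x_train, y_train)
        return searcher.best_estimator_
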
+ class RandomForestClassifierParams:
+     @classmethod
+     def get_params(cls):
+         return {
+             "criterion": ["gini", "entropy"],
+             "n_estimators": [50, 100, 150],
+             "max_depth": [None, 5, 10, 15],
+             "min_samples_split": [2, 5, 10],
+             "min_samples_leaf": [1, 2, 4]
+         }
+
+
+ # 随机森林分类 (random forest classification)
+ def random_forest_classifier(container: Container):
+     x_train = container.x_train
+     y_train = container.y_train
+     x_test = container.x_test
+     y_test = container.y_test
+     hyper_params_optimize = container.hyper_params_optimize
      info = {}
+
+     random_forest_classifier_model = RandomForestClassifier(n_estimators=5, random_state=Config.RANDOM_STATE)
+     params = RandomForestClassifierParams.get_params()
+
      if hyper_params_optimize == "grid_search":
+         best_model = grid_search(params, random_forest_classifier_model, x_train, y_train)
      elif hyper_params_optimize == "bayes_search":
+         best_model = bayes_search(params, random_forest_classifier_model, x_train, y_train)
      else:
          best_model = random_forest_classifier_model
+         best_model.fit(x_train, y_train)
+
+     info["参数"] = best_model.get_params()
+
      y_pred = best_model.predict(x_test)
+     # y_pred = best_model.predict(x_test).reshape(-1, 1)
+     container.set_y_pred(y_pred)
+
+     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
+
+     train_scores_mean = np.mean(train_scores, axis=1)
+     train_scores_std = np.std(train_scores, axis=1)
+     test_scores_mean = np.mean(test_scores, axis=1)
+     test_scores_std = np.std(test_scores, axis=1)
+     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                         test_scores_std)
+
+     info["指标"] = calculate_classification_metrics(y_pred, y_test)
+
+     container.set_info(info)
+     container.set_status("trained")
+     container.set_model(best_model)
+
+     return container
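The params classes are plain dict grids, so their search cost is easy to bound before training: RandomForestClassifierParams spans 2 × 3 × 4 × 3 × 3 = 216 candidate models, and grid search fits each one cv times. sklearn's ParameterGrid gives a quick check (the grid values below are copied from the class above; the check itself is only an illustration):

    from sklearn.model_selection import ParameterGrid

    grid = {
        "criterion": ["gini", "entropy"],
        "n_estimators": [50, 100, 150],
        "max_depth": [None, 5, 10, 15],
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    }
    # 216 combinations; with cv=5 that is 1080 individual fits.
    print(len(ParameterGrid(grid)))
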
+ class XgboostClassifierParams:
+     @classmethod
+     def get_params(cls):
+         return {
+             "n_estimators": [50, 100, 150],
+             "learning_rate": [0.01, 0.1, 0.2],
+             "max_depth": [3, 4, 5],
+             "min_child_weight": [1, 2, 3],
+             "gamma": [0, 0.1, 0.2],
+             "subsample": [0.5, 0.8, 0.9, 1.0],
+             "colsample_bytree": [0.8, 0.9, 1.0]
+         }
+
+
+ # xgboost分类 (XGBoost classification)
+ def xgboost_classifier(container: Container):
+     x_train = container.x_train
+     y_train = container.y_train
+     x_test = container.x_test
+     y_test = container.y_test
+     hyper_params_optimize = container.hyper_params_optimize
+     info = {}
+
+     xgboost_classifier_model = XGBClassifier(random_state=Config.RANDOM_STATE)
+     params = XgboostClassifierParams.get_params()
+
+     if hyper_params_optimize == "grid_search":
+         best_model = grid_search(params, xgboost_classifier_model, x_train, y_train)
+     elif hyper_params_optimize == "bayes_search":
+         best_model = bayes_search(params, xgboost_classifier_model, x_train, y_train)
+     else:
+         best_model = xgboost_classifier_model
+         best_model.fit(x_train, y_train)
+
+     info["参数"] = best_model.get_params()
+
+     y_pred = best_model.predict(x_test)
+     # y_pred = best_model.predict(x_test).reshape(-1, 1)
+     container.set_y_pred(y_pred)
+
+     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

      train_scores_mean = np.mean(train_scores, axis=1)
      train_scores_std = np.std(train_scores, axis=1)
      test_scores_mean = np.mean(test_scores, axis=1)
      test_scores_std = np.std(test_scores, axis=1)
+     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                         test_scores_std)

+     info["指标"] = calculate_classification_metrics(y_pred, y_test)

+     container.set_info(info)
+     container.set_status("trained")
+     container.set_model(best_model)

+     return container
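All trainers share the same Container protocol: build it from the split data plus the optimizer name, pass it through a trainer, read predictions and metrics back off it. A usage sketch under that reading — the dataset and split are placeholders, the constructor order (x_train, y_train, x_test, y_test, optimize) matches its use in app.py further down, and reading container.info as a plain attribute is an assumption suggested by set_info/set_model:

    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    from analysis.tree_model import xgboost_classifier
    from static.new_class import Container

    X, y = load_iris(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

    container = Container(x_train, y_train, x_test, y_test, "grid_search")
    container = xgboost_classifier(container)  # tunes, fits, fills the container

    # Assumes set_info stores the dict on an .info attribute.
    print(container.info["参数"])   # best hyper-parameters
    print(container.info["指标"])   # classification metrics on the test split
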
+ class LightGBMClassifierParams:
+     @classmethod
+     def get_params(cls):
+         return


+ # lightGBM分类 (LightGBM classification)
+ def lightGBM_classifier(container: Container):
+     x_train = container.x_train
+     y_train = container.y_train
+     x_test = container.x_test
+     y_test = container.y_test
+     hyper_params_optimize = container.hyper_params_optimize
      info = {}

+     lightgbm_classifier_model = lightGBMClassifier
+     params = LightGBMClassifierParams.get_params()

      if hyper_params_optimize == "grid_search":
+         best_model = grid_search(params, lightgbm_classifier_model, x_train, y_train)
      elif hyper_params_optimize == "bayes_search":
+         best_model = bayes_search(params, lightgbm_classifier_model, x_train, y_train)
      else:
+         best_model = lightgbm_classifier_model
+         best_model.train(x_train, y_train)

+     info["参数"] = best_model.get_params()

      y_pred = best_model.predict(x_test)
+     # y_pred = best_model.predict(x_test).reshape(-1, 1)
+     container.set_y_pred(y_pred)

+     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

      train_scores_mean = np.mean(train_scores, axis=1)
      train_scores_std = np.std(train_scores, axis=1)
      test_scores_mean = np.mean(test_scores, axis=1)
      test_scores_std = np.std(test_scores, axis=1)
+     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
+                                         test_scores_std)

+     info["指标"] = calculate_classification_metrics(y_pred, y_test)

+     container.set_info(info)
+     container.set_status("trained")
+     container.set_model(best_model)

+     return container

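As committed, the LightGBM path is the one trainer that cannot run: LightGBMClassifierParams.get_params falls through to return None, and lightgbm_classifier_model is bound to the imported lightgbm module rather than an estimator, so the later .train/.get_params calls would fail. A hedged sketch of what a working sklearn-style version might look like — the grid values are illustrative assumptions, not from this commit; only LGBMClassifier itself is the library's real API:

    from lightgbm import LGBMClassifier

    class LightGBMClassifierParamsSketch:
        @classmethod
        def get_params(cls):
            # Illustrative grid; the commit leaves this method empty.
            return {
                "n_estimators": [50, 100, 150],
                "learning_rate": [0.01, 0.1, 0.2],
                "num_leaves": [15, 31, 63],
            }

    # An estimator object (not the module) supports .fit/.predict/.get_params,
    # which is what the shared trainer scaffolding above expects.
    model = LGBMClassifier(random_state=42)
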
app.py CHANGED

@@ -1,5 +1,7 @@
  import copy
  import os.path

  import gradio as gr
  import matplotlib.pyplot as plt
@@ -7,71 +9,98 @@ from sklearn import preprocessing
  from sklearn.model_selection import train_test_split
  import pandas as pd

- from analysis.
  from static.process import *
  from analysis.linear_model import *
  from visualization.draw_learning_curve_total import draw_learning_curve_total
- from static.

  import warnings

  warnings.filterwarnings("ignore")

- [... a block of roughly fifty removed lines (old lines 21-70); their content is truncated in this view ...]

  class StaticValue:
-     max_num =

  class FilePath:
@@ -80,23 +109,48 @@ class FilePath:

      # [绘图] (plotting)
      display_dataset = "current_excel_data"
-
-
      shap_beeswarm_plot = "shap_beeswarm_plot"


  class MN:  # ModelName
      classification = "classification"
      regression = "regression"
-
-
-
      # [绘图]
-
-
      shap_beeswarm = "shap_beeswarm"


  class LN:  # LabelName
@@ -119,11 +173,16 @@ class LN:  # LabelName
      standardize_data_button = "标准化 [可选]"
      select_as_y_radio = "选择因变量 [必选]"
      choose_assign_radio = "选择任务类型(同时会根据任务类型将第1列数据强制转换)[必选]"
-     linear_regression_model_radio = "选择线性回归的模型"
      model_optimize_radio = "选择超参数优化方法"
      model_train_button = "训练"
      select_as_model_radio = "选择所需训练的模型"

      title_name_textbox = "标题"
      x_label_textbox = "x 轴名称"
      y_label_textbox = "y 轴名称"
@@ -131,15 +190,41 @@ class LN:  # LabelName
      labels = ["图例 {}".format(i) for i in range(StaticValue.max_num)]

      # [绘图]
-     [... eight removed lines, truncated in this view ...]


  def get_return_extra(is_visible, extra_gr_dict: dict = None):
@@ -190,10 +275,11 @@ def get_outputs():
      standardize_data_checkboxgroup,
      standardize_data_button,
      select_as_y_radio,
-     linear_regression_model_radio,
      model_optimize_radio,
      model_train_button,
      model_train_checkbox,
      select_as_model_radio,
      choose_assign_radio,
      display_dataset,
@@ -203,12 +289,37 @@ def get_outputs():
      x_label_textbox,
      y_label_textbox,

      # [绘图]
      learning_curve_checkboxgroup,
-
-     learning_curve_validation_button,
      shap_beeswarm_radio,
      shap_beeswarm_button,
  }

  gr_set.update(set(colorpickers))
@@ -245,11 +356,10 @@ def get_return(is_visible, extra_gr_dict: dict = None):

      select_as_model_radio: gr.Radio(Dataset.get_model_list(), visible=Dataset.check_before_train(), label=LN.select_as_model_radio),
      model_optimize_radio: gr.Radio(Dataset.get_optimize_list(), visible=Dataset.check_before_train(), label=LN.model_optimize_radio),
-
-     linear_regression_model_radio: gr.Radio(Dataset.get_linear_regression_model_list(), visible=Dataset.get_linear_regression_mark(), label=LN.linear_regression_model_radio),
-
      model_train_button: gr.Button(LN.model_train_button, visible=Dataset.check_before_train()),
      model_train_checkbox: gr.Checkbox(Dataset.get_model_container_status(), visible=Dataset.check_select_model(), label=Dataset.get_model_label()),

      draw_plot: gr.Plot(visible=False),
      draw_file: gr.File(visible=False),
@@ -257,12 +367,38 @@ def get_return(is_visible, extra_gr_dict: dict = None):
      x_label_textbox: gr.Textbox(visible=False),
      y_label_textbox: gr.Textbox(visible=False),

      # [绘图]
      learning_curve_checkboxgroup: gr.Checkboxgroup(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.learning_curve_checkboxgroup),
-
-     learning_curve_validation_button: gr.Button(LN.learning_curve_validation_button, visible=Dataset.check_before_train()),
      shap_beeswarm_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.shap_beeswarm_radio),
      shap_beeswarm_button: gr.Button(LN.shap_beeswarm_button, visible=Dataset.check_before_train()),
  }

  gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
@@ -295,10 +431,11 @@ def get_return(is_visible, extra_gr_dict: dict = None):
      standardize_data_checkboxgroup: gr.Checkboxgroup(visible=False),
      standardize_data_button: gr.Button(visible=False),
      select_as_y_radio: gr.Radio(visible=False),
-     linear_regression_model_radio: gr.Radio(visible=False),
      model_optimize_radio: gr.Radio(visible=False),
      model_train_button: gr.Button(visible=False),
      model_train_checkbox: gr.Checkbox(visible=False),
      select_as_model_radio: gr.Radio(visible=False),
      choose_assign_radio: gr.Radio(visible=False),
@@ -308,12 +445,37 @@ def get_return(is_visible, extra_gr_dict: dict = None):
      x_label_textbox: gr.Textbox(visible=False),
      y_label_textbox: gr.Textbox(visible=False),

      # [绘图]
      learning_curve_checkboxgroup: gr.Checkboxgroup(visible=False),
-
-     learning_curve_validation_button: gr.Button(visible=False),
      shap_beeswarm_radio: gr.Radio(visible=False),
      shap_beeswarm_button: gr.Button(visible=False),
  }

  gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
@@ -336,17 +498,49 @@ class Dataset:
      cur_model = ""
      select_y_mark = False

      container_dict = {
          MN.linear_regression: Container(),
          MN.polynomial_regression: Container(),
          MN.logistic_regression: Container(),
      }

      visualize = ""

      @classmethod
      def get_dataset_list(cls):
-         return ["Iris Dataset", "Wine Dataset", "Breast Cancer Dataset", "

      @classmethod
      def get_col_list(cls):
@@ -545,8 +739,7 @@ class Dataset:

      for i, col in enumerate(cls.data.columns.values):
          if i == 0:
-             if not (all(isinstance(x, str) for x in cls.data.iloc[:, 0]) or all(
-                     isinstance(x, float) for x in cls.data.iloc[:, 0])):
                  return False
          else:
              if cls.data[col].dtype.name != "float64":
@@ -576,12 +769,20 @@ class Dataset:
      def get_linear_regression_model_list(cls):
          return ["线性回归", "Lasso回归", "Ridge回归", "弹性网络回归"]

      @classmethod
      def get_linear_regression_model_name_mapping(cls):
          return dict(zip(cls.get_linear_regression_model_list(), ["LinearRegression", "Lasso", "Ridge", "ElasticNet"]))

      @classmethod
-     def
          optimize = cls.get_optimize_name_mapping()[optimize]

          data_copy = cls.data
@@ -596,12 +797,37 @@ class Dataset:
          )
          container = Container(x_train, y_train, x_test, y_test, optimize)

          if cls.cur_model == MN.linear_regression:
-
          elif cls.cur_model == MN.polynomial_regression:
              container = polynomial_regression(container)
          elif cls.cur_model == MN.logistic_regression:
              container = logistic_regression(container)

          cls.container_dict[cls.cur_model] = container
@@ -621,9 +847,11 @@ class Dataset:
      def get_model_name(cls):
          return [x for x in cls.container_dict.keys()]

      @classmethod
      def get_model_chinese_name(cls):
-         return ["线性回归", "多项式回归", "逻辑斯谛分类"

      @classmethod
      def get_model_name_mapping(cls):
@@ -646,46 +874,237 @@ class Dataset:
      @classmethod
      def draw_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
          # [绘图]
-         if cls.visualize == MN.
-             return cls.
-         elif cls.visualize == MN.learning_curve_validation:
-             return cls.draw_learning_curve_validation_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
          elif cls.visualize == MN.shap_beeswarm:
              return cls.draw_shap_beeswarm_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)

      @classmethod
-     def
-
-     [... a block of removed lines, truncated in this view ...]
          cur_name = "" if is_default else name

          paint_object = PaintObject()
          paint_object.set_color_cur_list(color_cur_list)
-         paint_object.set_label_cur_list(label_cur_list)
          paint_object.set_x_cur_label(x_cur_label)
          paint_object.set_y_cur_label(y_cur_label)
          paint_object.set_name(cur_name)

-
      @classmethod
-     def
-
          for model_name in model_list:
              model_name = cls.get_model_name_mapping_reverse()[model_name]
-
          color_cur_list = Config.COLORS if is_default else color_list
-
          x_cur_label = "Train Sizes" if is_default else x_label
          y_cur_label = "Accuracy" if is_default else y_label
          cur_name = "" if is_default else name
@@ -697,10 +1116,15 @@ class Dataset:
          paint_object.set_y_cur_label(y_cur_label)
          paint_object.set_name(cur_name)

-
      @classmethod
-     def draw_shap_beeswarm_plot(cls,
          model_name = cls.get_model_name_mapping_reverse()[model_name]
          container = cls.container_dict[model_name]
@@ -717,17 +1141,65 @@ class Dataset:
          # paint_object.set_y_cur_label(y_cur_label)
          paint_object.set_name(cur_name)

-         return

      @classmethod
      def get_file(cls):
          # [绘图]
-         if cls.visualize == MN.
-             return FilePath.png_base.format(FilePath.
-         elif cls.visualize == MN.learning_curve_validation:
-             return FilePath.png_base.format(FilePath.learning_curve_validation_plot)
          elif cls.visualize == MN.shap_beeswarm:
              return FilePath.png_base.format(FilePath.shap_beeswarm_plot)

      @classmethod
      def check_file(cls):
@@ -757,6 +1229,10 @@ class Dataset:
      def get_linear_regression_mark(cls):
          return True if cls.cur_model == MN.linear_regression else False

      @classmethod
      def get_assign_list(cls):
          return ["分类", "回归"]
@@ -803,6 +1279,99 @@ class Dataset:

      return true_list + [gr.Textbox(visible=False)] * (StaticValue.max_num - cur_num)


  def choose_assign(assign: str):
      Dataset.choose_assign(assign)
@@ -817,29 +1386,94 @@ def select_as_model(model_name: str):


  # [绘图]
  def shap_beeswarm_first_draw_plot(*inputs):
      Dataset.visualize = MN.shap_beeswarm
      return first_draw_plot(inputs)


- def
-     Dataset.visualize = MN.
      return first_draw_plot(inputs)


- def
-
-
  def first_draw_plot(inputs):
-     select_model =
      x_label = ""
      y_label = ""
      name = ""
      color_list = []
      label_list = []

      cur_plt, paint_object = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, True)

      return first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object)
@@ -857,16 +1491,37 @@ def non_first_draw_plot(inputs):
      label_list = list(inputs[StaticValue.max_num+3: 2*StaticValue.max_num+3])
      start_index = 2*StaticValue.max_num+3

      # 绘图 (plotting)
-     if Dataset.visualize == MN.
-         select_model
-
-         select_model = inputs[start_index]
      elif Dataset.visualize == MN.shap_beeswarm:
-         select_model
      else:
-         select_model

      cur_plt, paint_object = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, False)
@@ -877,15 +1532,34 @@ def first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object):
      extra_gr_dict = {}

      # [绘图]
-     if Dataset.visualize == MN.
-         cur_plt.savefig(FilePath.png_base.format(FilePath.
-         extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.
-     elif Dataset.visualize == MN.learning_curve_validation:
-         cur_plt.savefig(FilePath.png_base.format(FilePath.learning_curve_validation_plot), dpi=300)
-         extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.learning_curve_validation_plot)})
      elif Dataset.visualize == MN.shap_beeswarm:
          cur_plt.savefig(FilePath.png_base.format(FilePath.shap_beeswarm_plot), dpi=300)
          extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.shap_beeswarm_plot)})

      extra_gr_dict.update(dict(zip(colorpickers, Dataset.colorpickers_change(paint_object))))
      extra_gr_dict.update(dict(zip(color_textboxs, Dataset.color_textboxs_change(paint_object))))
@@ -897,8 +1571,15 @@
      return get_return_extra(True, extra_gr_dict)


-
-
      return get_return(True)
@@ -924,9 +1605,7 @@ def change_data_type_to_float():
  def encode_label(col_list: list):
      Dataset.encode_label(col_list)

-     return get_return(True, {
-         display_encode_label_dataframe: gr.Dataframe(Dataset.get_str2int_mappings_df(), type="pandas", visible=True,
-                                                      label=LN.display_encode_label_dataframe)})


  def del_duplicate():
@@ -981,7 +1660,7 @@ def choose_custom_dataset(file: str):
      return get_return(True, {choose_custom_dataset_file: gr.File(Dataset.file, visible=True)})


- with gr.Blocks() as demo:
      '''
      组件 (components)
      '''
@@ -1031,24 +1710,67 @@ with gr.Blocks() as demo:

      # 数据模型 (data models)
      with gr.Accordion("数据模型"):
          select_as_model_radio = gr.Radio(visible=False)
          linear_regression_model_radio = gr.Radio(visible=False)
          model_optimize_radio = gr.Radio(visible=False)
          model_train_button = gr.Button(visible=False)
          model_train_checkbox = gr.Checkbox(visible=False)

      # 可视化 (visualization)
      with gr.Accordion("数据可视化"):
          with gr.Tab("学习曲线图"):
              learning_curve_checkboxgroup = gr.Checkboxgroup(visible=False)
-
-
-

-         with gr.Tab("
              shap_beeswarm_radio = gr.Radio(visible=False)
              shap_beeswarm_button = gr.Button(visible=False)

      legend_labels_textboxs = []
      with gr.Accordion("图例"):
          with gr.Row():
@@ -1077,6 +1799,9 @@ with gr.Blocks() as demo:
      draw_plot = gr.Plot(visible=False)
      draw_file = gr.File(visible=False)

      '''
      监听事件 (event listeners)
      '''
@@ -1108,26 +1833,53 @@ with gr.Blocks() as demo:

      # 数据模型
      select_as_model_radio.change(fn=select_as_model, inputs=[select_as_model_radio], outputs=get_outputs())
-

      # 可视化
-
-
-
      title_name_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
-
      x_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
-
      y_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
-
      for i in range(StaticValue.max_num):
          colorpickers[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
-
          color_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + color_textboxs + legend_labels_textboxs
-
          legend_labels_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
-

  if __name__ == "__main__":
      demo.launch()

  import copy
+ import math
  import os.path
+ import random

  import gradio as gr
  import matplotlib.pyplot as plt
  from sklearn.model_selection import train_test_split
  import pandas as pd

+ from analysis.bayes_model import *
+ from analysis.distance_model import *
+ from analysis.gradient_model import *
+ from analysis.kernel_model import *
+ from analysis.shap_model import *
+ from analysis.tree_model import *
+ from metrics.calculate_classification_metrics import ClassificationMetrics
+ from metrics.calculate_regression_metrics import RegressionMetrics
  from static.process import *
  from analysis.linear_model import *
+ from visualization.draw_boxplot import draw_boxplot
+ from visualization.draw_data_fit_total import draw_data_fit_total
+ from visualization.draw_heat_map import draw_heat_map
+ from visualization.draw_histogram import draw_histogram
  from visualization.draw_learning_curve_total import draw_learning_curve_total
+ from static.new_class import *

  import warnings

  warnings.filterwarnings("ignore")

+ # [模型] (models)
+ class ChooseModelMetrics:
+     @classmethod
+     def choose(cls, cur_model):
+         if cur_model == MN.linear_regression:
+             return RegressionMetrics.get_metrics()
+         elif cur_model == MN.polynomial_regression:
+             return RegressionMetrics.get_metrics()
+         elif cur_model == MN.logistic_regression:
+             return ClassificationMetrics.get_metrics()
+         elif cur_model == MN.decision_tree_classifier:
+             return ClassificationMetrics.get_metrics()
+         elif cur_model == MN.random_forest_classifier:
+             return ClassificationMetrics.get_metrics()
+         elif cur_model == MN.random_forest_regression:
+             return RegressionMetrics.get_metrics()
+         elif cur_model == MN.xgboost_classifier:
+             return ClassificationMetrics.get_metrics()
+         elif cur_model == MN.lightGBM_classifier:
+             return ClassificationMetrics.get_metrics()
+         elif cur_model == MN.gradient_boosting_regression:
+             return RegressionMetrics.get_metrics()
+         elif cur_model == MN.svm_classifier:
+             return ClassificationMetrics.get_metrics()
+         elif cur_model == MN.svm_regression:
+             return RegressionMetrics.get_metrics()
+         elif cur_model == MN.knn_classifier:
+             return ClassificationMetrics.get_metrics()
+         elif cur_model == MN.knn_regression:
+             return RegressionMetrics.get_metrics()
+         elif cur_model == MN.naive_bayes_classification:
+             return ClassificationMetrics.get_metrics()
+
+
+ # [模型]
+ class ChooseModelParams:
+     @classmethod
+     def choose(cls, cur_model):
+         if cur_model == MN.linear_regression:
+             return LinearRegressionParams.get_params(Dataset.linear_regression_model_type)
+         elif cur_model == MN.polynomial_regression:
+             return PolynomialRegressionParams.get_params()
+         elif cur_model == MN.logistic_regression:
+             return LogisticRegressionParams.get_params()
+         elif cur_model == MN.decision_tree_classifier:
+             return DecisionTreeClassifierParams.get_params()
+         elif cur_model == MN.random_forest_classifier:
+             return RandomForestClassifierParams.get_params()
+         elif cur_model == MN.random_forest_regression:
+             return RandomForestRegressionParams.get_params()
+         elif cur_model == MN.xgboost_classifier:
+             return XgboostClassifierParams.get_params()
+         elif cur_model == MN.lightGBM_classifier:
+             return LightGBMClassifierParams.get_params()
+         elif cur_model == MN.gradient_boosting_regression:
+             return GradientBoostingParams.get_params()
+         elif cur_model == MN.svm_classifier:
+             return SVMClassifierParams.get_params()
+         elif cur_model == MN.svm_regression:
+             return SVMRegressionParams.get_params()
+         elif cur_model == MN.knn_classifier:
+             return KNNClassifierParams.get_params()
+         elif cur_model == MN.knn_regression:
+             return KNNRegressionParams.get_params()
+         elif cur_model == MN.naive_bayes_classification:
+             return NaiveBayesClassifierParams.get_params(Dataset.naive_bayes_classifier_model_type)

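ChooseModelMetrics and ChooseModelParams are long if/elif ladders over the MN names; a dict-based dispatch table keeps the same behavior in a few lines and fails loudly on an unregistered model. A sketch of the equivalent table — the names are from this commit, the restructuring itself is only a suggestion:

    _METRICS_BY_MODEL = {
        MN.linear_regression: RegressionMetrics,
        MN.polynomial_regression: RegressionMetrics,
        MN.logistic_regression: ClassificationMetrics,
        MN.decision_tree_classifier: ClassificationMetrics,
        MN.random_forest_classifier: ClassificationMetrics,
        MN.random_forest_regression: RegressionMetrics,
        MN.xgboost_classifier: ClassificationMetrics,
        MN.lightGBM_classifier: ClassificationMetrics,
        MN.gradient_boosting_regression: RegressionMetrics,
        MN.svm_classifier: ClassificationMetrics,
        MN.svm_regression: RegressionMetrics,
        MN.knn_classifier: ClassificationMetrics,
        MN.knn_regression: RegressionMetrics,
        MN.naive_bayes_classification: ClassificationMetrics,
    }

    def choose_metrics(cur_model):
        # Raises KeyError on an unknown model instead of silently returning None.
        return _METRICS_BY_MODEL[cur_model].get_metrics()
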
class StaticValue:
|
103 |
+
max_num = 20
|
104 |
|
105 |
|
106 |
class FilePath:
|
|
|
109 |
|
110 |
# [绘图]
|
111 |
display_dataset = "current_excel_data"
|
112 |
+
|
113 |
+
data_distribution_plot = "data_distribution_plot"
|
114 |
+
descriptive_indicators_plot = "descriptive_indicators_plot"
|
115 |
+
heatmap_plot = "heatmap_plot"
|
116 |
+
learning_curve_plot = "learning_curve_plot"
|
117 |
shap_beeswarm_plot = "shap_beeswarm_plot"
|
118 |
+
data_fit_plot = "data_fit_plot"
|
119 |
+
waterfall_plot = "waterfall_plot"
|
120 |
+
force_plot = "force_plot"
|
121 |
+
dependence_plot = "dependence_plot"
|
122 |
|
123 |
|
124 |
class MN: # ModelName
|
125 |
classification = "classification"
|
126 |
regression = "regression"
|
127 |
|
128 |
+
# [模型]
|
129 |
+
linear_regression = "linear regressor"
|
130 |
+
polynomial_regression = "polynomial regressor"
|
131 |
+
logistic_regression = "logistic regressor"
|
132 |
+
decision_tree_classifier = "decision tree classifier"
|
133 |
+
random_forest_classifier = "random forest classifier"
|
134 |
+
random_forest_regression = "random forest regressor"
|
135 |
+
xgboost_classifier = "xgboost classifier"
|
136 |
+
lightGBM_classifier = "lightGBM classifier"
|
137 |
+
gradient_boosting_regression = "gradient boosting regressor"
|
138 |
+
svm_classifier = "svm classifier"
|
139 |
+
svm_regression = "svm regressor"
|
140 |
+
knn_classifier = "knn classifier"
|
141 |
+
knn_regression = "knn regressor"
|
142 |
+
naive_bayes_classification = "naive bayes classification"
|
143 |
|
144 |
# [绘图]
|
145 |
+
data_distribution = "data_distribution"
|
146 |
+
descriptive_indicators = "descriptive_indicators"
|
147 |
+
heatmap = "heatmap"
|
148 |
+
learning_curve = "learning_curve"
|
149 |
shap_beeswarm = "shap_beeswarm"
|
150 |
+
data_fit = "data_fit"
|
151 |
+
waterfall = "waterfall"
|
152 |
+
force = "force"
|
153 |
+
dependence = "dependence"
|
154 |
|
155 |
|
156 |
class LN: # LabelName
|
|
|
173 |
standardize_data_button = "标准化 [可选]"
|
174 |
select_as_y_radio = "选择因变量 [必选]"
|
175 |
choose_assign_radio = "选择任务类型(同时会根据任务类型将第1列数据强制转换)[必选]"
|
|
|
176 |
model_optimize_radio = "选择超参数优化方法"
|
177 |
model_train_button = "训练"
|
178 |
+
model_train_params_dataframe = "训练后的模型参数"
|
179 |
+
model_train_metrics_dataframe = "训练后的模型指标"
|
180 |
select_as_model_radio = "选择所需训练的模型"
|
181 |
|
182 |
+
# [模型]
|
183 |
+
linear_regression_model_radio = "选择线性回归的模型"
|
184 |
+
naive_bayes_classification_model_radio = "选择朴素贝叶斯分类的模型"
|
185 |
+
|
186 |
title_name_textbox = "标题"
|
187 |
x_label_textbox = "x 轴名称"
|
188 |
y_label_textbox = "y 轴名称"
|
|
|
190 |
labels = ["图例 {}".format(i) for i in range(StaticValue.max_num)]
|
191 |
|
192 |
# [绘图]
|
193 |
+
heatmap_is_rotate = "x轴标签是否旋转"
|
194 |
+
heatmap_checkboxgroup = "选择所需绘制系数热力图的列"
|
195 |
+
heatmap_button = "绘制系数热力图"
|
196 |
+
data_distribution_radio = "选择所需绘制数据分布图的列"
|
197 |
+
data_distribution_is_rotate = "x轴标签是否旋转"
|
198 |
+
data_distribution_button = "绘制数据分布图"
|
199 |
+
descriptive_indicators_checkboxgroup = "选择所需绘制箱线统计图的列"
|
200 |
+
descriptive_indicators_is_rotate = "x轴标签是否旋转"
|
201 |
+
descriptive_indicators_button = "绘制箱线统计图"
|
202 |
+
learning_curve_checkboxgroup = "选择所需绘制学习曲线图的模型"
|
203 |
+
learning_curve_button = "绘制学习曲线图"
|
204 |
+
shap_beeswarm_radio = "选择所需绘制特征蜂群图的模型"
|
205 |
+
shap_beeswarm_type = "选择图像类型"
|
206 |
+
shap_beeswarm_button = "绘制特征蜂群图"
|
207 |
+
data_fit_checkboxgroup = "选择所需绘制数据拟合图的模型"
|
208 |
+
data_fit_button = "绘制数据拟合图"
|
209 |
+
waterfall_radio = "选择所需绘制特征瀑布图的模型"
|
210 |
+
waterfall_number = "输入相关特征的变量索引"
|
211 |
+
waterfall_button = "绘制特征瀑布图"
|
212 |
+
force_radio = "选择所需绘制特征力图的模型"
|
213 |
+
force_number = "输入相关特征的变量索引"
|
214 |
+
force_button = "绘制特征力图"
|
215 |
+
dependence_radio = "选择所需绘制特征依赖图的模型"
|
216 |
+
dependence_col = "选择相应的列"
|
217 |
+
dependence_button = "绘制特征依赖图"
|
218 |
+
|
219 |
+
data_distribution_plot = "数据分布图"
|
220 |
+
descriptive_indicators_plot = "箱线统计图"
|
221 |
+
heatmap_plot = "系数热力图"
|
222 |
+
learning_curve_plot = "学习曲线图"
|
223 |
+
shap_beeswarm_plot = "特征蜂群图"
|
224 |
+
data_fit_plot = "数据拟合图"
|
225 |
+
waterfall_plot = "特征瀑布图"
|
226 |
+
force_plot = "特征力图"
|
227 |
+
dependence_plot = "特征依赖图"
|
228 |
|
229 |
|
230 |
def get_return_extra(is_visible, extra_gr_dict: dict = None):
|
|
|
275 |
standardize_data_checkboxgroup,
|
276 |
standardize_data_button,
|
277 |
select_as_y_radio,
|
|
|
278 |
model_optimize_radio,
|
279 |
model_train_button,
|
280 |
model_train_checkbox,
|
281 |
+
model_train_params_dataframe,
|
282 |
+
model_train_metrics_dataframe,
|
283 |
select_as_model_radio,
|
284 |
choose_assign_radio,
|
285 |
display_dataset,
|
|
|
289 |
x_label_textbox,
|
290 |
y_label_textbox,
|
291 |
|
292 |
+
# [模型]
|
293 |
+
linear_regression_model_radio,
|
294 |
+
naive_bayes_classification_model_radio,
|
295 |
+
|
296 |
# [绘图]
|
297 |
+
heatmap_is_rotate,
|
298 |
+
heatmap_checkboxgroup,
|
299 |
+
heatmap_button,
|
300 |
+
data_distribution_radio,
|
301 |
+
data_distribution_is_rotate,
|
302 |
+
data_distribution_button,
|
303 |
+
descriptive_indicators_checkboxgroup,
|
304 |
+
descriptive_indicators_is_rotate,
|
305 |
+
descriptive_indicators_dataframe,
|
306 |
+
descriptive_indicators_button,
|
307 |
learning_curve_checkboxgroup,
|
308 |
+
learning_curve_button,
|
|
|
309 |
shap_beeswarm_radio,
|
310 |
+
shap_beeswarm_type,
|
311 |
shap_beeswarm_button,
|
312 |
+
data_fit_checkboxgroup,
|
313 |
+
data_fit_button,
|
314 |
+
waterfall_radio,
|
315 |
+
waterfall_number,
|
316 |
+
waterfall_button,
|
317 |
+
force_radio,
|
318 |
+
force_number,
|
319 |
+
force_button,
|
320 |
+
dependence_radio,
|
321 |
+
dependence_col,
|
322 |
+
dependence_button,
|
323 |
}
|
324 |
|
325 |
gr_set.update(set(colorpickers))
|
|
|
356 |
|
357 |
select_as_model_radio: gr.Radio(Dataset.get_model_list(), visible=Dataset.check_before_train(), label=LN.select_as_model_radio),
|
358 |
model_optimize_radio: gr.Radio(Dataset.get_optimize_list(), visible=Dataset.check_before_train(), label=LN.model_optimize_radio),
|
|
|
|
|
|
|
359 |
model_train_button: gr.Button(LN.model_train_button, visible=Dataset.check_before_train()),
|
360 |
model_train_checkbox: gr.Checkbox(Dataset.get_model_container_status(), visible=Dataset.check_select_model(), label=Dataset.get_model_label()),
|
361 |
+
model_train_params_dataframe: gr.Dataframe(Dataset.get_model_train_params_dataframe(), type="pandas", visible=Dataset.get_model_container_status()),
|
362 |
+
model_train_metrics_dataframe: gr.Dataframe(Dataset.get_model_train_metrics_dataframe(), type="pandas", visible=Dataset.get_model_container_status()),
|
363 |
|
364 |
draw_plot: gr.Plot(visible=False),
|
365 |
draw_file: gr.File(visible=False),
|
|
|
367 |
x_label_textbox: gr.Textbox(visible=False),
|
368 |
y_label_textbox: gr.Textbox(visible=False),
|
369 |
|
370 |
+
# [模型]
|
371 |
+
linear_regression_model_radio: gr.Radio(Dataset.get_linear_regression_model_list(), visible=Dataset.get_linear_regression_mark(), label=LN.linear_regression_model_radio),
|
372 |
+
naive_bayes_classification_model_radio: gr.Radio(Dataset.get_naive_bayes_classifier_model_list(), visible=Dataset.get_naive_bayes_classifier_mark(), label=LN.naive_bayes_classification_model_radio),
|
373 |
+
|
374 |
# [绘图]
|
375 |
+
heatmap_checkboxgroup: gr.Checkboxgroup(Dataset.get_float_col_list(), visible=True, label=LN.heatmap_checkboxgroup),
|
376 |
+
heatmap_is_rotate: gr.Checkbox(visible=True, label=LN.heatmap_is_rotate),
|
377 |
+
heatmap_button: gr.Button(LN.heatmap_button, visible=True),
|
378 |
+
descriptive_indicators_checkboxgroup: gr.Checkboxgroup(Dataset.get_float_col_list(), visible=True, label=LN.descriptive_indicators_checkboxgroup),
|
379 |
+
data_distribution_radio: gr.Radio(Dataset.get_str_col_list(), visible=True, label=LN.data_distribution_radio),
|
380 |
+
data_distribution_is_rotate: gr.Checkbox(visible=True, label=LN.data_distribution_is_rotate),
|
381 |
+
data_distribution_button: gr.Button(LN.data_distribution_button, visible=True),
|
382 |
+
descriptive_indicators_is_rotate: gr.Checkbox(visible=True, label=LN.descriptive_indicators_is_rotate),
|
383 |
+
descriptive_indicators_dataframe: gr.Dataframe(Dataset.get_descriptive_indicators_df(), type="pandas", visible=Dataset.check_descriptive_indicators_df()),
|
384 |
+
descriptive_indicators_button: gr.Button(LN.descriptive_indicators_button, visible=True),
|
385 |
learning_curve_checkboxgroup: gr.Checkboxgroup(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.learning_curve_checkboxgroup),
|
386 |
+
learning_curve_button: gr.Button(LN.learning_curve_button, visible=Dataset.check_before_train()),
|
|
|
387 |
shap_beeswarm_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.shap_beeswarm_radio),
|
388 |
+
shap_beeswarm_type: gr.Radio(Dataset.get_shap_beeswarm_plot_type(), visible=Dataset.check_before_train(), label=LN.shap_beeswarm_type),
|
389 |
shap_beeswarm_button: gr.Button(LN.shap_beeswarm_button, visible=Dataset.check_before_train()),
|
390 |
+
data_fit_checkboxgroup: gr.Checkboxgroup(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.data_fit_checkboxgroup),
|
391 |
+
data_fit_button: gr.Button(LN.data_fit_button, visible=Dataset.check_before_train()),
|
392 |
+
waterfall_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.waterfall_radio),
|
393 |
+
waterfall_number: gr.Slider(0, Dataset.get_total_row_num(), value=0, step=1, visible=Dataset.check_before_train(), label=LN.waterfall_number),
|
394 |
+
waterfall_button: gr.Button(LN.waterfall_button, visible=Dataset.check_before_train()),
|
395 |
+
force_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.force_radio),
|
396 |
+
force_number: gr.Slider(0, Dataset.get_total_row_num(), value=0, step=1, visible=Dataset.check_before_train(), label=LN.force_number),
|
397 |
+
force_button: gr.Button(LN.force_button, visible=Dataset.check_before_train()),
|
398 |
+
dependence_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.dependence_radio),
|
399 |
+
dependence_col: gr.Radio(Dataset.get_col_list(), visible=Dataset.check_before_train(), label=LN.dependence_col),
|
400 |
+
dependence_button: gr.Button(LN.dependence_button, visible=Dataset.check_before_train()),
|
401 |
+
|
402 |
}
|
403 |
|
404 |
gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
|
|
|
431 |
standardize_data_checkboxgroup: gr.Checkboxgroup(visible=False),
|
432 |
standardize_data_button: gr.Button(visible=False),
|
433 |
select_as_y_radio: gr.Radio(visible=False),
|
|
|
434 |
model_optimize_radio: gr.Radio(visible=False),
|
435 |
model_train_button: gr.Button(visible=False),
|
436 |
model_train_checkbox: gr.Checkbox(visible=False),
|
437 |
+
model_train_metrics_dataframe: gr.Dataframe(visible=False),
|
438 |
+
model_train_params_dataframe: gr.Dataframe(visible=False),
|
439 |
select_as_model_radio: gr.Radio(visible=False),
|
440 |
choose_assign_radio: gr.Radio(visible=False),
|
441 |
|
|
|
445 |
x_label_textbox: gr.Textbox(visible=False),
|
446 |
y_label_textbox: gr.Textbox(visible=False),
|
447 |
|
448 |
+
# [模型]
|
449 |
+
linear_regression_model_radio: gr.Radio(visible=False),
|
450 |
+
naive_bayes_classification_model_radio: gr.Radio(visible=False),
|
451 |
+
|
452 |
# [绘图]
|
453 |
+
heatmap_checkboxgroup: gr.Checkboxgroup(visible=False),
|
454 |
+
heatmap_is_rotate: gr.Checkbox(visible=False),
|
455 |
+
heatmap_button: gr.Button(visible=False),
|
456 |
+
data_distribution_radio: gr.Radio(visible=False),
|
457 |
+
data_distribution_is_rotate: gr.Checkbox(visible=False),
|
458 |
+
data_distribution_button: gr.Button(visible=False),
|
459 |
+
descriptive_indicators_checkboxgroup: gr.Checkboxgroup(visible=False),
|
460 |
+
descriptive_indicators_is_rotate: gr.Checkbox(visible=False),
|
461 |
+
descriptive_indicators_dataframe: gr.Dataframe(visible=False),
|
462 |
+
descriptive_indicators_button: gr.Button(visible=False),
|
463 |
learning_curve_checkboxgroup: gr.Checkboxgroup(visible=False),
|
464 |
+
learning_curve_button: gr.Button(visible=False),
|
|
|
465 |
shap_beeswarm_radio: gr.Radio(visible=False),
|
466 |
+
shap_beeswarm_type: gr.Radio(visible=False),
|
467 |
shap_beeswarm_button: gr.Button(visible=False),
|
468 |
+
data_fit_checkboxgroup: gr.Checkboxgroup(visible=False),
|
469 |
+
data_fit_button: gr.Button(visible=False),
|
470 |
+
waterfall_radio: gr.Radio(visible=False),
|
471 |
+
waterfall_number: gr.Slider(visible=False),
|
472 |
+
waterfall_button: gr.Button(visible=False),
|
473 |
+
force_radio: gr.Radio(visible=False),
|
474 |
+
force_number: gr.Slider(visible=False),
|
475 |
+
force_button: gr.Button(visible=False),
|
476 |
+
dependence_radio: gr.Radio(visible=False),
|
477 |
+
dependence_col: gr.Radio(visible=False),
|
478 |
+
dependence_button: gr.Button(visible=False),
|
479 |
}
|
480 |
|
481 |
gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
|
|
|
498 |
cur_model = ""
|
499 |
select_y_mark = False
|
500 |
|
501 |
+
descriptive_indicators_df = pd.DataFrame()
|
502 |
+
|
503 |
+
linear_regression_model_type = ""
|
504 |
+
naive_bayes_classifier_model_type = ""
|
505 |
+
|
506 |
container_dict = {
|
507 |
+
# [模型]
|
508 |
MN.linear_regression: Container(),
|
509 |
MN.polynomial_regression: Container(),
|
510 |
MN.logistic_regression: Container(),
|
511 |
+
MN.decision_tree_classifier: Container(),
|
512 |
+
MN.random_forest_classifier: Container(),
|
513 |
+
MN.random_forest_regression: Container(),
|
514 |
+
MN.xgboost_classifier: Container(),
|
515 |
+
MN.lightGBM_classifier: Container(),
|
516 |
+
MN.gradient_boosting_regression: Container(),
|
517 |
+
MN.svm_classifier: Container(),
|
518 |
+
MN.svm_regression: Container(),
|
519 |
+
MN.knn_classifier: Container(),
|
520 |
+
MN.knn_regression: Container(),
|
521 |
+
MN.naive_bayes_classification: Container(),
|
522 |
}
|
523 |
|
524 |
visualize = ""
|
525 |
|
526 |
+
@classmethod
|
527 |
+
def check_descriptive_indicators_df(cls):
|
528 |
+
return True if not cls.descriptive_indicators_df.empty else False
|
529 |
+
|
530 |
+
@classmethod
|
531 |
+
def get_descriptive_indicators_df(cls):
|
532 |
+
return cls.descriptive_indicators_df
|
533 |
+
|
534 |
+
@classmethod
|
535 |
+
def get_notes(cls):
|
536 |
+
notes = ""
|
537 |
+
with open("./data/notes.md", "r", encoding="utf-8") as f:
|
538 |
+
notes = str(f.read())
|
539 |
+
return notes
|
540 |
+
|
541 |
@classmethod
|
542 |
def get_dataset_list(cls):
|
543 |
+
return ["自定义", "Iris Dataset", "Wine Dataset", "Breast Cancer Dataset", "Diabetes Dataset", "California Housing Dataset"]
|
544 |
|
545 |
@classmethod
|
546 |
def get_col_list(cls):
|
|
|
739 |
|
740 |
for i, col in enumerate(cls.data.columns.values):
|
741 |
if i == 0:
|
742 |
+
if not (all(isinstance(x, str) for x in cls.data.iloc[:, 0]) or all(isinstance(x, float) for x in cls.data.iloc[:, 0])):
|
|
|
743 |
return False
|
744 |
else:
|
745 |
if cls.data[col].dtype.name != "float64":
|
|
|
769 |
def get_linear_regression_model_list(cls):
|
770 |
return ["线性回归", "Lasso回归", "Ridge回归", "弹性网络回归"]
|
771 |
|
772 |
+
@classmethod
|
773 |
+
def get_naive_bayes_classifier_model_list(cls):
|
774 |
+
return ["多项式朴素贝叶斯分类", "高斯朴素贝叶斯分类", "补充朴素贝叶斯分类"]
|
775 |
+
|
776 |
@classmethod
|
777 |
def get_linear_regression_model_name_mapping(cls):
|
778 |
return dict(zip(cls.get_linear_regression_model_list(), ["LinearRegression", "Lasso", "Ridge", "ElasticNet"]))
|
779 |
|
780 |
@classmethod
|
781 |
+
def get_naive_bayes_classifier_model_name_mapping(cls):
|
782 |
+
return dict(zip(cls.get_naive_bayes_classifier_model_list(), ["MultinomialNB", "GaussianNB", "ComplementNB"]))
|
783 |
+
|
784 |
+
@classmethod
|
785 |
+
def train_model(cls, optimize, linear_regression_model_type=None, naive_bayes_classifier_model_type=None):
|
786 |
optimize = cls.get_optimize_name_mapping()[optimize]
|
787 |
|
788 |
data_copy = cls.data
|
|
|
797 |
)
|
798 |
container = Container(x_train, y_train, x_test, y_test, optimize)
|
799 |
|
800 |
+
# [模型]
|
801 |
if cls.cur_model == MN.linear_regression:
|
802 |
+
cls.linear_regression_model_type = cls.get_linear_regression_model_name_mapping()[linear_regression_model_type]
|
803 |
+
container = linear_regression(container, cls.linear_regression_model_type)
|
804 |
elif cls.cur_model == MN.polynomial_regression:
|
805 |
container = polynomial_regression(container)
|
806 |
elif cls.cur_model == MN.logistic_regression:
|
807 |
container = logistic_regression(container)
|
808 |
+
elif cls.cur_model == MN.decision_tree_classifier:
|
809 |
+
container = decision_tree_classifier(container)
|
810 |
+
elif cls.cur_model == MN.random_forest_classifier:
|
811 |
+
container = random_forest_classifier(container)
|
812 |
+
elif cls.cur_model == MN.random_forest_regression:
|
813 |
+
container = random_forest_regression(container)
|
814 |
+
elif cls.cur_model == MN.xgboost_classifier:
|
815 |
+
container = xgboost_classifier(container)
|
816 |
+
elif cls.cur_model == MN.lightGBM_classifier:
|
817 |
+
container = lightGBM_classifier(container)
|
818 |
+
elif cls.cur_model == MN.gradient_boosting_regression:
|
819 |
+
container = gradient_boosting_regression(container)
|
820 |
+
elif cls.cur_model == MN.svm_classifier:
|
821 |
+
container = svm_classifier(container)
|
822 |
+
elif cls.cur_model == MN.svm_regression:
|
823 |
+
container = svm_regression(container)
|
824 |
+
elif cls.cur_model == MN.knn_classifier:
|
825 |
+
container = knn_classifier(container)
|
826 |
+
elif cls.cur_model == MN.knn_regression:
|
827 |
+
container = knn_regression(container)
|
828 |
+
elif cls.cur_model == MN.naive_bayes_classification:
|
829 |
+
cls.naive_bayes_classifier_model_type = cls.get_naive_bayes_classifier_model_name_mapping()[naive_bayes_classifier_model_type]
|
830 |
+
container = naive_bayes_classification(container, cls.naive_bayes_classifier_model_type)
|
831 |
|
832 |
cls.container_dict[cls.cur_model] = container
|
833 |
|
|
|
847 |
def get_model_name(cls):
|
848 |
return [x for x in cls.container_dict.keys()]
|
849 |
|
850 |
+
# [模型]
|
851 |
@classmethod
|
852 |
def get_model_chinese_name(cls):
|
853 |
+
return ["线性回归", "多项式回归", "逻辑斯谛分类", "决策树分类", "随机森林分类", "随机森林回归", "XGBoost分类", "LightGBM分类",
|
854 |
+
"梯度提升回归", "支持向量机分类", "支持向量机回归", "K-最近邻分类", "K-最近邻回归", "朴素贝叶斯分类"]
|
855 |
|
856 |
@classmethod
|
857 |
def get_model_name_mapping(cls):
|
|
|
     @classmethod
     def draw_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
         # [plotting]
+        if cls.visualize == MN.learning_curve:
+            return cls.draw_learning_curve_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
         elif cls.visualize == MN.shap_beeswarm:
             return cls.draw_shap_beeswarm_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+        elif cls.visualize == MN.data_fit:
+            return cls.draw_data_fit_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+        elif cls.visualize == MN.waterfall:
+            return cls.draw_waterfall_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+        elif cls.visualize == MN.force:
+            return cls.draw_force_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+        elif cls.visualize == MN.dependence:
+            return cls.draw_dependence_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+        elif cls.visualize == MN.data_distribution:
+            return cls.draw_data_distribution_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+        elif cls.visualize == MN.descriptive_indicators:
+            return cls.draw_descriptive_indicators_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+        elif cls.visualize == MN.heatmap:
+            return cls.draw_heatmap_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
 
     @classmethod
+    def draw_heatmap_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        color_cur_list = [] if is_default else color_list
+        x_cur_label = "Indicators" if is_default else x_label
+        y_cur_label = "Value" if is_default else y_label
+        cur_name = "" if is_default else name
 
+        paint_object = PaintObject()
+        paint_object.set_color_cur_list(color_cur_list)
+        paint_object.set_x_cur_label(x_cur_label)
+        paint_object.set_y_cur_label(y_cur_label)
+        paint_object.set_name(cur_name)
 
+        if cls.check_col_list(select_model.get_heatmap_col()):
+            return cls.error_return_draw(paint_object)
+
+        df = Dataset.data
+        heatmap_col = select_model.get_heatmap_col()
+
+        covX = np.around(np.corrcoef(df[heatmap_col].T), decimals=3)
+        std_dev = np.sqrt(np.diag(covX))
+        pearson_matrix = covX / np.outer(std_dev, std_dev)
+
+        return draw_heat_map(pearson_matrix, heatmap_col, paint_object, select_model.get_heatmap_is_rotate())
+
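One observation on the Pearson computation above: `np.corrcoef` already returns the correlation matrix, so `np.diag(covX)` is all ones and the division by `np.outer(std_dev, std_dev)` leaves the matrix unchanged. A minimal stand-alone check on toy data (not the app's code):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(100, 3))              # 100 samples, 3 columns

pearson = np.corrcoef(x.T)                 # 3x3 Pearson correlation matrix
assert np.allclose(np.diag(pearson), 1.0)  # diagonal is already 1, so the extra normalization is a no-op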
+    @classmethod
+    def draw_descriptive_indicators_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        color_cur_list = [Config.COLORS[random.randint(0, 11)]] * 3 if is_default else color_list
+        x_cur_label = "Indicators" if is_default else x_label
+        y_cur_label = "Value" if is_default else y_label
+        cur_name = "" if is_default else name
+
+        paint_object = PaintObject()
+        paint_object.set_color_cur_list(color_cur_list)
+        paint_object.set_x_cur_label(x_cur_label)
+        paint_object.set_y_cur_label(y_cur_label)
+        paint_object.set_name(cur_name)
+
+        if cls.check_col_list(select_model.get_descriptive_indicators_col()):
+            return cls.error_return_draw(paint_object)
+
+        df = Dataset.data
+        descriptive_indicators_col = select_model.get_descriptive_indicators_col()
+
+        descriptive_indicators_df = pd.DataFrame(
+            index=list(descriptive_indicators_col),
+            columns=[
+                "Name",
+                "Min",
+                "Max",
+                "Avg",
+                "Standard Deviation",
+                "Standard Error",
+                "Upper Quartile",
+                "Median",
+                "Lower Quartile",
+                "Interquartile Distance",
+                "Kurtosis",
+                "Skewness",
+                "Coefficient of Variation"
+            ]
+        )
+
+        for col in descriptive_indicators_col:
+            descriptive_indicators_df["Name"][col] = col
+            descriptive_indicators_df["Min"][col] = df[col].min()
+            descriptive_indicators_df["Max"][col] = df[col].max()
+            descriptive_indicators_df["Avg"][col] = df[col].mean()
+            descriptive_indicators_df["Standard Deviation"][col] = df[col].std()
+            descriptive_indicators_df["Standard Error"][col] = descriptive_indicators_df["Standard Deviation"][col] / math.sqrt(len(df[col]))
+            descriptive_indicators_df["Upper Quartile"][col] = df[col].quantile(0.75)
+            descriptive_indicators_df["Median"][col] = df[col].quantile(0.5)
+            descriptive_indicators_df["Lower Quartile"][col] = df[col].quantile(0.25)
+            descriptive_indicators_df["Interquartile Distance"][col] = descriptive_indicators_df["Upper Quartile"][col] - descriptive_indicators_df["Lower Quartile"][col]
+            descriptive_indicators_df["Kurtosis"][col] = df[col].kurt()
+            descriptive_indicators_df["Skewness"][col] = df[col].skew()
+            descriptive_indicators_df["Coefficient of Variation"][col] = descriptive_indicators_df["Standard Deviation"][col] / descriptive_indicators_df["Avg"][col]
+
+        cls.descriptive_indicators_df = descriptive_indicators_df
+
+        cur_df = df[descriptive_indicators_col].astype(float)
+
+        return draw_boxplot(cur_df, paint_object, select_model.get_descriptive_indicators_is_rotate())
+
+    @classmethod
+    def error_return_draw(cls, paint_object):
+        cur_plt = plt.Figure(figsize=(10, 8))
+        return cur_plt, paint_object
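Note that the interquartile distance above is now upper quartile minus lower quartile (the commit's original order produced a negative value). Most of these indicators also have direct pandas equivalents that can serve as a cross-check; a quick sketch on a toy frame (the column name is made up):

import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, 4.0, 8.0]})

stats = df["a"].describe()                               # count, mean, std, min, quartiles, max
iqr = df["a"].quantile(0.75) - df["a"].quantile(0.25)    # interquartile range
cv = df["a"].std() / df["a"].mean()                      # coefficient of variation
print(stats, iqr, cv)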
+
+    @classmethod
+    def draw_data_distribution_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        cur_col = select_model.get_data_distribution_col()
+
+        color_cur_list = [Config.COLORS[random.randint(0, 11)]] if is_default else color_list
+        x_cur_label = cur_col if is_default else x_label
+        y_cur_label = "Num" if is_default else y_label
         cur_name = "" if is_default else name
 
         paint_object = PaintObject()
         paint_object.set_color_cur_list(color_cur_list)
         paint_object.set_x_cur_label(x_cur_label)
         paint_object.set_y_cur_label(y_cur_label)
         paint_object.set_name(cur_name)
 
+        if cls.check_col_list(select_model.get_data_distribution_col()):
+            return cls.error_return_draw(paint_object)
+
+        counts_mapping = {}
+        for x in Dataset.data.loc[:, cur_col].values:
+            if x in counts_mapping.keys():
+                counts_mapping[x] += 1
+            else:
+                counts_mapping[x] = 1
+
+        sorting = sorted(counts_mapping.items(), reverse=True, key=lambda m: m[1])
+        nums = [x[1] for x in sorting]
+        labels = [x[0] for x in sorting]
+
+        if Dataset.check_data_distribution_type(cur_col) == "histogram":
+            return draw_histogram(nums, labels, paint_object, select_model.get_data_distribution_is_rotate())
+        else:
+            return cls.error_return_draw(paint_object)
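The manual counting loop above is equivalent to `collections.Counter`, which already sorts by frequency; a small stand-alone sketch (not the app's code):

from collections import Counter

values = ["a", "b", "a", "c", "a", "b"]
counts = Counter(values).most_common()   # pairs sorted by frequency, descending
labels = [v for v, _ in counts]          # ["a", "b", "c"]
nums = [n for _, n in counts]            # [3, 2, 1]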
+
+    @classmethod
+    def draw_dependence_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        model_name = select_model.get_models()
+
+        model_name = cls.get_model_name_mapping_reverse()[model_name]
+        container = cls.container_dict[model_name]
+
+        # color_cur_list = Config.COLORS if is_default else color_list
+        # label_cur_list = [x for x in learning_curve_dict.keys()] if is_default else label_list
+        # x_cur_label = "Train Sizes" if is_default else x_label
+        # y_cur_label = "Accuracy" if is_default else y_label
+        cur_name = "" if is_default else name
+
+        paint_object = PaintObject()
+        # paint_object.set_color_cur_list(color_cur_list)
+        # paint_object.set_label_cur_list(label_cur_list)
+        # paint_object.set_x_cur_label(x_cur_label)
+        # paint_object.set_y_cur_label(y_cur_label)
+        paint_object.set_name(cur_name)
+
+        return draw_dependence(container.get_model(), container.x_train, cls.data.columns.values.tolist()[1:], select_model.get_dependence_col(), paint_object)
 
     @classmethod
+    def draw_force_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        model_name = select_model.get_models()
+
+        model_name = cls.get_model_name_mapping_reverse()[model_name]
+        container = cls.container_dict[model_name]
+
+        # color_cur_list = Config.COLORS if is_default else color_list
+        # label_cur_list = [x for x in learning_curve_dict.keys()] if is_default else label_list
+        # x_cur_label = "Train Sizes" if is_default else x_label
+        # y_cur_label = "Accuracy" if is_default else y_label
+        cur_name = "" if is_default else name
+
+        paint_object = PaintObject()
+        # paint_object.set_color_cur_list(color_cur_list)
+        # paint_object.set_label_cur_list(label_cur_list)
+        # paint_object.set_x_cur_label(x_cur_label)
+        # paint_object.set_y_cur_label(y_cur_label)
+        paint_object.set_name(cur_name)
+
+        return draw_force(container.get_model(), container.x_train, cls.data.columns.values.tolist()[1:], select_model.get_force_number(), paint_object)
+
+    @classmethod
+    def draw_waterfall_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        model_name = select_model.get_models()
+
+        model_name = cls.get_model_name_mapping_reverse()[model_name]
+        container = cls.container_dict[model_name]
+
+        # color_cur_list = Config.COLORS if is_default else color_list
+        # label_cur_list = [x for x in learning_curve_dict.keys()] if is_default else label_list
+        # x_cur_label = "Train Sizes" if is_default else x_label
+        # y_cur_label = "Accuracy" if is_default else y_label
+        cur_name = "" if is_default else name
+
+        paint_object = PaintObject()
+        # paint_object.set_color_cur_list(color_cur_list)
+        # paint_object.set_label_cur_list(label_cur_list)
+        # paint_object.set_x_cur_label(x_cur_label)
+        # paint_object.set_y_cur_label(y_cur_label)
+        paint_object.set_name(cur_name)
+
+        return draw_waterfall(container.get_model(), container.x_train, cls.data.columns.values.tolist()[1:], select_model.get_waterfall_number(), paint_object)
+
+    @classmethod
+    def draw_learning_curve_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        cur_dict = {}
+
+        model_list = select_model.get_models()
 
         for model_name in model_list:
             model_name = cls.get_model_name_mapping_reverse()[model_name]
+            cur_dict[model_name] = cls.container_dict[model_name].get_learning_curve_values()
 
         color_cur_list = Config.COLORS if is_default else color_list
+        if is_default:
+            label_cur_list = []
+            for x in cur_dict.keys():
+                label_cur_list.append("train " + str(x))
+                label_cur_list.append("validation " + str(x))
+        else:
+            label_cur_list = label_list
+
         x_cur_label = "Train Sizes" if is_default else x_label
         y_cur_label = "Accuracy" if is_default else y_label
         cur_name = "" if is_default else name
 
         paint_object = PaintObject()
         paint_object.set_color_cur_list(color_cur_list)
         paint_object.set_label_cur_list(label_cur_list)
         paint_object.set_x_cur_label(x_cur_label)
         paint_object.set_y_cur_label(y_cur_label)
         paint_object.set_name(cur_name)
 
+        if cls.check_cur_dict(cur_dict):
+            return cls.error_return_draw(paint_object)
+
+        return draw_learning_curve_total(cur_dict, paint_object)
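The five values unpacked by `get_learning_curve_values()` correspond to what `sklearn.model_selection.learning_curve` produces once the per-fold score arrays are reduced; a sketch of how a container's fields could be filled (the estimator and dataset here are placeholders):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import learning_curve

X, y = load_iris(return_X_y=True)
train_sizes, train_scores, test_scores = learning_curve(
    LogisticRegression(max_iter=1000), X, y, cv=5)

train_scores_mean = np.mean(train_scores, axis=1)   # average over the CV folds
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)
# container.set_learning_curve_values(train_sizes, train_scores_mean,
#                                     train_scores_std, test_scores_mean, test_scores_std)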
     @classmethod
+    def draw_shap_beeswarm_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        model_name = select_model.get_models()
+
         model_name = cls.get_model_name_mapping_reverse()[model_name]
         container = cls.container_dict[model_name]
 
         # color_cur_list = Config.COLORS if is_default else color_list
         # label_cur_list = [x for x in learning_curve_dict.keys()] if is_default else label_list
         # x_cur_label = "Train Sizes" if is_default else x_label
         # y_cur_label = "Accuracy" if is_default else y_label
         cur_name = "" if is_default else name
 
         paint_object = PaintObject()
         # paint_object.set_color_cur_list(color_cur_list)
         # paint_object.set_label_cur_list(label_cur_list)
         # paint_object.set_x_cur_label(x_cur_label)
         # paint_object.set_y_cur_label(y_cur_label)
         paint_object.set_name(cur_name)
 
+        return draw_shap_beeswarm(container.get_model(), container.x_train, cls.data.columns.values.tolist()[1:], select_model.get_beeswarm_plot_type(), paint_object)
+
+    @classmethod
+    def draw_data_fit_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        cur_dict = {}
+
+        model_list = select_model.get_models()
+
+        for model_name in model_list:
+            model_name = cls.get_model_name_mapping_reverse()[model_name]
+            cur_dict[model_name] = cls.container_dict[model_name].get_data_fit_values()
+
+        color_cur_list = Config.COLORS if is_default else color_list
+        if is_default:
+            label_cur_list = []
+            for x in cur_dict.keys():
+                label_cur_list.append("pred " + str(x))
+                label_cur_list.append("real data")
+        else:
+            label_cur_list = label_list
+
+        x_cur_label = "n value" if is_default else x_label
+        y_cur_label = "y value" if is_default else y_label
+        cur_name = "" if is_default else name
+
+        paint_object = PaintObject()
+        paint_object.set_color_cur_list(color_cur_list)
+        paint_object.set_label_cur_list(label_cur_list)
+        paint_object.set_x_cur_label(x_cur_label)
+        paint_object.set_y_cur_label(y_cur_label)
+        paint_object.set_name(cur_name)
+
+        return draw_data_fit_total(cur_dict, paint_object)
+
+    @classmethod
+    def get_shap_beeswarm_plot_type(cls):
+        return ["bar", "violin"]
 
     @classmethod
     def get_file(cls):
         # [plotting]
+        if cls.visualize == MN.learning_curve:
+            return FilePath.png_base.format(FilePath.learning_curve_plot)
         elif cls.visualize == MN.shap_beeswarm:
             return FilePath.png_base.format(FilePath.shap_beeswarm_plot)
+        elif cls.visualize == MN.data_fit:
+            return FilePath.png_base.format(FilePath.data_fit_plot)
+        elif cls.visualize == MN.waterfall:
+            return FilePath.png_base.format(FilePath.waterfall_plot)
+        elif cls.visualize == MN.force:
+            return FilePath.png_base.format(FilePath.force_plot)
+        elif cls.visualize == MN.dependence:
+            return FilePath.png_base.format(FilePath.dependence_plot)
+        elif cls.visualize == MN.data_distribution:
+            return FilePath.png_base.format(FilePath.data_distribution_plot)
+        elif cls.visualize == MN.descriptive_indicators:
+            return FilePath.png_base.format(FilePath.descriptive_indicators_plot)
+        elif cls.visualize == MN.heatmap:
+            return FilePath.png_base.format(FilePath.heatmap_plot)
 
     @classmethod
     def check_file(cls):
...
     @classmethod
     def get_linear_regression_mark(cls):
         return True if cls.cur_model == MN.linear_regression else False
 
+    @classmethod
+    def get_naive_bayes_classifier_mark(cls):
+        return True if cls.cur_model == MN.naive_bayes_classification else False
+
     @classmethod
     def get_assign_list(cls):
         return ["分类", "回归"]
...
 
         return true_list + [gr.Textbox(visible=False)] * (StaticValue.max_num - cur_num)
 
+    @classmethod
+    def get_model_train_metrics_dataframe(cls):
+        if cls.cur_model != "" and cls.get_model_container_status():
+            columns_list = ["指标", "数值"]
+
+            output_dict = cls.container_dict[cls.cur_model].get_info()["指标"]
+
+            output_df = pd.DataFrame(columns=columns_list)
+            output_df["指标"] = [x for x in output_dict.keys() if x in ChooseModelMetrics.choose(cls.cur_model)]
+            output_df["数值"] = [output_dict[x] for x in output_df["指标"]]
+
+            return output_df
+
+    @classmethod
+    def get_model_train_params_dataframe(cls):
+        if cls.cur_model != "" and cls.get_model_container_status():
+            columns_list = ["参数", "数值"]
+
+            output_dict = cls.container_dict[cls.cur_model].get_info()["参数"]
+
+            output_df = pd.DataFrame(columns=columns_list)
+            output_df["参数"] = [x for x in output_dict.keys() if x in ChooseModelParams.choose(cls.cur_model).keys()]
+            output_df["数值"] = [output_dict[x] for x in output_df["参数"]]
+
+            return output_df
+
+    @classmethod
+    def get_str_col_list(cls):
+        str_col_list = []
+        for col in cls.get_col_list():
+            if all(isinstance(x, str) for x in cls.data.loc[:, col]):
+                str_col_list.append(col)
+
+        return str_col_list
+
+    @classmethod
+    def get_float_col_list(cls):
+        float_col_list = []
+        for col in cls.get_col_list():
+            if all(isinstance(x, float) for x in cls.data.loc[:, col]):
+                float_col_list.append(col)
+
+        return float_col_list
+
+    @classmethod
+    def check_data_distribution_type(cls, col):
+        if all(isinstance(x, str) for x in cls.data.loc[:, col]):
+            return "histogram"
+        # elif all(isinstance(x, float) for x in cls.data.loc[:, col]):
+        #     return "line_graph"
+        else:
+            gr.Warning("所选列的所有数据必须为字符型或浮点型")
+
+    @classmethod
+    def check_col_list(cls, col):
+        if not col:
+            gr.Warning("请选择所需列")
+            return True
+        return False
+
+    @classmethod
+    def check_train_model(cls, optimize):
+        if cls.cur_model == "":
+            gr.Warning("请选择所需训练的模型")
+            return True
+        if not optimize:
+            gr.Warning("请选择超参数优化方法")
+            return True
+        return False
+
+    @classmethod
+    def error_return_train(cls):
+        return get_return(True)
+
+    @classmethod
+    def check_train_model_other_related(cls, linear_regression_model_type, naive_bayes_classifier_model_type):
+        if cls.cur_model == MN.linear_regression:
+            if not linear_regression_model_type:
+                gr.Warning("请选择线性回归对应的模型")
+                return True
+        elif cls.cur_model == MN.naive_bayes_classification:
+            if not naive_bayes_classifier_model_type:
+                gr.Warning("请选择朴素贝叶斯对应的模型")
+                return True
+        return False
+
+    @classmethod
+    def check_cur_dict(cls, cur_dict):
+        if not cur_dict:
+            gr.Warning("请选择绘图所需的模型")
+            return True
+        return False
+
 
 def choose_assign(assign: str):
     Dataset.choose_assign(assign)
...
 
 
 # [plotting]
+def heatmap_first_draw_plot(*inputs):
+    Dataset.visualize = MN.heatmap
+    return before_train_first_draw_plot(inputs)
+
+
+def descriptive_indicators_first_draw_plot(*inputs):
+    Dataset.visualize = MN.descriptive_indicators
+    return before_train_first_draw_plot(inputs)
+
+
+def data_distribution_first_draw_plot(*inputs):
+    Dataset.visualize = MN.data_distribution
+    return before_train_first_draw_plot(inputs)
+
+
+def dependence_first_draw_plot(*inputs):
+    Dataset.visualize = MN.dependence
+    return first_draw_plot(inputs)
+
+
+def force_first_draw_plot(*inputs):
+    Dataset.visualize = MN.force
+    return first_draw_plot(inputs)
+
+
+def waterfall_first_draw_plot(*inputs):
+    Dataset.visualize = MN.waterfall
+    return first_draw_plot(inputs)
+
+
+def data_fit_first_draw_plot(*inputs):
+    Dataset.visualize = MN.data_fit
+    return first_draw_plot(inputs)
+
+
 def shap_beeswarm_first_draw_plot(*inputs):
     Dataset.visualize = MN.shap_beeswarm
     return first_draw_plot(inputs)
 
 
+def learning_curve_first_draw_plot(*inputs):
+    Dataset.visualize = MN.learning_curve
     return first_draw_plot(inputs)
 
+def before_train_first_draw_plot(inputs):
+    select_model = SelectModel()
+    x_label = ""
+    y_label = ""
+    name = ""
+    color_list = []
+    label_list = []
+
+    # [plotting][no trained model required]
+    if Dataset.visualize == MN.data_distribution:
+        select_model.set_data_distribution_col(inputs[0])
+        select_model.set_data_distribution_is_rotate(inputs[1])
+    elif Dataset.visualize == MN.descriptive_indicators:
+        select_model.set_descriptive_indicators_is_rotate(inputs[0])
+        select_model.set_descriptive_indicators_col(inputs[1])
+    elif Dataset.visualize == MN.heatmap:
+        select_model.set_heatmap_col(inputs[0])
+        select_model.set_heatmap_is_rotate(inputs[1])
+
+    cur_plt, paint_object = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, True)
+
+    return first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object)
 
 def first_draw_plot(inputs):
+    select_model = SelectModel()
+    select_model.set_models(inputs[0])
     x_label = ""
     y_label = ""
     name = ""
     color_list = []
     label_list = []
 
+    # [plotting][trained model required]
+    if Dataset.visualize == MN.shap_beeswarm:
+        select_model.set_beeswarm_plot_type(inputs[1])
+    elif Dataset.visualize == MN.waterfall:
+        select_model.set_waterfall_number(inputs[1])
+    elif Dataset.visualize == MN.force:
+        select_model.set_force_number(inputs[1])
+    elif Dataset.visualize == MN.dependence:
+        select_model.set_dependence_col(inputs[1])
+
     cur_plt, paint_object = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, True)
 
     return first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object)
...
     label_list = list(inputs[StaticValue.max_num+3: 2*StaticValue.max_num+3])
     start_index = 2*StaticValue.max_num+3
 
+    select_model = SelectModel()
+
     # plotting
+    if Dataset.visualize == MN.learning_curve:
+        select_model.set_models(inputs[start_index+0])
+        select_model.set_beeswarm_plot_type(inputs[start_index+1])
     elif Dataset.visualize == MN.shap_beeswarm:
+        select_model.set_models(inputs[start_index+2])
+    elif Dataset.visualize == MN.data_fit:
+        select_model.set_models(inputs[start_index+3])
+    elif Dataset.visualize == MN.waterfall:
+        select_model.set_models(inputs[start_index+4])
+        select_model.set_waterfall_number(inputs[start_index+5])
+    elif Dataset.visualize == MN.force:
+        select_model.set_models(inputs[start_index+6])
+        select_model.set_force_number(inputs[start_index+7])
+    elif Dataset.visualize == MN.dependence:
+        select_model.set_models(inputs[start_index+8])
+        select_model.set_dependence_col(inputs[start_index+9])
+    elif Dataset.visualize == MN.data_distribution:
+        select_model.set_data_distribution_col(inputs[start_index+10])
+        select_model.set_data_distribution_is_rotate(inputs[start_index+11])
+    elif Dataset.visualize == MN.descriptive_indicators:
+        select_model.set_descriptive_indicators_is_rotate(inputs[start_index+12])
+        select_model.set_descriptive_indicators_col(inputs[start_index+13])
+    elif Dataset.visualize == MN.heatmap:
+        select_model.set_heatmap_col(inputs[start_index+14])
+        select_model.set_heatmap_is_rotate(inputs[start_index+15])
 
     else:
+        select_model.set_models(inputs[start_index])
 
     cur_plt, paint_object = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, False)
...
     extra_gr_dict = {}
 
     # [plotting]
+    if Dataset.visualize == MN.learning_curve:
+        cur_plt.savefig(FilePath.png_base.format(FilePath.learning_curve_plot), dpi=300)
+        extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.learning_curve_plot)})
     elif Dataset.visualize == MN.shap_beeswarm:
         cur_plt.savefig(FilePath.png_base.format(FilePath.shap_beeswarm_plot), dpi=300)
         extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.shap_beeswarm_plot)})
+    elif Dataset.visualize == MN.data_fit:
+        cur_plt.savefig(FilePath.png_base.format(FilePath.data_fit_plot), dpi=300)
+        extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.data_fit_plot)})
+    elif Dataset.visualize == MN.waterfall:
+        cur_plt.savefig(FilePath.png_base.format(FilePath.waterfall_plot), dpi=300)
+        extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.waterfall_plot)})
+    elif Dataset.visualize == MN.force:
+        cur_plt.savefig(FilePath.png_base.format(FilePath.force_plot), dpi=300)
+        extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.force_plot)})
+    elif Dataset.visualize == MN.dependence:
+        cur_plt.savefig(FilePath.png_base.format(FilePath.dependence_plot), dpi=300)
+        extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.dependence_plot)})
+    elif Dataset.visualize == MN.data_distribution:
+        cur_plt.savefig(FilePath.png_base.format(FilePath.data_distribution_plot), dpi=300)
+        extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.data_distribution_plot)})
+    elif Dataset.visualize == MN.descriptive_indicators:
+        cur_plt.savefig(FilePath.png_base.format(FilePath.descriptive_indicators_plot), dpi=300)
+        extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.descriptive_indicators_plot)})
+        extra_gr_dict.update({descriptive_indicators_dataframe: gr.Dataframe(Dataset.get_descriptive_indicators_df(), type="pandas", visible=Dataset.check_descriptive_indicators_df())})
+    elif Dataset.visualize == MN.heatmap:
+        cur_plt.savefig(FilePath.png_base.format(FilePath.heatmap_plot), dpi=300)
+        extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.heatmap_plot)})
 
     extra_gr_dict.update(dict(zip(colorpickers, Dataset.colorpickers_change(paint_object))))
     extra_gr_dict.update(dict(zip(color_textboxs, Dataset.color_textboxs_change(paint_object))))
...
     return get_return_extra(True, extra_gr_dict)
 
 
+# [model]
+def train_model(optimize, linear_regression_model_type, naive_bayes_classifier_model_type):
+    if Dataset.check_train_model(optimize):
+        return Dataset.error_return_train()
+
+    if Dataset.check_train_model_other_related(linear_regression_model_type, naive_bayes_classifier_model_type):
+        return Dataset.error_return_train()
+
+    Dataset.train_model(optimize, linear_regression_model_type, naive_bayes_classifier_model_type)
 
     return get_return(True)
...
 def encode_label(col_list: list):
     Dataset.encode_label(col_list)
 
+    return get_return(True, {display_encode_label_dataframe: gr.Dataframe(Dataset.get_str2int_mappings_df(), type="pandas", visible=True, label=LN.display_encode_label_dataframe)})
 
 
 def del_duplicate():
...
     return get_return(True, {choose_custom_dataset_file: gr.File(Dataset.file, visible=True)})
 
 
+with gr.Blocks(js=Config.JS_0) as demo:
     '''
    Components
     '''
...
 
     # data model
     with gr.Accordion("数据模型"):
+        # [model]
         select_as_model_radio = gr.Radio(visible=False)
         linear_regression_model_radio = gr.Radio(visible=False)
+        naive_bayes_classification_model_radio = gr.Radio(visible=False)
         model_optimize_radio = gr.Radio(visible=False)
         model_train_button = gr.Button(visible=False)
         model_train_checkbox = gr.Checkbox(visible=False)
+        model_train_params_dataframe = gr.Dataframe(visible=False)
+        model_train_metrics_dataframe = gr.Dataframe(visible=False)
 
     # visualization
     with gr.Accordion("数据可视化"):
+        with gr.Tab("数据分布图"):
+            data_distribution_radio = gr.Radio(visible=False)
+            data_distribution_is_rotate = gr.Checkbox(visible=False)
+            data_distribution_button = gr.Button(visible=False)
+
+        with gr.Tab("箱线统计图"):
+            descriptive_indicators_checkboxgroup = gr.Checkboxgroup(visible=False)
+            descriptive_indicators_is_rotate = gr.Checkbox(visible=False)
+            descriptive_indicators_button = gr.Button(visible=False)
+            descriptive_indicators_dataframe = gr.Dataframe(visible=False)
+
+        with gr.Tab("系数热力图"):
+            heatmap_checkboxgroup = gr.Checkboxgroup(visible=False)
+            heatmap_is_rotate = gr.Checkbox(visible=False)
+            heatmap_button = gr.Button(visible=False)
+
+        # with gr.Tab("主成分分析"):
+        #     pca_button = gr.Button(visible=False)
+        #     pca_replace_data_button = gr.Button(visible=False)
+
+
         with gr.Tab("学习曲线图"):
             learning_curve_checkboxgroup = gr.Checkboxgroup(visible=False)
+            learning_curve_button = gr.Button(visible=False)
+
+        with gr.Tab("数据拟合图"):
+            data_fit_checkboxgroup = gr.Checkboxgroup(visible=False)
+            data_fit_button = gr.Button(visible=False)
 
+        with gr.Tab("特征蜂群图"):
             shap_beeswarm_radio = gr.Radio(visible=False)
+            shap_beeswarm_type = gr.Radio(visible=False)
             shap_beeswarm_button = gr.Button(visible=False)
 
+        with gr.Tab("特征瀑布图"):
+            waterfall_radio = gr.Radio(visible=False)
+            waterfall_number = gr.Slider(visible=False)
+            waterfall_button = gr.Button(visible=False)
+
+        with gr.Tab("特征力图"):
+            force_radio = gr.Radio(visible=False)
+            force_number = gr.Slider(visible=False)
+            force_button = gr.Button(visible=False)
+
+        with gr.Tab("特征依赖图"):
+            dependence_radio = gr.Radio(visible=False)
+            dependence_col = gr.Radio(visible=False)
+            dependence_button = gr.Button(visible=False)
+
     legend_labels_textboxs = []
     with gr.Accordion("图例"):
         with gr.Row():
...
     draw_plot = gr.Plot(visible=False)
     draw_file = gr.File(visible=False)
 
+    with gr.Tab("文字说明"):
+        notes = gr.Markdown(Dataset.get_notes(), visible=True)
+
     '''
    Event listeners
     '''
...
 
     # data model
     select_as_model_radio.change(fn=select_as_model, inputs=[select_as_model_radio], outputs=get_outputs())
+
+    # [model]
+    model_train_button.click(fn=train_model, inputs=[model_optimize_radio, linear_regression_model_radio, naive_bayes_classification_model_radio], outputs=get_outputs())
+
+    # [plotting]
 
     # visualization
+    data_distribution_button.click(fn=data_distribution_first_draw_plot, inputs=[data_distribution_radio] + [data_distribution_is_rotate], outputs=get_outputs())
+    descriptive_indicators_button.click(fn=descriptive_indicators_first_draw_plot, inputs=[descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup], outputs=get_outputs())
+    heatmap_button.click(fn=heatmap_first_draw_plot, inputs=[heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
+    learning_curve_button.click(fn=learning_curve_first_draw_plot, inputs=[learning_curve_checkboxgroup], outputs=get_outputs())
+    shap_beeswarm_button.click(fn=shap_beeswarm_first_draw_plot, inputs=[shap_beeswarm_radio] + [shap_beeswarm_type], outputs=get_outputs())
+    data_fit_button.click(fn=data_fit_first_draw_plot, inputs=[data_fit_checkboxgroup], outputs=get_outputs())
+    waterfall_button.click(fn=waterfall_first_draw_plot, inputs=[waterfall_radio] + [waterfall_number], outputs=get_outputs())
+    force_button.click(fn=force_first_draw_plot, inputs=[force_radio] + [force_number], outputs=get_outputs())
+    dependence_button.click(fn=dependence_first_draw_plot, inputs=[dependence_radio] + [dependence_col], outputs=get_outputs())
 
     title_name_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                            + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
+                            + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
+                            + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
+
     x_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                         + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
+                         + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
+                         + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
+
     y_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                         + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
+                         + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
+                         + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
+
     for i in range(StaticValue.max_num):
         colorpickers[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                             + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
+                             + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
+                             + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
+
         color_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + color_textboxs + legend_labels_textboxs
+                               + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
+                               + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
+                               + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
+
         legend_labels_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                                       + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
+                                       + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
+                                       + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
 
 if __name__ == "__main__":
     demo.launch()
data/__init__.py
ADDED
File without changes

data/fetch_california_housing.csv
ADDED
The diff for this file is too large to render. See raw diff.
data/notes.md
ADDED
@@ -0,0 +1,12 @@
# EasyMachineLearning
### Introduction
- Version: v1.0
- Author: 李凌浩 (Li Linghao)
- Please contact the author with ideas for new features or any problems you run into ~
- *( WX: llh13857750421 )*
### Features not yet implemented
1. [Hard] Progress-bar visualization of model training (sklearn's training functions expose no callback)
2. Saving the trained model to a file so it can be loaded directly later
3. An AI assistant for data analysis (working on Excel data directly)
4. PCA (principal component analysis)
5. Clustering
metrics/calculate_classification_metrics.py
CHANGED
@@ -5,20 +5,27 @@ from sklearn.preprocessing import label_binarize
 from visualization.draw_line_graph import draw_line_graph
 
 
-def calculate_classification_metrics(pred_data, real_data, model_name):
+class ClassificationMetrics:
+    @classmethod
+    def get_metrics(cls):
+        return ["Accuracy", "Precision", "Recall", "F1-score"]
+
+
+def calculate_classification_metrics(pred_data, real_data):
     info = {}
 
     real_data = np.round(real_data, 0).astype(int)
     pred_data = np.round(pred_data, 0).astype(int)
 
     cur_confusion_matrix = confusion_matrix(real_data[:, 0], pred_data)
-    info["Confusion matrix of "+model_name] = cur_confusion_matrix
+    info["Confusion matrix"] = cur_confusion_matrix
 
-    info["Accuracy of "+model_name] = np.sum(cur_confusion_matrix.diagonal()) / np.sum(cur_confusion_matrix)
-    info["Precision of "+model_name] = cur_confusion_matrix.diagonal() / np.sum(cur_confusion_matrix, axis=1)
-    info["Recall of "+model_name] = cur_confusion_matrix.diagonal() / np.sum(cur_confusion_matrix, axis=0)
-    info["F1-score of "+model_name] = np.mean(2 * np.multiply(info["Precision of "+model_name], info["Recall of "+model_name]) / (info["Precision of "+model_name] + info["Recall of "+model_name]))
+    info["Accuracy"] = np.sum(cur_confusion_matrix.diagonal()) / np.sum(cur_confusion_matrix)
+    info["Precision"] = cur_confusion_matrix.diagonal() / np.sum(cur_confusion_matrix, axis=1)
+    info["Recall"] = cur_confusion_matrix.diagonal() / np.sum(cur_confusion_matrix, axis=0)
+    info["F1-score"] = np.mean(2 * np.multiply(info["Precision"], info["Recall"]) / (info["Precision"] + info["Recall"]))
+
+    return info
 
     max_class = max(real_data)[0]
     min_class = min(real_data)[0]
@@ -29,7 +36,3 @@ def calculate_classification_metrics(pred_data, real_data, model_name):
         fpr, tpr, thresholds = roc_curve(real_data_[:, i], pred_data_[:, i])
         # draw_line_graph(fpr, tpr, "ROC curve with AUC={:.2f}".format(auc(fpr, tpr)))
 
-    info["AUC of "+model_name] = roc_auc_score(real_data_, pred_data_)
-
-    return info
-
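A quick sanity check of the new `calculate_classification_metrics` on toy labels (the function expects `real_data` as a column vector, hence the reshape; because of the early `return info` above, only the confusion-matrix-based metrics are computed):

import numpy as np

real = np.array([0, 0, 1, 1, 2, 2]).reshape(-1, 1)
pred = np.array([0, 1, 1, 1, 2, 0])

metrics = calculate_classification_metrics(pred, real)
print(metrics["Accuracy"])   # 4 correct out of 6 -> 0.666...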
metrics/calculate_regression_metrics.py
CHANGED
@@ -2,44 +2,28 @@ import numpy as np
 from sklearn.metrics import *
 
 
-…
-…
-…
-…
-    info["MAE of "+model_name] = mean_absolute_error(real_data, pred_data)
-    # mae = mean_absolute_error(real_data, pred_data)
-    info["MSE of "+model_name] = mean_squared_error(real_data, pred_data)
-    # mse = mean_squared_error(real_data, pred_data)
-    info["RSME of "+model_name] = np.sqrt(info["MSE of "+model_name])
-    # rsme = np.sqrt(info["MSE of "+model_name])
-    info["R-Sqaure of "+model_name] = r2_score(real_data, pred_data)
-    # r2 = r2_score(real_data, pred_data)
-    if isinstance(max(real_data), np.ndarray):
-        info["Adjusted R-Square of " + model_name] = 1 - (1 - info["R-Sqaure of "+model_name]) * (len(pred_data)-1) / (len(pred_data)-max(real_data)[0]-1)
-        # ar2 = 1 - (1 - info["R-Sqaure of "+model_name]) * (len(pred_data)-1) / (len(pred_data)-max(real_data)[0]-1)
-    else:
-        info["Adjusted R-Square of " + model_name] = 1 - (1 - info["R-Sqaure of " + model_name]) * (len(pred_data) - 1) / (len(pred_data) - max(real_data) - 1)
-        # ar2 = 1 - (1 - info["R-Sqaure of " + model_name]) * (len(pred_data) - 1) / (len(pred_data) - max(real_data) - 1)
-
-    return info["Adjusted R-Square of " + model_name]
+class RegressionMetrics:
+    @classmethod
+    def get_metrics(cls):
+        return ["MAE", "MSE", "RSME", "R-Sqaure", "Adjusted R-Square"]
 
 
-def calculate_regression_metrics(pred_data, real_data, model_name):
+def calculate_regression_metrics(pred_data, real_data):
     info = {}
 
-    info["MAE of "+model_name] = mean_absolute_error(real_data, pred_data)
+    info["MAE"] = mean_absolute_error(real_data, pred_data)
     # mae = mean_absolute_error(real_data, pred_data)
-    info["MSE of "+model_name] = mean_squared_error(real_data, pred_data)
+    info["MSE"] = mean_squared_error(real_data, pred_data)
     # mse = mean_squared_error(real_data, pred_data)
-    info["RSME of "+model_name] = np.sqrt(info["MSE of "+model_name])
+    info["RSME"] = np.sqrt(info["MSE"])
     # rsme = np.sqrt(info["MSE of "+model_name])
-    info["R-Sqaure of "+model_name] = r2_score(real_data, pred_data)
+    info["R-Sqaure"] = r2_score(real_data, pred_data)
     # r2 = r2_score(real_data, pred_data)
     if isinstance(max(real_data), np.ndarray):
-        info["Adjusted R-Square of " + model_name] = 1 - (1 - info["R-Sqaure of "+model_name]) * (len(pred_data)-1) / (len(pred_data)-max(real_data)[0]-1)
+        info["Adjusted R-Square"] = 1 - (1 - info["R-Sqaure"]) * (len(pred_data)-1) / (len(pred_data)-max(real_data)[0]-1)
        # ar2 = 1 - (1 - info["R-Sqaure of "+model_name]) * (len(pred_data)-1) / (len(pred_data)-max(real_data)[0]-1)
     else:
-        info["Adjusted R-Square of " + model_name] = 1 - (1 - info["R-Sqaure of " + model_name]) * (len(pred_data) - 1) / (len(pred_data) - max(real_data) - 1)
+        info["Adjusted R-Square"] = 1 - (1 - info["R-Sqaure"]) * (len(pred_data) - 1) / (len(pred_data) - max(real_data) - 1)
         # ar2 = 1 - (1 - info["R-Sqaure of " + model_name]) * (len(pred_data) - 1) / (len(pred_data) - max(real_data) - 1)
 
     return info
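For reference, the adjusted R² computed here is 1 − (1 − R²)(n − 1)/(n − p − 1); note that the code plugs in `max(real_data)` where `p` would normally be the number of predictors, and that the key spellings "RSME" and "R-Sqaure" are kept as-is because `RegressionMetrics.get_metrics` must return exactly the same strings. A toy call with the shapes the function expects:

import numpy as np

real = np.array([1.0, 2.0, 3.0, 4.0])
pred = np.array([1.1, 1.9, 3.2, 3.8])

metrics = calculate_regression_metrics(pred, real)
print(metrics["MAE"], metrics["RSME"], metrics["R-Sqaure"])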
requirements.txt
CHANGED
@@ -1,13 +1,15 @@
 numpy~=1.23.5
 pandas~=1.5.3
-scikit-learn~=1.
+scikit-learn~=1.4.1.post1
 hmmlearn~=0.3.0
 matplotlib~=3.7.0
 scikit-fuzzy~=0.4.2
 gradio~=4.17.0
+shap~=0.44.1
 networkx~=2.8.4
 scipy~=1.10.0
+lightgbm~=4.3.0
 xgboost~=2.0.3
+torch~=2.2.0+cu118
 tqdm~=4.64.1
-…
-scikit-optimize~=0.9.0
+scikit-optimize~=0.9.0
static/config.py
CHANGED
@@ -1,8 +1,31 @@
 class Config:
     # random seed
     RANDOM_STATE = 123
+
+    # number of points shown in prediction plots
+    DISPLAY_RANGE = 100
+
     # plot color palettes
+    COLOR_ITER_NUM = 3
+
     COLORS = [
+        "#ca5353",
+        "#c874a5",
+        "#b674c8",
+        "#8274c8",
+        "#748dc8",
+        "#74acc8",
+        "#74c8b7",
+        "#74c88d",
+        "#a6c874",
+        "#e0e27e",
+        "#df9b77",
+        "#404040",
+        "#999999",
+        "#d4d4d4"
+    ] * COLOR_ITER_NUM
+
+    COLORS_0 = [
         "#8074C8",
         "#7895C1",
         "#A8CBDF",
@@ -11,44 +34,103 @@ class Config:
         "#E3625D",
         "#EF8B67",
         "#F0C284"
-    ]
+    ] * COLOR_ITER_NUM
 
+    COLORS_1 = [
+        "#4A5F7E",
+        "#719AAC",
+        "#72B063",
+        "#94C6CD",
+        "#B8DBB3",
+        "#E29135"
+    ] * COLOR_ITER_NUM
+
+    COLORS_2 = [
+        "#4485C7",
+        "#D4562E",
+        "#DBB428",
+        "#682487",
+        "#84BA42",
+        "#7ABBDB",
+        "#A51C36"
+    ] * COLOR_ITER_NUM
+
+    COLORS_3 = [
+        "#8074C8",
+        "#7895C1",
+        "#A8CBDF",
+        "#F5EBAE",
+        "#F0C284",
+        "#EF8B67",
+        "#E3625D",
+        "#B54764"
+    ] * COLOR_ITER_NUM
+
+    COLORS_4 = [
+        "#979998",
+        "#C69287",
+        "#E79A90",
+        "#EFBC91",
+        "#E4CD87",
+        "#FAE5BB",
+        "#DDDDDF"
+    ] * COLOR_ITER_NUM
+
+    COLORS_5 = [
         "#91CCC0",
         "#7FABD1",
         "#F7AC53",
         "#EC6E66",
         "#B5CE4E",
         "#BD7795",
-        …
+        "#7C7979"
+    ] * COLOR_ITER_NUM
 
-    COLORS_2 = [
-        "#A21A54",
-        "#E7724F",
-        "#32183C"
-    ]
 
-    COLORS_3 = [
-        "#ABD1BC",
-        "#CCCC99",
-        "#E3BBED"
-    ]
 
-    COLORS_4 = [
-        "#CFCFD0",
-        "#B6B3D6",
-        "#F58F7A",
-        "#E9687A",
-    ]
 
-    # number of points shown in prediction plots
-    DISPLAY_RANGE = 100
+    COLORS_6 = [
+        "#E9687A",
+        "#F58F7A",
+        "#FDE2D8",
+        "#CFCFD0",
+        "#B6B3D6"
+    ] * COLOR_ITER_NUM
+
+    JS_0 = """
+    function createGradioAnimation() {
+        var container = document.createElement('div');
+        container.id = 'gradio-animation';
+        container.style.fontSize = '2em';
+        container.style.fontWeight = 'bold';
+        container.style.textAlign = 'center';
+        container.style.marginBottom = '20px';
+
+        var text = 'Welcome to EasyMachineLearning!';
+        for (var i = 0; i < text.length; i++) {
+            (function(i){
+                setTimeout(function(){
+                    var letter = document.createElement('span');
+                    letter.style.opacity = '0';
+                    letter.style.transition = 'opacity 0.5s';
+                    letter.innerText = text[i];
+
+                    container.appendChild(letter);
+
+                    setTimeout(function() {
+                        letter.style.opacity = '1';
+                    }, 50);
+                }, i * 250);
+            })(i);
+        }
+
+        var gradioContainer = document.querySelector('.gradio-container');
+        gradioContainer.insertBefore(container, gradioContainer.firstChild);
+
+        return 'Animation created';
+    }
+    """
...
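Tiling each palette with `* COLOR_ITER_NUM` simply repeats the list three times so that indexing a few places past the base palette never raises. An equivalent that never runs out, using `itertools.cycle` (a sketch, not what the app uses):

from itertools import cycle, islice

base = ["#ca5353", "#c874a5", "#b674c8"]
colors = list(islice(cycle(base), 8))   # repeats the base palette as far as needed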
static/new_class.py
ADDED
@@ -0,0 +1,195 @@
class Container:
    def __init__(self, x_train=None, y_train=None, x_test=None, y_test=None, hyper_params_optimize=None):
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        self.hyper_params_optimize = hyper_params_optimize
        self.info = {"参数": {}, "指标": {}}
        self.y_pred = None
        self.train_sizes = None
        self.train_scores_mean = None
        self.train_scores_std = None
        self.test_scores_mean = None
        self.test_scores_std = None
        self.status = None
        self.model = None

    def get_info(self):
        return self.info

    def set_info(self, info: dict):
        self.info = info

    def set_y_pred(self, y_pred):
        self.y_pred = y_pred

    def get_data_fit_values(self):
        return [
            self.y_pred,
            self.y_test
        ]

    def get_learning_curve_values(self):
        return [
            self.train_sizes,
            self.train_scores_mean,
            self.train_scores_std,
            self.test_scores_mean,
            self.test_scores_std
        ]

    def set_learning_curve_values(self, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std):
        self.train_sizes = train_sizes
        self.train_scores_mean = train_scores_mean
        self.train_scores_std = train_scores_std
        self.test_scores_mean = test_scores_mean
        self.test_scores_std = test_scores_std

    def get_status(self):
        return self.status

    def set_status(self, status: str):
        self.status = status

    def get_model(self):
        return self.model

    def set_model(self, model):
        self.model = model


class PaintObject:
    def __init__(self):
        self.color_cur_num = 0
        self.color_cur_list = []
        self.label_cur_num = 0
        self.label_cur_list = []
        self.x_cur_label = ""
        self.y_cur_label = ""
        self.name = ""

    def get_color_cur_num(self):
        return self.color_cur_num

    def set_color_cur_num(self, color_cur_num):
        self.color_cur_num = color_cur_num

    def get_color_cur_list(self):
        return self.color_cur_list

    def set_color_cur_list(self, color_cur_list):
        self.color_cur_list = color_cur_list

    def get_label_cur_num(self):
        return self.label_cur_num

    def set_label_cur_num(self, label_cur_num):
        self.label_cur_num = label_cur_num

    def get_label_cur_list(self):
        return self.label_cur_list

    def set_label_cur_list(self, label_cur_list):
        self.label_cur_list = label_cur_list

    def get_x_cur_label(self):
        return self.x_cur_label

    def set_x_cur_label(self, x_cur_label):
        self.x_cur_label = x_cur_label

    def get_y_cur_label(self):
        return self.y_cur_label

    def set_y_cur_label(self, y_cur_label):
        self.y_cur_label = y_cur_label

    def get_name(self):
        return self.name

    def set_name(self, name):
        self.name = name


class SelectModel:
    def __init__(self):
        self.models = None
        self.waterfall_number = None
        self.force_number = None
        self.beeswarm_plot_type = None
        self.dependence_col = None
        self.data_distribution_col = None
        self.data_distribution_is_rotate = None
        self.descriptive_indicators_col = None
        self.descriptive_indicators_is_rotate = None
        self.heatmap_col = None
        self.heatmap_is_rotate = None

    def get_heatmap_col(self):
        return self.heatmap_col

    def set_heatmap_col(self, heatmap_col):
        self.heatmap_col = heatmap_col

    def get_heatmap_is_rotate(self):
        return self.heatmap_is_rotate

    def set_heatmap_is_rotate(self, heatmap_is_rotate):
        self.heatmap_is_rotate = heatmap_is_rotate

    def get_models(self):
        return self.models

    def set_models(self, models):
        self.models = models

    def get_waterfall_number(self):
        return self.waterfall_number

    def set_waterfall_number(self, waterfall_number):
        self.waterfall_number = waterfall_number

    def get_force_number(self):
        return self.force_number

    def set_force_number(self, force_number):
        self.force_number = force_number

    def get_beeswarm_plot_type(self):
        return self.beeswarm_plot_type

    def set_beeswarm_plot_type(self, beeswarm_plot_type):
        self.beeswarm_plot_type = beeswarm_plot_type

    def get_dependence_col(self):
        return self.dependence_col

    def set_dependence_col(self, dependence_col):
        self.dependence_col = dependence_col

    def get_data_distribution_col(self):
        return self.data_distribution_col

    def set_data_distribution_col(self, data_distribution_col):
        self.data_distribution_col = data_distribution_col

    def get_data_distribution_is_rotate(self):
        return self.data_distribution_is_rotate

    def set_data_distribution_is_rotate(self, data_distribution_is_rotate):
        self.data_distribution_is_rotate = data_distribution_is_rotate

    def get_descriptive_indicators_is_rotate(self):
        return self.descriptive_indicators_is_rotate

    def set_descriptive_indicators_is_rotate(self, descriptive_indicators_is_rotate):
        self.descriptive_indicators_is_rotate = descriptive_indicators_is_rotate

    def get_descriptive_indicators_col(self):
        return self.descriptive_indicators_col

    def set_descriptive_indicators_col(self, descriptive_indicators_col):
        self.descriptive_indicators_col = descriptive_indicators_col
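How the pieces fit: each trained model gets a `Container` holding its splits, fitted estimator, info dict, and curve data, which the plotting code later reads back. A minimal sketch of that life cycle with a stand-in estimator (the repo's training functions do this with their own models):

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

from static.new_class import Container

x_train, x_test, y_train, y_test = train_test_split(*load_iris(return_X_y=True), random_state=123)

container = Container(x_train, y_train, x_test, y_test, hyper_params_optimize="grid_search")

model = LogisticRegression(max_iter=1000).fit(x_train, y_train)
container.set_model(model)
container.set_y_pred(model.predict(x_test))
container.set_status("trained")

y_pred, y_real = container.get_data_fit_values()   # later consumed by the data-fit plot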
static/process.py
CHANGED
@@ -7,10 +7,10 @@ from skopt import BayesSearchCV
 import copy
 import pandas as pd
 from scipy.stats import spearmanr
+from io import StringIO
+from contextlib import redirect_stdout
 
-from sklearn.datasets import load_iris
-from sklearn.datasets import load_wine
-from sklearn.datasets import load_breast_cancer
+from sklearn.datasets import load_iris, load_wine, load_breast_cancer, load_diabetes
 from scipy.linalg import eig
 
 from static.config import Config
@@ -175,20 +175,36 @@ def choose_y_col_in_dataframe(df: pd.DataFrame, y_col: str):
 
 
 def load_data(sort):
+    type = ""
     if sort == "Iris Dataset":
         sk_data = load_iris()
+        type = "classification"
     elif sort == "Wine Dataset":
         sk_data = load_wine()
+        type = "classification"
     elif sort == "Breast Cancer Dataset":
         sk_data = load_breast_cancer()
+        type = "classification"
+    elif sort == "Diabetes Dataset":
+        sk_data = load_diabetes()
+        type = "regression"
+    elif sort == "California Housing Dataset":
+        df = pd.read_csv("./data/fetch_california_housing.csv")
+        return df
+    else:
+        sk_data = load_iris()
+        type = "classification"
 
-    target_data = sk_data.target.astype(str)
-    for i in range(len(sk_data.target_names)):
-        target_data = np.where(target_data == str(i), sk_data.target_names[i], target_data)
+    if type == "classification":
+        target_data = sk_data.target.astype(str)
+        for i in range(len(sk_data.target_names)):
+            target_data = np.where(target_data == str(i), sk_data.target_names[i], target_data)
+    else:
+        target_data = sk_data.target
 
-    sk_feature_names = sk_data.feature_names
+    feature_names = sk_data.feature_names
+    sk_feature_names = ["target"] + feature_names.tolist() if isinstance(feature_names, np.ndarray) else ["target"] + feature_names
     sk_data = np.concatenate((target_data.reshape(-1, 1), sk_data.data), axis=1)
-    sk_feature_names = np.insert(sk_feature_names, 0, "species")
 
     df = pd.DataFrame(data=sk_data, columns=sk_feature_names)
 
@@ -283,10 +299,7 @@ def grid_search(params, model, x_train, y_train, scoring=None):
 def grid_search(params, model, x_train, y_train, scoring=None):
     info = {}
 
-    if scoring == "neg_mean_squared_error":
-        grid_search_model = GridSearchCV(model, params, cv=5, scoring="neg_mean_squared_error")
-    else:
-        grid_search_model = GridSearchCV(model, params, cv=5)
+    grid_search_model = GridSearchCV(model, params, cv=3, n_jobs=-1)
 
     grid_search_model.fit(x_train, y_train.ravel())
 
@@ -300,10 +313,7 @@ def bayes_search(params, model, x_train, y_train, scoring=None):
 def bayes_search(params, model, x_train, y_train, scoring=None):
     info = {}
 
-    if scoring == "neg_mean_squared_error":
-        bayes_search_model = BayesSearchCV(model, params, cv=5, n_iter=50, scoring="neg_mean_squared_error")
-    else:
-        bayes_search_model = BayesSearchCV(model, params, cv=5, n_iter=50)
+    bayes_search_model = BayesSearchCV(model, params, cv=3, n_iter=50, n_jobs=-1)
 
     bayes_search_model.fit(x_train, y_train)
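A minimal usage sketch tying these pieces together, assuming the functions above are imported from static.process. Ridge and the alpha grid are illustrative stand-ins, and grid_search's return value is defined elsewhere in the module:

# Sketch only: load a built-in dataset and run the repo's grid_search on it.
from sklearn.linear_model import Ridge
from static.process import load_data, grid_search

df = load_data("Diabetes Dataset")               # first column is "target"
x_train = df.drop(columns=["target"]).to_numpy(dtype=float)
y_train = df["target"].to_numpy(dtype=float)

result = grid_search({"alpha": [0.1, 1.0, 10.0]}, Ridge(), x_train, y_train)

Dropping the scoring branch in favor of a fixed cv=3 with n_jobs=-1 trades per-task metrics for faster, parallel searches, which fits an interactive app.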
visualization/draw_boxplot.py
CHANGED
@@ -1,26 +1,33 @@
 import matplotlib.pyplot as plt
+import numpy as np
 
-from coding.llh.static.config import Config
+from static.config import Config
 
 
-def draw_boxplot(x_data, title):
+def draw_boxplot(x_data, paint_object, will_rotate=False):
+    plt.figure(figsize=(10, 8), dpi=300)
+
     plt.grid(True)
 
     plt.boxplot(
         x_data,
         meanline=True,
        showmeans=True,
-        medianprops={"color": …},
-        meanprops={"color": …},
-        flierprops={"marker": "o", "markerfacecolor": …},
+        medianprops={"color": paint_object.get_color_cur_list()[0], "linewidth": 1.5},
+        meanprops={"color": paint_object.get_color_cur_list()[1], "ls": "--", "linewidth": 1.5},
+        flierprops={"marker": "o", "markerfacecolor": paint_object.get_color_cur_list()[2]},
         labels=x_data.columns.values
     )
 
-    plt.title(title)
-    plt.savefig("./diagram/{}.png".format(title), dpi=300)
+    if will_rotate:
+        plt.xticks(rotation=-45)
+
+    plt.title(paint_object.get_name())
+
+    plt.xlabel(paint_object.get_x_cur_label())
+    plt.ylabel(paint_object.get_y_cur_label())
+
+    paint_object.set_color_cur_num(3)
 
-    plt.show()
+    return plt, paint_object
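Every plotting function now returns (plt, paint_object) instead of calling plt.show(), which is what a web front end needs. A hedged stub makes the new contract concrete; it exposes only the PaintObject methods this file uses, and the real class in static/new_class.py may carry more state:

# Hypothetical stand-in for PaintObject, built from the accessors used above.
import pandas as pd

class StubPaintObject:
    def __init__(self, name, colors, x_label="", y_label=""):
        self.name, self.colors = name, colors
        self.x_label, self.y_label = x_label, y_label

    def get_name(self):
        return self.name

    def get_x_cur_label(self):
        return self.x_label

    def get_y_cur_label(self):
        return self.y_label

    def get_color_cur_list(self):
        return self.colors

    def set_color_cur_num(self, num):
        self.color_cur_num = num

df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 4, 6, 8]})
plot, paint = draw_boxplot(df, StubPaintObject("Distribution", ["#1f77b4", "#ff7f0e", "#2ca02c"]))
plot.savefig("boxplot.png")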
visualization/draw_data_fit_total.py
ADDED
@@ -0,0 +1,48 @@
+import numpy as np
+from matplotlib import pyplot as plt
+
+from static.new_class import PaintObject
+from static.config import Config
+
+
+def draw_data_fit_total(input_dict, paint_object: PaintObject):
+    plt.figure(figsize=(10, 6), dpi=300)
+
+    for i, input_dict_items in enumerate(input_dict.items()):
+        name, cur_list = input_dict_items
+
+        if i == len(input_dict.keys())-1:
+            final_list = cur_list
+
+        plt.plot(
+            np.array([x for x in range(len(cur_list[0]))]),
+            cur_list[0],
+            "-",
+            color=paint_object.get_color_cur_list()[i],
+            alpha=0.9,
+            label=paint_object.get_label_cur_list()[i]
+        )
+
+    plt.plot(
+        np.array([x for x in range(len(final_list[1]))]),
+        final_list[1],
+        "--",
+        color=paint_object.get_color_cur_list()[len(input_dict.keys())],
+        alpha=0.9,
+        label=paint_object.get_label_cur_list()[len(input_dict.keys())]
+    )
+
+    plt.title(paint_object.get_name())
+
+    plt.xlabel(paint_object.get_x_cur_label())
+    plt.ylabel(paint_object.get_y_cur_label())
+    plt.legend()
+
+    # plt.savefig("./diagram/{}.png".format(title), dpi=300)
+    # plt.show()
+
+    paint_object.set_color_cur_num(len(input_dict.values())+1)
+    paint_object.set_label_cur_num(len(input_dict.values())+1)
+
+    return plt, paint_object
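The loop above reads each value as a [predicted, actual] pair and uses the last entry's second element as the dashed ground-truth curve, so the expected input looks roughly like this (key names and values are illustrative):

# Illustrative input_dict for draw_data_fit_total: value[0] is a model's
# predictions, value[1] the true series; only the last entry's value[1]
# is drawn, as the dashed reference curve.
import numpy as np

y_true = np.array([3.0, 2.5, 4.1, 3.8])
input_dict = {
    "LinearRegression": [np.array([2.8, 2.7, 4.0, 3.6]), y_true],
    "RandomForest": [np.array([3.1, 2.4, 4.3, 3.7]), y_true],  # supplies the dashed curve
}

The paint object must then hold at least len(input_dict) + 1 colors and labels, matching the set_color_cur_num and set_label_cur_num calls.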
visualization/draw_heat_map.py
CHANGED
@@ -2,16 +2,13 @@ import numpy as np
 import matplotlib.pyplot as plt
 import pandas as pd
 
-from coding.llh.static.config import Config
+from static.config import Config
 
 
-def draw_heat_map(x_data, title, is_rotate, col_name):
-    # col_name = np.delete(col_name, np.where(col_name == "swing"))
-
+def draw_heat_map(x_data, col_list, paint_object, will_rotate=False):
     plt.rcParams.update({'figure.autolayout': True})
 
-    plt.figure(figsize=(10, 8))
+    plt.figure(figsize=(10, 8), dpi=300)
 
     if isinstance(x_data, np.ndarray):
         np_data = np.around(x_data.astype("float64"), 2)
@@ -24,17 +21,22 @@ def draw_heat_map(x_data, title, is_rotate, col_name):
         for j in range(np_data.shape[1]):
             plt.text(j, i, np_data[i, j], ha="center", va="center", color="w")
 
-    if is_rotate:
-        plt.xticks(np.arange(len(col_name)), col_name, rotation=-90)
+    if will_rotate:
+        plt.xticks(np.arange(len(col_list)), col_list, rotation=-90)
     else:
-        plt.xticks(np.arange(len(col_name)), col_name)
+        plt.xticks(np.arange(len(col_list)), col_list)
 
-    plt.yticks(np.arange(len(col_name)), col_name)
+    plt.yticks(np.arange(len(col_list)), col_list)
     plt.imshow(np_data)
+    plt.colorbar()
     plt.tight_layout()
-    # plt.title(title)
 
-    plt.savefig("./diagram/{}.png".format(title), dpi=300)
+    plt.title(paint_object.get_name())
+
+    plt.xlabel(paint_object.get_x_cur_label())
+    plt.ylabel(paint_object.get_y_cur_label())
+
+    paint_object.set_color_cur_num(0)
 
-    plt.show()
+    return plt, paint_object
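A typical call site feeds a square correlation matrix plus its column names. A sketch, reusing the illustrative StubPaintObject from the draw_boxplot example above:

# Sketch: correlation heat map for a small DataFrame.
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 4, 6, 8], "c": [4, 3, 2, 1]})
corr = df.corr().to_numpy()                     # square matrix of floats

plot, paint = draw_heat_map(corr, df.columns.tolist(),
                            StubPaintObject("Correlation", []), will_rotate=True)

Note that colorbar() takes an optional mappable, never a boolean flag, which is why the call above carries no argument.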
visualization/draw_histogram.py
CHANGED
@@ -1,26 +1,27 @@
+import random
+
 import numpy as np
 import matplotlib.pyplot as plt
 
-from coding.llh.static.config import Config
+from static.config import Config
+from static.new_class import PaintObject
 
 
-def draw_histogram(x_data, y_data, will_rotate, will_show_text, title):
-    fig, ax = plt.subplots(figsize=(10, 8))
+def draw_histogram(nums, labels, paint_object, will_rotate=False, will_show_text=True):
+    plt.figure(figsize=(10, 8), dpi=300)
 
     bars = plt.bar(
-        np.arange(0, len(y_data)),
-        y_data,
+        np.arange(0, len(nums)),
+        nums,
         align="center",
         alpha=1,
-        color=Config.COLORS[0],
-        tick_label=x_data
+        color=paint_object.get_color_cur_list()[0],
+        tick_label=labels
    )
 
-    # Bar annotation
     if will_show_text:
         for bar in bars:
-            ax.annotate(
+            plt.annotate(
                 str(bar.get_height()),
                 xy=(bar.get_x() + bar.get_width() / 2,
                     bar.get_height()),
@@ -31,10 +32,14 @@ def draw_histogram(x_data, y_data, will_rotate, will_show_text, title):
     )
 
     if will_rotate:
-        plt.xticks(rotation=-90)
+        plt.xticks(rotation=-45)
+
+    plt.title(paint_object.get_name())
 
-    plt.title(title)
+    plt.xlabel(paint_object.get_x_cur_label())
+    plt.ylabel(paint_object.get_y_cur_label())
 
-    plt.savefig("./diagram/{}.png".format(title), dpi=300)
+    paint_object.set_color_cur_num(1)
 
-    plt.show()
+    return plt, paint_object
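Here nums drives both the bar heights and the x positions while labels supplies the tick text, so a class-count bar chart reduces to a single call. StubPaintObject is again the illustrative stand-in from the draw_boxplot example:

# Sketch: class counts as a labelled bar chart.
plot, paint = draw_histogram(
    [50, 50, 50],
    ["setosa", "versicolor", "virginica"],
    StubPaintObject("Class counts", ["#1f77b4"]),
    will_rotate=False,
    will_show_text=True,
)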
visualization/draw_histogram_line_subgraph.py
CHANGED
@@ -1,7 +1,7 @@
 import numpy as np
 from matplotlib import pyplot as plt
 
-from coding.llh.static.config import Config
+from static.config import Config
 
 
 def draw_histogram_line_subgraph(total_data_for_plot):
visualization/draw_learning_curve_total.py
CHANGED
@@ -1,59 +1,48 @@
-import numpy as np
 from matplotlib import pyplot as plt
 
-from static.…
-
-
-def draw_learning_curve_total(input_dict, type, paint_object: PaintObject):
-    …
-            alpha=0.1,
-            color=paint_object.get_color_cur_list()[i]
-        )
-        plt.plot(
-            train_sizes,
-            test_scores_mean,
-            "o-",
-            color=paint_object.get_color_cur_list()[i],
-            label=paint_object.get_label_cur_list()[i]
-        )
+from static.new_class import PaintObject
+
+
+def draw_learning_curve_total(input_dict, paint_object: PaintObject):
+    plt.figure(figsize=(10, 8), dpi=300)
+
+    for i, values in enumerate(input_dict.values()):
+        train_sizes = values[0]
+        train_scores_mean = values[1]
+        train_scores_std = values[2]
+        test_scores_mean = values[3]
+        test_scores_std = values[4]
+
+        plt.fill_between(
+            train_sizes,
+            train_scores_mean - train_scores_std,
+            train_scores_mean + train_scores_std,
+            alpha=0.1,
+            color=paint_object.get_color_cur_list()[2*i]
+        )
+
+        plt.plot(
+            train_sizes,
+            train_scores_mean,
+            "o-",
+            color=paint_object.get_color_cur_list()[2*i],
+            label=paint_object.get_label_cur_list()[2*i]
+        )
+
+        plt.fill_between(
+            train_sizes,
+            test_scores_mean - test_scores_std,
+            test_scores_mean + test_scores_std,
+            alpha=0.1,
+            color=paint_object.get_color_cur_list()[2*i+1]
+        )
+        plt.plot(
+            train_sizes,
+            test_scores_mean,
+            "o-",
+            color=paint_object.get_color_cur_list()[2*i+1],
+            label=paint_object.get_label_cur_list()[2*i+1]
+        )
 
     plt.title(paint_object.get_name())
 
@@ -61,11 +50,8 @@ def draw_learning_curve_total(input_dict, type, paint_object: PaintObject):
     plt.ylabel(paint_object.get_y_cur_label())
     plt.legend()
 
-    …
-
-    paint_object.set_color_cur_num(len(input_dict.keys()))
-    paint_object.set_label_cur_num(len(input_dict.keys()))
+    paint_object.set_color_cur_num(2*len(input_dict.values()))
+    paint_object.set_label_cur_num(2*len(input_dict.values()))
 
     return plt, paint_object
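Each dict value is unpacked positionally into five arrays, which is exactly what sklearn's learning_curve yields once per-row means and standard deviations are taken. A sketch of building one entry (the estimator choice is illustrative):

# Sketch: one input_dict entry for draw_learning_curve_total.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import learning_curve

x, y = load_iris(return_X_y=True)
train_sizes, train_scores, test_scores = learning_curve(
    LogisticRegression(max_iter=1000), x, y, cv=3
)

input_dict = {
    "LogisticRegression": [
        train_sizes,
        train_scores.mean(axis=1),
        train_scores.std(axis=1),
        test_scores.mean(axis=1),
        test_scores.std(axis=1),
    ]
}

Because the train and test curves each consume a color and a label, the paint object needs 2 * len(input_dict) of each.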
visualization/draw_line_graph.py
CHANGED
@@ -4,37 +4,24 @@ import matplotlib.pyplot as plt
 from static.config import Config
 
 
-def draw_line_graph(x_data, y_data, title):
-    plt.figure(figsize=(10, 8))
+def draw_line_graph(nums, labels, paint_object):
+    plt.figure(figsize=(10, 8), dpi=300)
 
     plt.plot(
-        x_data,
-        y_data,
+        nums,
+        labels,
         "-o",
-        color=Config.COLORS[0]
+        color=paint_object.get_color_cur_list()[0]
     )
 
-    plt.title(title)
-    plt.savefig("./diagram/{}.png".format(title), dpi=300)
+    plt.title(paint_object.get_name())
 
-    plt.show()
+    plt.xlabel(paint_object.get_x_cur_label())
+    plt.ylabel(paint_object.get_y_cur_label())
 
+    paint_object.set_color_cur_num(1)
 
-def …(x_data, y_data, labels, title):
-    plt.figure(figsize=(10, 8))
-
-    for i, single_y_data in enumerate(y_data):
-        plt.plot(
-            x_data,
-            single_y_data,
-            "-o",
-            color=Config.COLORS[i],
-            label=labels[i]
-        )
-
-    plt.legend()
-    plt.title(title)
-    plt.savefig("./diagram/{}.png".format(title), dpi=300)
-
-    plt.show()
+    return plt, paint_object
visualization/draw_pred_total.py
CHANGED
@@ -7,30 +7,28 @@ from coding.llh.static.config import Config
-def draw_pred_total(input_dict):
+def draw_pred_total(input_dict, paint_object):
     plt.figure(figsize=(10, 6))
 
-    i = 0
-    for name, cur_list in input_dict.items():
+    for i, (name, cur_list) in enumerate(input_dict.items()):
+        if i == len(input_dict.keys())-1:
+            final_list = cur_list
+
         plt.plot(
             np.array([x for x in range(len(cur_list[0]))]),
             cur_list[0],
             "-",
-            color=Config.COLORS[i],
+            color=paint_object.get_color_cur_list()[i],
             alpha=0.9,
-            label=name
+            label=paint_object.get_label_cur_list()[i]
         )
-        i += 1
 
     plt.plot(
-        np.array([x for x in range(len(…))]),
-        …,
+        np.array([x for x in range(len(final_list[1]))]),
+        final_list[1],
         "--",
-        color=…,
+        color=paint_object.get_color_cur_list()[len(input_dict.keys())],
         alpha=0.9,
-        label=…
+        label=paint_object.get_label_cur_list()[len(input_dict.keys())]
     )
 
-    title = "pred curve"
-
     plt.xlabel("Sizes")
     plt.ylabel("Value")
     plt.legend()
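The loop relies on enumerate over dict.items(), where the (key, value) pair must stay parenthesized in the unpacking target; a two-line reminder:

# enumerate yields (index, item); when item is itself a (key, value) pair,
# the inner tuple needs its own parentheses in the target list.
for i, (name, cur_list) in enumerate({"a": [1], "b": [2]}.items()):
    print(i, name, cur_list)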
visualization/draw_scatter_line_graph.py
CHANGED
@@ -1,7 +1,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-from coding.llh.static.config import Config
+from static.config import Config
 
 
 # draw scatter line graph