LLH committed on
Commit
10c7c36
·
1 Parent(s): 11b81b9

2024/02/20/14:15

Browse files
analysis/bayes_model.py CHANGED
@@ -1,28 +1,82 @@
 
1
  from sklearn.naive_bayes import *
 
2
 
3
- from coding.llh.visualization.draw_line_graph import draw_line_graph
4
- from coding.llh.visualization.draw_scatter_line_graph import draw_scatter_line_graph
5
- from coding.llh.metrics.calculate_classification_metrics import calculate_classification_metrics
6
- from coding.llh.metrics.calculate_regression_metrics import calculate_regression_metrics
 
 
7
 
8
 
9
- # Naive bayes classification
10
- def naive_bayes_classification(x_train, y_train, x_test, y_test):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  info = {}
12
 
13
- # multinomial_naive_bayes_classification_model = MultinomialNB()
14
- Gaussian_naive_bayes_classification_model = GaussianNB()
15
- # bernoulli_naive_bayes_classification_model = BernoulliNB()
16
- # complement_naive_bayes_classification_model = ComplementNB()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- Gaussian_naive_bayes_classification_model.fit(x_train, y_train)
19
 
20
- y_pred = Gaussian_naive_bayes_classification_model.predict(x_test).reshape(-1, 1)
 
 
 
 
 
21
 
22
- # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "Gaussian naive bayes classification model residual plot")
23
 
24
- info.update(calculate_regression_metrics(y_pred, y_test, "Gaussian naive bayes classification"))
25
- info.update(calculate_classification_metrics(y_pred, y_test, "Gaussian naive bayes classification"))
 
26
 
27
- return info
28
 
 
1
+ from sklearn.model_selection import learning_curve
2
  from sklearn.naive_bayes import *
3
+ import numpy as np
4
 
5
+ from static.new_class import Container
6
+ from static.process import grid_search, bayes_search
7
+ from visualization.draw_line_graph import draw_line_graph
8
+ from visualization.draw_scatter_line_graph import draw_scatter_line_graph
9
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
10
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
11
 
12
 
13
class NaiveBayesClassifierParams:
    """Hyper-parameter search grids for the naive Bayes classifiers."""

    # One grid per estimator name. GaussianNB has no tuned parameters here,
    # so its grid is intentionally empty.
    _PARAM_GRIDS = {
        "MultinomialNB": {
            "alpha": [0.1, 0.5, 1.0, 2.0],
        },
        "GaussianNB": {},
        "ComplementNB": {
            "alpha": [0.1, 0.5, 1, 10],
            "fit_prior": [True, False],
            "norm": [True, False],
        },
    }

    @classmethod
    def get_params(cls, sort):
        """Return the search grid for the estimator named ``sort``.

        Args:
            sort: Estimator name ("MultinomialNB", "GaussianNB" or
                "ComplementNB").

        Returns:
            A new dict mapping parameter names to lists of candidate values.
            Unrecognised names (including ``None``) yield an empty dict
            rather than ``None``, so the result can always be handed to a
            search helper.
        """
        grid = cls._PARAM_GRIDS.get(sort, {})
        # Copy the lists so callers cannot mutate the shared class-level grids.
        return {name: list(candidates) for name, candidates in grid.items()}
28
+
29
+
30
# Naive Bayes classification
def naive_bayes_classification(container: Container, model=None):
    """Train a naive Bayes classifier and store the results on ``container``.

    Args:
        container: Supplies ``x_train``, ``y_train``, ``x_test``, ``y_test``
            and ``hyper_params_optimize``. Predictions, learning-curve
            values, the info dict, the status and the model are written back
            through its ``set_*`` methods.
        model: "MultinomialNB", "GaussianNB" or "ComplementNB". Any other
            value (including ``None``) selects GaussianNB.

    Returns:
        The same ``container`` object after it has been updated.
    """
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    if model == "MultinomialNB":
        naive_bayes_model = MultinomialNB()
    elif model == "ComplementNB":
        naive_bayes_model = ComplementNB()
    else:
        # GaussianNB is both an explicit choice and the fallback. Normalise
        # the name so the grid requested below matches the estimator that is
        # actually built, instead of looking up an unrecognised name.
        model = "GaussianNB"
        naive_bayes_model = GaussianNB()
    params = NaiveBayesClassifierParams.get_params(model)

    # NOTE(review): grid_search / bayes_search are project helpers; they are
    # assumed to return an already-fitted estimator -- confirm.
    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, naive_bayes_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, naive_bayes_model, x_train, y_train)
    else:
        best_model = naive_bayes_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    # 5-fold learning curve computed on the training split only.
    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
82
 
analysis/descriptive_analysis.py CHANGED
@@ -236,8 +236,7 @@ def get_descriptive_indicators_related(df):
236
  descriptive_indicators_df["Upper Quartile"][col]
237
  descriptive_indicators_df["Kurtosis"][col] = df[col].kurt()
238
  descriptive_indicators_df["Skewness"][col] = df[col].skew()
239
- descriptive_indicators_df["Coefficient of Variation"][col] = descriptive_indicators_df["Standard Deviation"][
240
- col] \
241
  / descriptive_indicators_df["Avg"][col]
242
 
243
  # draw_heat_map(descriptive_indicators_df.to_numpy(), "descriptive indicators", True)
 
236
  descriptive_indicators_df["Upper Quartile"][col]
237
  descriptive_indicators_df["Kurtosis"][col] = df[col].kurt()
238
  descriptive_indicators_df["Skewness"][col] = df[col].skew()
239
+ descriptive_indicators_df["Coefficient of Variation"][col] = descriptive_indicators_df["Standard Deviation"][col] \
 
240
  / descriptive_indicators_df["Avg"][col]
241
 
242
  # draw_heat_map(descriptive_indicators_df.to_numpy(), "descriptive indicators", True)
analysis/distance_model.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.model_selection import learning_curve
2
+
3
+ from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
4
+ from analysis.shap_model import *
5
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
6
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
7
+ from static.new_class import *
8
+ from static.process import grid_search, bayes_search
9
+
10
+
11
class KNNClassifierParams:
    """Search grid used when tuning the k-nearest-neighbours classifier."""

    @classmethod
    def get_params(cls):
        """Return a new dict mapping each tuned parameter to its candidates."""
        return dict(
            n_neighbors=[3, 5, 7, 9],
            weights=["uniform", "distance"],
            p=[1, 2],
        )
19
+
20
+
21
# KNN classification
def knn_classifier(container: Container):
    """Train a k-nearest-neighbours classifier and record results on ``container``.

    Reads the train/test splits and ``hyper_params_optimize`` from the
    container, optionally tunes the estimator through the project's
    ``grid_search`` / ``bayes_search`` helpers, then writes predictions,
    learning-curve values, the info dict, the status and the model back
    through the container's ``set_*`` methods.

    Returns:
        The same ``container`` object.
    """
    train_x = container.x_train
    train_y = container.y_train
    test_x = container.x_test
    test_y = container.y_test
    search_mode = container.hyper_params_optimize
    report = {}

    estimator = KNeighborsClassifier()
    search_space = KNNClassifierParams.get_params()

    if search_mode == "grid_search":
        fitted = grid_search(search_space, estimator, train_x, train_y)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, estimator, train_x, train_y)
    else:
        # No tuning requested: fit the default estimator directly.
        estimator.fit(train_x, train_y)
        fitted = estimator

    report["参数"] = fitted.get_params()

    predictions = fitted.predict(test_x)
    container.set_y_pred(predictions)

    # 5-fold learning curve on the training split.
    sizes, fold_train_scores, fold_test_scores = learning_curve(fitted, train_x, train_y, cv=5)
    container.set_learning_curve_values(
        sizes,
        np.mean(fold_train_scores, axis=1),
        np.std(fold_train_scores, axis=1),
        np.mean(fold_test_scores, axis=1),
        np.std(fold_test_scores, axis=1),
    )

    report["指标"] = calculate_classification_metrics(predictions, test_y)

    container.set_info(report)
    container.set_status("trained")
    container.set_model(fitted)

    return container
62
+
63
+
64
class KNNRegressionParams:
    """Search grid used when tuning the k-nearest-neighbours regressor."""

    @classmethod
    def get_params(cls):
        """Return a new dict of candidate values for each tuned parameter."""
        grid = {}
        grid["n_neighbors"] = [3, 5, 7, 9]
        grid["weights"] = ["uniform", "distance"]
        grid["p"] = [1, 2]
        return grid
72
+
73
+
74
# KNN regression
def knn_regression(container: Container):
    """Train a k-nearest-neighbours regressor and record results on ``container``.

    The container provides the train/test splits and the tuning mode
    (``hyper_params_optimize``). Predictions, learning-curve values, the
    info dict, the status and the model are written back through the
    container's ``set_*`` methods.

    Returns:
        The same ``container`` object.
    """
    features_train = container.x_train
    target_train = container.y_train
    features_test = container.x_test
    target_test = container.y_test
    tuning = container.hyper_params_optimize
    summary = {}

    regressor = KNeighborsRegressor()
    grid = KNNRegressionParams.get_params()

    if tuning == "grid_search":
        chosen = grid_search(grid, regressor, features_train, target_train)
    elif tuning == "bayes_search":
        chosen = bayes_search(grid, regressor, features_train, target_train)
    else:
        # Without tuning the default regressor is fitted as-is.
        chosen = regressor
        chosen.fit(features_train, target_train)

    summary["参数"] = chosen.get_params()

    predicted = chosen.predict(features_test)
    container.set_y_pred(predicted)

    # 5-fold learning curve on the training split.
    curve = learning_curve(chosen, features_train, target_train, cv=5)
    sizes, scores_on_train, scores_on_test = curve

    mean_train = np.mean(scores_on_train, axis=1)
    std_train = np.std(scores_on_train, axis=1)
    mean_test = np.mean(scores_on_test, axis=1)
    std_test = np.std(scores_on_test, axis=1)
    container.set_learning_curve_values(sizes, mean_train, std_train, mean_test, std_test)

    summary["指标"] = calculate_regression_metrics(predicted, target_test)

    container.set_info(summary)
    container.set_status("trained")
    container.set_model(chosen)

    return container
analysis/gradient_model.py CHANGED
@@ -1,72 +1,65 @@
 
1
  from sklearn.ensemble import GradientBoostingRegressor
2
- from sklearn.tree import DecisionTreeClassifier
3
- from sklearn.ensemble import RandomForestClassifier
4
- from xgboost import XGBClassifier
5
  from sklearn.model_selection import learning_curve
6
- import numpy as np
7
-
8
- from analysis.shap_model import shap_calculate
9
- from coding.llh.static.config import Config
10
- from coding.llh.static.process import grid_search, bayes_search
11
- from coding.llh.visualization.draw_learning_curve import draw_learning_curve
12
- from coding.llh.visualization.draw_line_graph import draw_line_graph
13
- from coding.llh.visualization.draw_scatter_line_graph import draw_scatter_line_graph
14
- from coding.llh.metrics.calculate_classification_metrics import calculate_classification_metrics
15
- from coding.llh.metrics.calculate_regression_metrics import calculate_regression_metrics
16
- from sklearn.ensemble import RandomForestRegressor
17
 
18
-
19
- def gradient_boosting_regression(feature_names, x, y, x_train_and_validate, y_train_and_validate, x_test, y_test, train_and_validate_data_list=None, hyper_params_optimize=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  info = {}
21
- model_name = "Double Exponential Smoothing Plus"
22
 
23
- model = GradientBoostingRegressor()
24
- params = {
25
- 'n_estimators': [50, 100, 150],
26
- 'learning_rate': [0.01, 0.1, 0.2],
27
- 'max_depth': [3, 5, 7],
28
- 'min_samples_split': [2, 5, 10],
29
- 'min_samples_leaf': [1, 2, 4]
30
- }
31
 
32
  if hyper_params_optimize == "grid_search":
33
- best_model = grid_search(params, model, x_train_and_validate, y_train_and_validate)
34
  elif hyper_params_optimize == "bayes_search":
35
- best_model = bayes_search(params, model, x_train_and_validate, y_train_and_validate)
36
  else:
37
- best_model = model
38
- best_model.fit(x, y)
39
-
40
- info["{} Params".format(model_name)] = best_model.get_params()
41
 
42
- y_pred = best_model.predict(x_test).reshape(-1, 1)
43
 
44
- # 0202:
 
 
45
 
46
- train_sizes, train_scores, test_scores = learning_curve(best_model, x, y, cv=5, scoring="r2")
47
 
48
  train_scores_mean = np.mean(train_scores, axis=1)
49
  train_scores_std = np.std(train_scores, axis=1)
50
  test_scores_mean = np.mean(test_scores, axis=1)
51
  test_scores_std = np.std(test_scores, axis=1)
 
 
52
 
53
- # 修正
54
- train_scores_mean[0] = 0.984
55
- test_scores_mean[1] = 0.89
56
- test_scores_mean[2] = 0.93
57
- test_scores_mean[3] = 0.97
58
- test_scores_mean[4] = 0.98
59
-
60
-
61
- # draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
62
-
63
- # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "logistic regression model residual plot")
64
-
65
- info.update(calculate_regression_metrics(y_pred, y_test, model_name))
66
- # info.update(calculate_classification_metrics(y_pred, y_test, "logistic regression"))
67
- # mae, mse, rsme, r2, ar2 = calculate_regression_metrics(y_pred, y_test, model_name)
68
 
69
- shap_calculate(best_model, x[:1000], feature_names)
 
 
70
 
71
- # return y_pred, info
72
- return y_pred, info, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std
 
1
+ import numpy as np
2
  from sklearn.ensemble import GradientBoostingRegressor
 
 
 
3
  from sklearn.model_selection import learning_curve
 
 
 
 
 
 
 
 
 
 
 
4
 
5
+ from analysis.shap_model import draw_shap_beeswarm
6
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
7
+ from static.config import Config
8
+ from static.new_class import Container
9
+ from static.process import grid_search, bayes_search
10
+
11
+
12
class GradientBoostingParams:
    """Search grid used when tuning the gradient boosting regressor."""

    @classmethod
    def get_params(cls):
        """Return a new dict of candidate values for each tuned parameter."""
        return dict(
            n_estimators=[50, 100, 150],
            learning_rate=[0.01, 0.1, 0.2],
            max_depth=[3, 5, 7],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
        )
22
+
23
+
24
# Gradient boosting regression
def gradient_boosting_regression(container: Container):
    """Train a gradient boosting regressor and record results on ``container``.

    The estimator is seeded with ``Config.RANDOM_STATE``. Depending on
    ``container.hyper_params_optimize`` it is tuned through the project's
    ``grid_search`` / ``bayes_search`` helpers or fitted directly.
    Predictions, learning-curve values, the info dict, the status and the
    model are written back through the container's ``set_*`` methods.

    Returns:
        The same ``container`` object.
    """
    train_x = container.x_train
    train_y = container.y_train
    test_x = container.x_test
    test_y = container.y_test
    search_mode = container.hyper_params_optimize
    report = {}

    booster = GradientBoostingRegressor(random_state=Config.RANDOM_STATE)
    search_space = GradientBoostingParams.get_params()

    if search_mode == "grid_search":
        fitted = grid_search(search_space, booster, train_x, train_y)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, booster, train_x, train_y)
    else:
        # No tuning requested: fit the seeded default estimator.
        booster.fit(train_x, train_y)
        fitted = booster

    report["参数"] = fitted.get_params()

    predictions = fitted.predict(test_x)
    container.set_y_pred(predictions)

    # 5-fold learning curve on the training split.
    sizes, fold_train_scores, fold_test_scores = learning_curve(fitted, train_x, train_y, cv=5)
    container.set_learning_curve_values(
        sizes,
        np.mean(fold_train_scores, axis=1),
        np.std(fold_train_scores, axis=1),
        np.mean(fold_test_scores, axis=1),
        np.std(fold_test_scores, axis=1),
    )

    report["指标"] = calculate_regression_metrics(predictions, test_y)

    container.set_info(report)
    container.set_status("trained")
    container.set_model(fitted)

    return container
 
analysis/kernel_model.py CHANGED
@@ -1,97 +1,119 @@
 
1
  from sklearn.model_selection import learning_curve
2
  from sklearn.svm import SVC
3
  from sklearn.svm import SVR
4
- import numpy as np
5
-
6
- from coding.llh.analysis.my_learning_curve import my_learning_curve
7
- from coding.llh.analysis.shap_model import shap_calculate
8
- from coding.llh.static.process import grid_search, bayes_search
9
- from coding.llh.visualization.draw_line_graph import draw_line_graph
10
- from coding.llh.visualization.draw_scatter_line_graph import draw_scatter_line_graph
11
- from coding.llh.metrics.calculate_classification_metrics import calculate_classification_metrics
12
- from coding.llh.metrics.calculate_regression_metrics import calculate_regression_metrics
13
-
14
 
15
- def svm_regression(feature_names, x, y, x_train_and_validate, y_train_and_validate, x_test, y_test, train_and_validate_data_list=None, hyper_params_optimize=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  info = {}
17
- model_name = "Support Vector Regression"
18
 
19
- model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
20
- params = {
21
- 'kernel': ['linear', 'rbf'],
22
- 'C': [0.1, 1, 10, 100],
23
- 'gamma': [0.01, 0.1, 1, 10],
24
- 'epsilon': [0.01, 0.1, 1]
25
- }
26
 
27
  if hyper_params_optimize == "grid_search":
28
- best_model = grid_search(params, model, x_train_and_validate, y_train_and_validate)
29
  elif hyper_params_optimize == "bayes_search":
30
- best_model = bayes_search(params, model, x_train_and_validate, y_train_and_validate)
31
  else:
32
- best_model = model
33
- best_model.fit(x, y)
34
 
35
- info["{} Params".format(model_name)] = best_model.get_params()
36
 
37
- y_pred = best_model.predict(x_test).reshape(-1, 1)
 
 
38
 
39
- # 0202:
40
-
41
- # train_sizes, train_scores, test_scores = my_learning_curve(best_model, x[:300], y[:300], cv=5)
42
- train_sizes, train_scores, test_scores = learning_curve(best_model, x, y, cv=5, scoring="r2")
43
 
44
  train_scores_mean = np.mean(train_scores, axis=1)
45
  train_scores_std = np.std(train_scores, axis=1)
46
  test_scores_mean = np.mean(test_scores, axis=1)
47
  test_scores_std = np.std(test_scores, axis=1)
 
 
48
 
49
- # 修正
50
- train_scores_mean[0] = 0.99
51
- test_scores_mean[0] = 0.02
52
-
53
- # draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
54
 
55
- # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "logistic regression model residual plot")
 
 
56
 
57
- info.update(calculate_regression_metrics(y_pred, y_test, model_name))
58
- # info.update(calculate_classification_metrics(y_pred, y_test, "logistic regression"))
59
- # mae, mse, rsme, r2, ar2 = calculate_regression_metrics(y_pred, y_test, model_name)
60
 
61
- # shap_calculate(best_model, x_test, feature_names)
62
 
63
- return y_pred, info, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std
 
 
 
 
 
 
 
64
 
65
 
66
- # svm classification
67
- def svm_classification(x_train, y_train, x_test, y_test):
 
 
 
 
 
68
  info = {}
69
 
70
- # # Linear kernel SVM
71
- # svm_classification_model = SVC(kernel="linear")
72
- #
73
- # # Polynomial kernel SVM
74
- # svm_classification_model = SVC(kernel="poly")
75
- #
76
- # Radial base kernel SVM
77
- svm_classification_model = SVC(kernel="rbf")
78
 
79
- # # Sigmoid kernel SVM
80
- # svm_classification_model = SVC(kernel="rbf")
 
 
 
 
 
81
 
82
- svm_classification_model.fit(x_train, y_train)
83
 
84
- lr_intercept = svm_classification_model.intercept_
85
- info["Intercept of linear regression equation"] = lr_intercept
 
86
 
87
- lr_coef = svm_classification_model.coef_
88
- info["Coefficients of linear regression equation"] = lr_coef
89
 
90
- y_pred = svm_classification_model.predict(x_test)
 
 
 
 
 
91
 
92
- # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "linear regression model residual plot")
93
 
94
- info.update(calculate_regression_metrics(y_pred, y_test, "linear regression"))
95
- info.update(calculate_classification_metrics(y_pred, y_test, "linear regression"))
 
96
 
97
- return info
 
1
+ import numpy as np
2
  from sklearn.model_selection import learning_curve
3
  from sklearn.svm import SVC
4
  from sklearn.svm import SVR
 
 
 
 
 
 
 
 
 
 
5
 
6
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
7
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
8
+ from static.config import Config
9
+ from static.new_class import Container
10
+ from static.process import grid_search, bayes_search
11
+
12
+
13
class SVMRegressionParams:
    """Search grid used when tuning the support vector regressor."""

    @classmethod
    def get_params(cls):
        """Return a new dict of candidate values for each tuned parameter."""
        return dict(
            kernel=["linear", "rbf"],
            C=[0.1, 1, 10, 100],
            gamma=[0.01, 0.1, 1, 10],
            epsilon=[0.01, 0.1, 1],
        )
22
+
23
+
24
# Support vector machine regression
def svm_regression(container: Container):
    """Train a support vector regressor and record results on ``container``.

    The starting estimator is an RBF-kernel ``SVR`` with ``C=100``,
    ``gamma=0.1`` and ``epsilon=0.1``. Depending on
    ``container.hyper_params_optimize`` it is tuned through the project's
    ``grid_search`` / ``bayes_search`` helpers or fitted directly.
    Predictions, learning-curve values, the info dict, the status and the
    model are written back through the container's ``set_*`` methods.

    Returns:
        The same ``container`` object.
    """
    features_train = container.x_train
    target_train = container.y_train
    features_test = container.x_test
    target_test = container.y_test
    tuning = container.hyper_params_optimize
    summary = {}

    regressor = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
    grid = SVMRegressionParams.get_params()

    if tuning == "grid_search":
        chosen = grid_search(grid, regressor, features_train, target_train)
    elif tuning == "bayes_search":
        chosen = bayes_search(grid, regressor, features_train, target_train)
    else:
        # Without tuning the preset regressor is fitted as-is.
        chosen = regressor
        chosen.fit(features_train, target_train)

    summary["参数"] = chosen.get_params()

    predicted = chosen.predict(features_test)
    container.set_y_pred(predicted)

    # 5-fold learning curve on the training split.
    curve = learning_curve(chosen, features_train, target_train, cv=5)
    sizes, scores_on_train, scores_on_test = curve

    mean_train = np.mean(scores_on_train, axis=1)
    std_train = np.std(scores_on_train, axis=1)
    mean_test = np.mean(scores_on_test, axis=1)
    std_test = np.std(scores_on_test, axis=1)
    container.set_learning_curve_values(sizes, mean_train, std_train, mean_test, std_test)

    summary["指标"] = calculate_regression_metrics(predicted, target_test)

    container.set_info(summary)
    container.set_status("trained")
    container.set_model(chosen)

    return container
 
 
66
 
 
67
 
68
class SVMClassifierParams:
    """Search grid used when tuning the support vector classifier."""

    @classmethod
    def get_params(cls):
        """Return a new dict of candidate values for each tuned parameter."""
        grid = {}
        grid["C"] = [0.1, 1, 10, 100]
        grid["kernel"] = ["linear", "rbf", "poly"]
        grid["gamma"] = [0.1, 1, 10]
        return grid
76
 
77
 
78
# Support vector machine classification
def svm_classifier(container: Container):
    """Train a support vector classifier and record results on ``container``.

    The starting estimator is an RBF-kernel ``SVC``. Depending on
    ``container.hyper_params_optimize`` it is tuned through the project's
    ``grid_search`` / ``bayes_search`` helpers or fitted directly.
    Predictions, learning-curve values, the info dict, the status and the
    model are written back through the container's ``set_*`` methods.

    Returns:
        The same ``container`` object.
    """
    train_x = container.x_train
    train_y = container.y_train
    test_x = container.x_test
    test_y = container.y_test
    search_mode = container.hyper_params_optimize
    report = {}

    classifier = SVC(kernel="rbf")
    search_space = SVMClassifierParams.get_params()

    if search_mode == "grid_search":
        fitted = grid_search(search_space, classifier, train_x, train_y)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, classifier, train_x, train_y)
    else:
        # No tuning requested: fit the RBF classifier directly.
        classifier.fit(train_x, train_y)
        fitted = classifier

    report["参数"] = fitted.get_params()

    predictions = fitted.predict(test_x)
    container.set_y_pred(predictions)

    # 5-fold learning curve on the training split.
    sizes, fold_train_scores, fold_test_scores = learning_curve(fitted, train_x, train_y, cv=5)
    container.set_learning_curve_values(
        sizes,
        np.mean(fold_train_scores, axis=1),
        np.std(fold_train_scores, axis=1),
        np.mean(fold_test_scores, axis=1),
        np.std(fold_test_scores, axis=1),
    )

    report["指标"] = calculate_classification_metrics(predictions, test_y)

    container.set_info(report)
    container.set_status("trained")
    container.set_model(fitted)

    return container
analysis/linear_model.py CHANGED
@@ -11,7 +11,23 @@ from sklearn.model_selection import learning_curve
11
  from static.process import grid_search, bayes_search
12
  from metrics.calculate_classification_metrics import calculate_classification_metrics
13
  from metrics.calculate_regression_metrics import calculate_regression_metrics
14
- from app import Container
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  # 线性回归
@@ -24,28 +40,20 @@ def linear_regression(container: Container, model=None):
24
  info = {}
25
 
26
  if model == "Lasso":
27
- linear_regression_model = Lasso(alpha=0.1)
28
- params = {
29
- "fit_intercept": [True, False],
30
- "alpha": [0.001, 0.01, 0.1, 1.0, 10.0]
31
- }
32
  elif model == "Ridge":
33
- linear_regression_model = Ridge(alpha=0.1)
34
- params = {
35
- "fit_intercept": [True, False],
36
- "alpha": [0.001, 0.01, 0.1, 1.0, 10.0]
37
- }
38
  elif model == "ElasticNet":
39
- linear_regression_model = ElasticNet(alpha=0.1)
40
- params = {
41
- "fit_intercept": [True, False],
42
- "alpha": [0.001, 0.01, 0.1, 1.0, 10.0]
43
- }
44
  else:
45
  linear_regression_model = LinearRegression()
46
- params = {
47
- "fit_intercept": [True, False]
48
- }
49
 
50
  if hyper_params_optimize == "grid_search":
51
  best_model = grid_search(params, linear_regression_model, x_train, y_train)
@@ -55,13 +63,13 @@ def linear_regression(container: Container, model=None):
55
  best_model = linear_regression_model
56
  best_model.fit(x_train, y_train)
57
 
58
- info["linear regression Params"] = best_model.get_params()
59
-
60
- lr_intercept = best_model.intercept_
61
- info["Intercept of linear regression equation"] = lr_intercept
62
 
63
- lr_coef = best_model.coef_
64
- info["Coefficients of linear regression equation"] = lr_coef
 
 
 
65
 
66
  y_pred = best_model.predict(x_test)
67
  container.set_y_pred(y_pred)
@@ -72,9 +80,10 @@ def linear_regression(container: Container, model=None):
72
  train_scores_std = np.std(train_scores, axis=1)
73
  test_scores_mean = np.mean(test_scores, axis=1)
74
  test_scores_std = np.std(test_scores, axis=1)
75
- container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
 
76
 
77
- info.update(calculate_regression_metrics(y_pred, y_test, "linear regression"))
78
 
79
  container.set_info(info)
80
  container.set_status("trained")
@@ -83,6 +92,15 @@ def linear_regression(container: Container, model=None):
83
  return container
84
 
85
 
 
 
 
 
 
 
 
 
 
86
  # 多项式回归
87
  def polynomial_regression(container: Container):
88
  x_train = container.x_train
@@ -97,10 +115,7 @@ def polynomial_regression(container: Container):
97
 
98
  polynomial_regression_model = Pipeline([("polynomial_features", polynomial_features),
99
  ("linear_regression_model", linear_regression_model)])
100
- params = {
101
- "polynomial_features__degree": [2, 3],
102
- "linear_regression_model__fit_intercept": [True, False]
103
- }
104
 
105
  if hyper_params_optimize == "grid_search":
106
  best_model = grid_search(params, polynomial_regression_model, x_train, y_train)
@@ -110,16 +125,16 @@ def polynomial_regression(container: Container):
110
  best_model = polynomial_regression_model
111
  best_model.fit(x_train, y_train)
112
 
113
- info["polynomial regression Params"] = best_model.get_params()
114
-
115
- feature_names = best_model["polynomial_features"].get_feature_names_out()
116
- info["Feature names of polynomial regression"] = feature_names
117
 
118
- lr_intercept = best_model["linear_regression_model"].intercept_
119
- info["Intercept of polynomial regression equation"] = lr_intercept
120
-
121
- lr_coef = best_model["linear_regression_model"].coef_
122
- info["Coefficients of polynomial regression equation"] = lr_coef
 
 
 
123
 
124
  x_test_ = best_model["polynomial_features"].fit_transform(x_test)
125
  y_pred = best_model["linear_regression_model"].predict(x_test_)
@@ -133,7 +148,7 @@ def polynomial_regression(container: Container):
133
  test_scores_std = np.std(test_scores, axis=1)
134
  container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
135
 
136
- info.update(calculate_regression_metrics(y_pred, y_test, "polynomial regression"))
137
 
138
  container.set_info(info)
139
  container.set_status("trained")
@@ -142,7 +157,18 @@ def polynomial_regression(container: Container):
142
  return container
143
 
144
 
145
- # 逻辑斯谛回归
 
 
 
 
 
 
 
 
 
 
 
146
  def logistic_regression(container: Container):
147
  x_train = container.x_train
148
  y_train = container.y_train
@@ -151,12 +177,8 @@ def logistic_regression(container: Container):
151
  hyper_params_optimize = container.hyper_params_optimize
152
  info = {}
153
 
154
- logistic_regression_model = LogisticRegression()
155
- params = {
156
- "C": [0.001, 0.01, 0.1, 1.0, 10.0],
157
- "max_iter": [100, 200, 300],
158
- "solver": ["liblinear", "lbfgs", "newton-cg", "sag", "saga"]
159
- }
160
 
161
  if hyper_params_optimize == "grid_search":
162
  best_model = grid_search(params, logistic_regression_model, x_train, y_train)
@@ -166,13 +188,13 @@ def logistic_regression(container: Container):
166
  best_model = logistic_regression_model
167
  best_model.fit(x_train, y_train)
168
 
169
- info["logistic regression Params"] = best_model.get_params()
170
 
171
- lr_intercept = best_model.intercept_
172
- info["Intercept of logistic regression equation"] = lr_intercept.tolist()
173
-
174
- lr_coef = best_model.coef_
175
- info["Coefficients of logistic regression equation"] = lr_coef.tolist()
176
 
177
  y_pred = best_model.predict(x_test)
178
  container.set_y_pred(y_pred)
@@ -183,9 +205,10 @@ def logistic_regression(container: Container):
183
  train_scores_std = np.std(train_scores, axis=1)
184
  test_scores_mean = np.mean(test_scores, axis=1)
185
  test_scores_std = np.std(test_scores, axis=1)
186
- container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
 
187
 
188
- info.update(calculate_classification_metrics(y_pred, y_test, "logistic regression"))
189
 
190
  container.set_info(info)
191
  container.set_status("trained")
 
11
  from static.process import grid_search, bayes_search
12
  from metrics.calculate_classification_metrics import calculate_classification_metrics
13
  from metrics.calculate_regression_metrics import calculate_regression_metrics
14
+ from static.new_class import *
15
+ from static.config import Config
16
+
17
+
18
class LinearRegressionParams:
    """Hyper-parameter search grids for the linear regression family."""

    @classmethod
    def get_params(cls, sort):
        """Return the search grid for the estimator named by ``sort``.

        "Lasso", "Ridge" and "ElasticNet" additionally get an ``alpha`` grid
        and a pinned ``random_state``; any other name only searches
        ``fit_intercept``.
        """
        grid = {"fit_intercept": [True, False]}
        if sort not in ("Lasso", "Ridge", "ElasticNet"):
            return grid

        # Regularised variants: also search the penalty strength.
        grid["alpha"] = [0.001, 0.01, 0.1, 1.0, 10.0]
        grid["random_state"] = [Config.RANDOM_STATE]
        return grid
31
 
32
 
33
  # 线性回归
 
40
  info = {}
41
 
42
  if model == "Lasso":
43
+ linear_regression_model = Lasso(alpha=0.1, random_state=Config.RANDOM_STATE)
44
+ params = LinearRegressionParams.get_params(model)
 
 
 
45
  elif model == "Ridge":
46
+ linear_regression_model = Ridge(alpha=0.1, random_state=Config.RANDOM_STATE)
47
+ params = LinearRegressionParams.get_params(model)
 
 
 
48
  elif model == "ElasticNet":
49
+ linear_regression_model = ElasticNet(alpha=0.1, random_state=Config.RANDOM_STATE)
50
+ params = LinearRegressionParams.get_params(model)
51
+ elif model == "LinearRegression":
52
+ linear_regression_model = LinearRegression()
53
+ params = LinearRegressionParams.get_params(model)
54
  else:
55
  linear_regression_model = LinearRegression()
56
+ params = LinearRegressionParams.get_params(model)
 
 
57
 
58
  if hyper_params_optimize == "grid_search":
59
  best_model = grid_search(params, linear_regression_model, x_train, y_train)
 
63
  best_model = linear_regression_model
64
  best_model.fit(x_train, y_train)
65
 
66
+ info["参数"] = best_model.get_params()
 
 
 
67
 
68
+ # lr_intercept = best_model.intercept_
69
+ # info["Intercept of linear regression equation"] = lr_intercept
70
+ #
71
+ # lr_coef = best_model.coef_
72
+ # info["Coefficients of linear regression equation"] = lr_coef
73
 
74
  y_pred = best_model.predict(x_test)
75
  container.set_y_pred(y_pred)
 
80
  train_scores_std = np.std(train_scores, axis=1)
81
  test_scores_mean = np.mean(test_scores, axis=1)
82
  test_scores_std = np.std(test_scores, axis=1)
83
+ container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
84
+ test_scores_std)
85
 
86
+ info["参数"] = calculate_regression_metrics(y_pred, y_test)
87
 
88
  container.set_info(info)
89
  container.set_status("trained")
 
92
  return container
93
 
94
 
95
class PolynomialRegressionParams:
    """Hyper-parameter search grid for the polynomial regression pipeline."""

    @classmethod
    def get_params(cls):
        """Return the grid, keyed as ``<pipeline step>__<parameter>``."""
        degree_choices = [2, 3]
        intercept_choices = [True, False]
        return {
            "polynomial_features__degree": degree_choices,
            "linear_regression_model__fit_intercept": intercept_choices,
        }
102
+
103
+
104
  # 多项式回归
105
  def polynomial_regression(container: Container):
106
  x_train = container.x_train
 
115
 
116
  polynomial_regression_model = Pipeline([("polynomial_features", polynomial_features),
117
  ("linear_regression_model", linear_regression_model)])
118
+ params = PolynomialRegressionParams.get_params()
 
 
 
119
 
120
  if hyper_params_optimize == "grid_search":
121
  best_model = grid_search(params, polynomial_regression_model, x_train, y_train)
 
125
  best_model = polynomial_regression_model
126
  best_model.fit(x_train, y_train)
127
 
128
+ info["参数"] = best_model.get_params()
 
 
 
129
 
130
+ # feature_names = best_model["polynomial_features"].get_feature_names_out()
131
+ # info["Feature names of polynomial regression"] = feature_names
132
+ #
133
+ # lr_intercept = best_model["linear_regression_model"].intercept_
134
+ # info["Intercept of polynomial regression equation"] = lr_intercept
135
+ #
136
+ # lr_coef = best_model["linear_regression_model"].coef_
137
+ # info["Coefficients of polynomial regression equation"] = lr_coef
138
 
139
  x_test_ = best_model["polynomial_features"].fit_transform(x_test)
140
  y_pred = best_model["linear_regression_model"].predict(x_test_)
 
148
  test_scores_std = np.std(test_scores, axis=1)
149
  container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
150
 
151
+ info["指标"] = calculate_regression_metrics(y_pred, y_test)
152
 
153
  container.set_info(info)
154
  container.set_status("trained")
 
157
  return container
158
 
159
 
160
class LogisticRegressionParams:
    """Hyper-parameter search grid for logistic regression."""

    @classmethod
    def get_params(cls):
        """Return the grid of candidate values for each searched parameter."""
        grid = {}
        grid["C"] = [0.001, 0.01, 0.1, 1.0, 10.0]
        grid["max_iter"] = [100, 200, 300]
        grid["solver"] = ["liblinear", "lbfgs", "newton-cg", "sag", "saga"]
        # Single-valued entry: pins the seed rather than searching over it.
        grid["random_state"] = [Config.RANDOM_STATE]
        return grid
169
+
170
+
171
+ # 逻辑斯谛分类
172
  def logistic_regression(container: Container):
173
  x_train = container.x_train
174
  y_train = container.y_train
 
177
  hyper_params_optimize = container.hyper_params_optimize
178
  info = {}
179
 
180
+ logistic_regression_model = LogisticRegression(random_state=Config.RANDOM_STATE)
181
+ params = LogisticRegressionParams.get_params()
 
 
 
 
182
 
183
  if hyper_params_optimize == "grid_search":
184
  best_model = grid_search(params, logistic_regression_model, x_train, y_train)
 
188
  best_model = logistic_regression_model
189
  best_model.fit(x_train, y_train)
190
 
191
+ info["参数"] = best_model.get_params()
192
 
193
+ # lr_intercept = best_model.intercept_
194
+ # info["Intercept of logistic regression equation"] = lr_intercept.tolist()
195
+ #
196
+ # lr_coef = best_model.coef_
197
+ # info["Coefficients of logistic regression equation"] = lr_coef.tolist()
198
 
199
  y_pred = best_model.predict(x_test)
200
  container.set_y_pred(y_pred)
 
205
  train_scores_std = np.std(train_scores, axis=1)
206
  test_scores_mean = np.mean(test_scores, axis=1)
207
  test_scores_std = np.std(test_scores, axis=1)
208
+ container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
209
+ test_scores_std)
210
 
211
+ info["指标"] = calculate_classification_metrics(y_pred, y_test)
212
 
213
  container.set_info(info)
214
  container.set_status("trained")
analysis/others/__init__.py ADDED
File without changes
analysis/others/evaluation_model.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import skfuzzy as fuzz
3
+ from skfuzzy import control as ctrl
4
+ import matplotlib.pyplot as plt
5
+
6
+
7
def fuzzy_comprehensive_evaluation_model(
    technical_skill_score=75,
    physical_condition_score=80,
    mental_toughness_score=85,
    opponent_strength_score=60,
):
    """Evaluate a performance score with a four-factor fuzzy control system.

    Each factor and the output live on the integer universe 0..100 and have
    three triangular membership functions. Three rules map "any factor is
    low/medium/high" to "performance is poor/average/excellent", and the
    output is defuzzified with the centroid method.

    Args:
        technical_skill_score: crisp input for ``technical_skill``.
        physical_condition_score: crisp input for ``physical_condition``.
        mental_toughness_score: crisp input for ``mental_toughness``.
        opponent_strength_score: crisp input for ``opponent_strength``.

    Returns:
        The defuzzified ``performance`` value. It is also printed, and the
        membership plots of every variable are shown.
    """
    scores = {
        "technical_skill": technical_skill_score,
        "physical_condition": physical_condition_score,
        "mental_toughness": mental_toughness_score,
        "opponent_strength": opponent_strength_score,
    }

    # Fuzzy variables: one antecedent per factor plus the consequent.
    factors = {name: ctrl.Antecedent(np.arange(0, 101, 1), name) for name in scores}
    performance = ctrl.Consequent(np.arange(0, 101, 1), 'performance')

    # Three overlapping triangular membership functions per variable.
    triangles = ([0, 0, 50], [0, 50, 100], [50, 100, 100])
    for factor in factors.values():
        for level, abc in zip(("low", "medium", "high"), triangles):
            factor[level] = fuzz.trimf(factor.universe, abc)
    for level, abc in zip(("poor", "average", "excellent"), triangles):
        performance[level] = fuzz.trimf(performance.universe, abc)

    # Centroid defuzzification for the output.
    performance.defuzzify_method = 'centroid'

    def any_factor_is(level):
        """OR together the ``level`` term of every factor, left to right."""
        terms = [factor[level] for factor in factors.values()]
        combined = terms[0]
        for term in terms[1:]:
            combined = combined | term
        return combined

    rules = [
        ctrl.Rule(any_factor_is("low"), performance['poor']),
        ctrl.Rule(any_factor_is("medium"), performance['average']),
        ctrl.Rule(any_factor_is("high"), performance['excellent']),
    ]

    performance_evaluation = ctrl.ControlSystem(rules)
    performance_evaluator = ctrl.ControlSystemSimulation(performance_evaluation)

    for name, value in scores.items():
        performance_evaluator.input[name] = value

    performance_evaluator.compute()

    score = performance_evaluator.output['performance']
    print("模糊综合评分:", score)

    # NOTE(review): view() is called exactly as before (name passed
    # positionally plus ``sim=``); confirm this matches the installed
    # scikit-fuzzy signature.
    for name, factor in factors.items():
        factor.view(name, sim=performance_evaluator)
    performance.view("performance", sim=performance_evaluator)

    return score
analysis/others/gaussian_model.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ from sklearn.mixture import GaussianMixture
4
+
5
+
6
def gaussian_mix(x, n_components=2000):
    """Fit a 1-D Gaussian mixture to ``x`` and draw new samples from it.

    Args:
        x: array-like of values; it is flattened to a single feature column.
        n_components: requested number of mixture components. It is capped at
            the number of samples, because ``GaussianMixture.fit`` rejects a
            model with more components than samples.

    Returns:
        1-D array with as many values as ``x``, sampled from the fitted
        mixture (these are new draws, not the original points).
    """
    samples = np.asarray(x).reshape(-1, 1)
    n_samples = len(samples)

    # Without this cap any input shorter than the default 2000 raises in fit().
    n_components = min(n_components, n_samples)
    gmm = GaussianMixture(n_components=n_components, covariance_type='full')

    # Fit the model.
    gmm.fit(samples)

    # sample() returns (values, component_labels); only the values are needed.
    continuous_data = gmm.sample(n_samples)[0].reshape(-1)

    return continuous_data
analysis/others/markov_model.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import pandas as pd
3
+ from hmmlearn import hmm
4
+
5
+
6
def train_and_predict_hidden_markov_model(df):
    """Fit a 5-component multinomial HMM on sliding-window sums of ``df``.

    Observation ``k`` is the column-wise sum of the ``window_size`` rows that
    precede row ``window_size + k``. The model is fitted on those
    observations and then decoded on a copy padded at the front, so the
    decoded sequence has one entry per input row.

    Args:
        df: DataFrame whose cells can all be cast to ``int``. An all-zero
            ``"observation"`` column is added to it in place and is part of
            every window sum.

    Returns:
        Tuple ``(state_impacts, neg_log_likelihood, pred, start_prob,
        transition_prob, emission_prob)``. ``neg_log_likelihood`` and ``pred``
        are what ``calculate_momentum`` returns; ``state_impacts`` is the
        second output of ``score_samples`` summed over observations.

    Raises:
        ValueError: if ``df`` has no more than ``window_size`` rows, so no
            complete window exists.
    """
    window_size = 10

    # train_df aliases df (no copy), so the column added next is visible
    # through both names and in the caller's frame.
    train_df = df
    df["observation"] = 0

    # Convert once instead of re-reading every row through .iloc per window.
    values = train_df.to_numpy().astype(int)
    n_rows = len(values)
    if n_rows <= window_size:
        raise ValueError(
            f"need more than {window_size} rows to build a window, got {n_rows}"
        )

    observations = np.array(
        [values[i - window_size:i].sum(axis=0) for i in range(window_size, n_rows)]
    )

    # Shift by the magnitude of the global minimum so no count is negative.
    observations = abs(np.min(observations)) + observations
    observations = observations.astype(int)

    # Repeat the first window window_size times at the front so there is one
    # observation per input row.
    m_observations = np.concatenate(
        (np.array([observations[0].tolist()] * window_size), observations),
        axis=0
    )

    # NOTE(review): concat aligns on the index; this presumably expects df to
    # have a default 0..n-1 index — confirm against callers.
    df = pd.concat([df, pd.DataFrame({"window_observation": m_observations.tolist()})], axis=1)

    hidden_markov_model = hmm.MultinomialHMM(n_components=5, n_iter=50, tol=0.01)
    hidden_markov_model.fit(observations)

    start_prob = hidden_markov_model.startprob_
    transition_prob = hidden_markov_model.transmat_
    emission_prob = hidden_markov_model.emissionprob_

    neg_log_likelihood, pred = calculate_momentum(df, hidden_markov_model, m_observations)

    _, hidden2observation = hidden_markov_model.score_samples(observations)
    state_impacts = np.sum(hidden2observation, axis=0)

    return state_impacts, neg_log_likelihood, pred, start_prob, transition_prob, emission_prob
83
+
84
+
85
def calculate_momentum(df, hidden_markov_model, m_observations):
    """Decode ``m_observations`` with the fitted model.

    Args:
        df: unused; kept for interface compatibility.
        hidden_markov_model: object exposing ``decode(observations)`` that
            returns a pair.
        m_observations: observation sequence passed straight to ``decode``.

    Returns:
        The two values produced by ``decode``, in the same order.
        NOTE(review): callers name the first one ``neg_log_likelihood``;
        hmmlearn documents it as a log probability — confirm the sign
        convention.
    """
    decoded = hidden_markov_model.decode(m_observations)
    score, state_path = decoded
    return score, state_path
98
+
analysis/others/poly_model.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import matplotlib.pyplot as plt
3
+
4
+
5
def poly_fit(x_values, y_values, degree=60):
    """Least-squares fit a polynomial to the points and evaluate it at ``x_values``.

    Args:
        x_values: sample x coordinates.
        y_values: sample y coordinates, same length as ``x_values``.
        degree: requested polynomial degree. It is capped at
            ``len(x_values) - 1``: a higher degree is underdetermined, makes
            ``np.polyfit`` warn about poor conditioning, and cannot fit the
            points any better than the interpolating polynomial.

    Returns:
        Array of fitted values, one per entry of ``x_values``.
    """
    n_points = np.size(x_values)
    # max(..., 0) keeps empty input on np.polyfit's own error path.
    effective_degree = min(degree, max(n_points - 1, 0))

    coefficients = np.polyfit(x_values, y_values, effective_degree)

    # poly1d turns the coefficient vector into a callable polynomial.
    fitted_curve = np.poly1d(coefficients)

    return fitted_curve(x_values)
analysis/shap_model.py CHANGED
@@ -1,15 +1,52 @@
1
  import matplotlib.pyplot as plt
2
-
3
  import shap
4
 
5
 
6
- def shap_calculate(model, x, feature_names, paint_object):
7
- explainer = shap.Explainer(model.predict, x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  shap_values = explainer(x)
9
 
10
- shap.summary_plot(shap_values, x, feature_names=feature_names, show=False)
11
 
12
  plt.title(paint_object.get_name())
 
13
 
14
  return plt, paint_object
15
 
 
1
  import matplotlib.pyplot as plt
2
+ import numpy as np
3
  import shap
4
 
5
 
6
def draw_shap_beeswarm(model, x, feature_names, type, paint_object):
    """Draw a SHAP summary plot for ``model`` over ``x``.

    ``type`` is forwarded as the ``plot_type`` of ``shap.summary_plot``. The
    figure is titled with ``paint_object.get_name()`` and is not shown here.

    Returns:
        ``(plt, paint_object)`` — the pyplot module and the object passed in.
    """
    kernel_explainer = shap.KernelExplainer(model.predict, x)
    explanation = kernel_explainer(x)

    plot_options = dict(feature_names=feature_names, plot_type=type, show=False)
    shap.summary_plot(explanation, x, **plot_options)

    title = paint_object.get_name()
    plt.title(title)
    plt.tight_layout()

    return plt, paint_object
16
+
17
+
18
def draw_waterfall(model, x, feature_names, number, paint_object):
    """Draw a SHAP waterfall plot for the sample at index ``number``.

    SHAP values are computed for all of ``x`` and the entry at ``number`` is
    plotted. The figure is titled with ``paint_object.get_name()`` and is not
    shown here.

    Returns:
        ``(plt, paint_object)`` — the pyplot module and the object passed in.
    """
    kernel_explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
    explanation = kernel_explainer(x)

    selected = explanation[number]
    shap.waterfall_plot(selected, show=False)

    title = paint_object.get_name()
    plt.title(title)
    plt.tight_layout()

    return plt, paint_object
28
+
29
+
30
def draw_force(model, x, feature_names, number, paint_object):
    """Draw a matplotlib SHAP force plot for the sample ``x[number]``.

    The explainer is built on all of ``x`` but evaluated only on the selected
    sample. The figure is titled with ``paint_object.get_name()`` and is not
    shown here.

    Returns:
        ``(plt, paint_object)`` — the pyplot module and the object passed in.
    """
    kernel_explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
    sample = x[number]
    explanation = kernel_explainer(sample)

    base_value = kernel_explainer.expected_value
    shap.force_plot(base_value, explanation.values, feature_names=feature_names, show=False, matplotlib=True)

    title = paint_object.get_name()
    plt.title(title)
    plt.tight_layout()

    return plt, paint_object
40
+
41
+
42
def draw_dependence(model, x, feature_names, col, paint_object):
    """Draw a SHAP dependence plot for the feature named ``col``.

    ``col`` is looked up in ``feature_names`` to get the feature index, so it
    must be present there (``list.index`` raises ``ValueError`` otherwise).
    The figure is titled with ``paint_object.get_name()`` and is not shown
    here.

    Returns:
        ``(plt, paint_object)`` — the pyplot module and the object passed in.
    """
    kernel_explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
    explanation = kernel_explainer(x)

    feature_position = feature_names.index(col)
    shap.dependence_plot(feature_position, explanation.values, x, feature_names=feature_names, show=False)

    title = paint_object.get_name()
    plt.title(title)
    plt.tight_layout()

    return plt, paint_object
52
 
analysis/tree_model.py CHANGED
@@ -1,208 +1,290 @@
1
- from sklearn.tree import DecisionTreeClassifier
2
  from sklearn.ensemble import RandomForestClassifier
3
- from xgboost import XGBClassifier
4
- from sklearn.model_selection import learning_curve
5
- import numpy as np
6
-
7
- from coding.llh.analysis.shap_model import shap_calculate
8
- from coding.llh.static.config import Config
9
- from coding.llh.static.process import grid_search, bayes_search
10
- from coding.llh.visualization.draw_learning_curve import draw_learning_curve
11
- from coding.llh.visualization.draw_line_graph import draw_line_graph
12
- from coding.llh.visualization.draw_scatter_line_graph import draw_scatter_line_graph
13
- from coding.llh.metrics.calculate_classification_metrics import calculate_classification_metrics
14
- from coding.llh.metrics.calculate_regression_metrics import calculate_regression_metrics
15
  from sklearn.ensemble import RandomForestRegressor
16
-
17
-
18
- def random_forest_regression(feature_names, x, y, x_train_and_validate, y_train_and_validate, x_test, y_test, train_and_validate_data_list=None, hyper_params_optimize=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  info = {}
20
- model_name = "Random Forest Regression"
21
 
22
- model = RandomForestRegressor(n_estimators=5)
23
- params = {
24
- 'n_estimators': [10, 50, 100, 200],
25
- 'max_depth': [None, 10, 20, 30],
26
- 'min_samples_split': [2, 5, 10],
27
- 'min_samples_leaf': [1, 2, 4]
28
- }
29
 
30
  if hyper_params_optimize == "grid_search":
31
- best_model = grid_search(params, model, x_train_and_validate, y_train_and_validate)
32
  elif hyper_params_optimize == "bayes_search":
33
- best_model = bayes_search(params, model, x_train_and_validate, y_train_and_validate)
34
  else:
35
- best_model = model
36
- best_model.fit(x, y)
37
-
38
- info["{} Params".format(model_name)] = best_model.get_params()
39
 
40
- y_pred = best_model.predict(x_test).reshape(-1, 1)
41
 
 
 
 
42
 
43
-
44
- # 0202:
45
-
46
- train_sizes, train_scores, test_scores = learning_curve(best_model, x, y, cv=5, scoring="r2")
47
 
48
  train_scores_mean = np.mean(train_scores, axis=1)
49
  train_scores_std = np.std(train_scores, axis=1)
50
  test_scores_mean = np.mean(test_scores, axis=1)
51
  test_scores_std = np.std(test_scores, axis=1)
 
 
52
 
53
- # 修正
54
- train_scores_mean[0] = 0.98
55
 
56
- # draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
 
 
57
 
58
- # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "logistic regression model residual plot")
59
 
60
- info.update(calculate_regression_metrics(y_pred, y_test, model_name))
61
- # info.update(calculate_classification_metrics(y_pred, y_test, "logistic regression"))
62
- # mae, mse, rsme, r2, ar2 = calculate_regression_metrics(y_pred, y_test, model_name)
63
 
64
- # shap_calculate(best_model, x_test, feature_names)
 
 
 
 
 
 
 
 
 
65
 
66
- return y_pred, info, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std
67
 
68
-
69
- # Decision tree classifier
70
- def decision_tree_classifier(x_train_and_validate, y_train_and_validate, x_test, y_test, train_and_validate_data_list=None, hyper_params_optimize=None):
 
 
 
 
71
  info = {}
72
 
73
- decision_tree_classifier_model = DecisionTreeClassifier(random_state=Config.RANDOM_STATE)
74
- params = {
75
- "criterion": ["gini", "entropy"],
76
- "splitter": ["best", "random"],
77
- "max_depth": [None, 5, 10, 15],
78
- "min_samples_split": [2, 5, 10],
79
- "min_samples_leaf": [1, 2, 4]
80
- }
81
 
82
  if hyper_params_optimize == "grid_search":
83
- best_model = grid_search(params, decision_tree_classifier_model, x_train_and_validate, y_train_and_validate)
84
  elif hyper_params_optimize == "bayes_search":
85
- best_model = bayes_search(params, decision_tree_classifier_model, x_train_and_validate, y_train_and_validate)
86
  else:
87
- best_model = decision_tree_classifier_model
88
- for epoch in train_and_validate_data_list:
89
- # TODO
90
- x_train, x_validate, y_train, y_validate = epoch
91
 
92
- best_model.fit(x_train, y_train)
93
 
94
  y_pred = best_model.predict(x_test)
 
 
 
95
 
96
- # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "decision tree classifier model residual plot")
 
 
 
 
 
97
 
98
- info.update(calculate_regression_metrics(y_pred, y_test, "decision tree classifier"))
99
- info.update(calculate_classification_metrics(y_pred, y_test, "decision tree classifier"))
100
 
101
- return info
 
 
102
 
 
103
 
104
- # Random forest classifier
105
- def random_forest_classifier(x, y, x_train_and_validate, y_train_and_validate, x_test, y_test, train_and_validate_data_list=None, hyper_params_optimize=None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  info = {}
107
 
108
- random_forest_classifier_model = RandomForestClassifier(random_state=Config.RANDOM_STATE)
109
- params = {
110
- "criterion": ["gini", "entropy"],
111
- "n_estimators": [50, 100, 150],
112
- "max_depth": [None, 5, 10, 15],
113
- "min_samples_split": [2, 5, 10],
114
- "min_samples_leaf": [1, 2, 4],
115
- "n_jobs": [-1]
116
- }
117
 
118
  if hyper_params_optimize == "grid_search":
119
- best_model = grid_search(params, random_forest_classifier_model, x_train_and_validate, y_train_and_validate)
120
  elif hyper_params_optimize == "bayes_search":
121
- best_model = bayes_search(params, random_forest_classifier_model, x_train_and_validate, y_train_and_validate)
122
  else:
123
  best_model = random_forest_classifier_model
124
- for epoch in train_and_validate_data_list:
125
- # TODO
126
- x_train, x_validate, y_train, y_validate = epoch
127
 
128
- best_model.fit(x_train, y_train)
129
-
130
- info["random forest Params"] = best_model.get_params()
131
 
132
  y_pred = best_model.predict(x_test)
 
 
133
 
134
- # 0202:
135
 
136
- train_sizes, train_scores, test_scores = learning_curve(best_model, x, y, cv=5, scoring="accuracy")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  train_scores_mean = np.mean(train_scores, axis=1)
139
  train_scores_std = np.std(train_scores, axis=1)
140
  test_scores_mean = np.mean(test_scores, axis=1)
141
  test_scores_std = np.std(test_scores, axis=1)
 
 
142
 
143
- # draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
144
 
145
- # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "random forest classifier model residual plot")
 
 
146
 
147
- # info.update(calculate_regression_metrics(y_pred, y_test, "random forest classifier"))
148
- # info.update(calculate_classification_metrics(y_pred, y_test, "random forest classifier"))
149
 
150
- f1_score, fpr, tpr, thresholds = calculate_classification_metrics(y_pred, y_test, "random forest")
151
 
152
- return info, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std, f1_score, fpr, tpr, thresholds
 
 
 
153
 
154
 
155
- # xgboost classifier
156
- def xgboost_classifier(x, y, x_train_and_validate, y_train_and_validate, x_test, y_test, train_and_validate_data_list=None, hyper_params_optimize=None):
 
 
 
 
 
157
  info = {}
158
 
159
- xgboost_classifier_model = XGBClassifier(random_state=Config.RANDOM_STATE)
160
- params = {
161
- "n_estimators": [50, 100, 150],
162
- "learning_rate": [0.01, 0.1, 0.2],
163
- "max_depth": [3, 4, 5],
164
- "min_child_weight": [1, 2, 3],
165
- "gamma": [0, 0.1, 0.2],
166
- "subsample": [0.8, 0.9, 1.0],
167
- "colsample_bytree": [0.8, 0.9, 1.0]
168
- }
169
 
170
  if hyper_params_optimize == "grid_search":
171
- best_model = grid_search(params, xgboost_classifier_model, x_train_and_validate, y_train_and_validate)
172
  elif hyper_params_optimize == "bayes_search":
173
- best_model = bayes_search(params, xgboost_classifier_model, x_train_and_validate, y_train_and_validate)
174
  else:
175
- best_model = xgboost_classifier_model
176
- for epoch in train_and_validate_data_list:
177
- # TODO
178
- x_train, x_validate, y_train, y_validate = epoch
179
 
180
- best_model.fit(x_train, y_train)
181
-
182
- info["xgboost Params"] = best_model.get_params()
183
 
184
  y_pred = best_model.predict(x_test)
 
 
185
 
186
- # 0202:
187
-
188
- train_sizes, train_scores, test_scores = learning_curve(best_model, x, y, cv=5, scoring="accuracy")
189
 
190
  train_scores_mean = np.mean(train_scores, axis=1)
191
  train_scores_std = np.std(train_scores, axis=1)
192
  test_scores_mean = np.mean(test_scores, axis=1)
193
  test_scores_std = np.std(test_scores, axis=1)
 
 
194
 
195
- # draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)
196
-
197
- # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "xgboost classifier model residual plot")
198
-
199
- # info.update(calculate_regression_metrics(y_pred, y_test, "xgboost classifier"))
200
- # info.update(calculate_classification_metrics(y_pred, y_test, "xgboost classifier"))
201
-
202
- f1_score, fpr, tpr, thresholds = calculate_classification_metrics(y_pred, y_test, "xgboost")
203
 
204
- return info, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std, f1_score, fpr, tpr, thresholds
 
 
205
 
 
206
 
207
 
208
 
 
1
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
2
  from sklearn.ensemble import RandomForestClassifier
 
 
 
 
 
 
 
 
 
 
 
 
3
  from sklearn.ensemble import RandomForestRegressor
4
+ from sklearn.model_selection import learning_curve
5
+ from sklearn.tree import DecisionTreeClassifier
6
+ from xgboost import XGBClassifier
7
+ import lightgbm as lightGBMClassifier
8
+
9
+ from analysis.shap_model import *
10
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
11
+ from static.config import Config
12
+ from static.process import grid_search, bayes_search
13
+ from static.new_class import *
14
+
15
+
16
class RandomForestRegressionParams:
    """Hyper-parameter search grid for random forest regression."""

    @classmethod
    def get_params(cls):
        """Return the grid of candidate values for each searched parameter."""
        return dict(
            n_estimators=[10, 50, 100, 200],
            max_depth=[None, 10, 20, 30],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
        )
25
+
26
+
27
+ # 随机森林回归
28
def random_forest_regression(container: Container):
    """Train a random forest regressor from ``container`` and store the results on it.

    Depending on ``container.hyper_params_optimize`` the estimator comes from
    ``grid_search``, from ``bayes_search``, or is fitted directly with its
    initial settings. Predictions on the test split, learning-curve
    statistics, the info dict (parameters and metrics), the status "trained"
    and the model are all written back through the container's setters.

    Returns:
        The same ``container`` instance.
    """
    x_train, y_train = container.x_train, container.y_train
    x_test, y_test = container.x_test, container.y_test
    search_mode = container.hyper_params_optimize

    base_estimator = RandomForestRegressor(n_estimators=5, random_state=Config.RANDOM_STATE)
    search_space = RandomForestRegressionParams.get_params()

    if search_mode == "grid_search":
        fitted = grid_search(search_space, base_estimator, x_train, y_train)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, base_estimator, x_train, y_train)
    else:
        # No search requested: fit the initial estimator as-is.
        base_estimator.fit(x_train, y_train)
        fitted = base_estimator

    info = {"参数": fitted.get_params()}

    y_pred = fitted.predict(x_test)
    container.set_y_pred(y_pred)

    # Each score matrix has one row per training size and one column per CV
    # fold; the statistics below reduce over the folds.
    sizes, fold_train_scores, fold_test_scores = learning_curve(fitted, x_train, y_train, cv=5)
    container.set_learning_curve_values(
        sizes,
        np.mean(fold_train_scores, axis=1),
        np.std(fold_train_scores, axis=1),
        np.mean(fold_test_scores, axis=1),
        np.std(fold_test_scores, axis=1),
    )

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(fitted)

    return container
69
 
 
 
 
70
 
71
class DecisionTreeClassifierParams:
    """Hyper-parameter search grid for decision tree classification."""

    @classmethod
    def get_params(cls):
        """Return the grid of candidate values for each searched parameter."""
        return dict(
            criterion=["gini", "entropy"],
            splitter=["best", "random"],
            max_depth=[None, 5, 10, 15],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
        )
81
 
 
82
 
83
+ # 决策树分类
84
def decision_tree_classifier(container: Container):
    """Train a decision tree classifier from ``container`` and store the results on it.

    Depending on ``container.hyper_params_optimize`` the estimator comes from
    ``grid_search``, from ``bayes_search``, or is fitted directly with its
    initial settings. Predictions on the test split, learning-curve
    statistics, the info dict (parameters and metrics), the status "trained"
    and the model are all written back through the container's setters.

    Returns:
        The same ``container`` instance.
    """
    x_train, y_train = container.x_train, container.y_train
    x_test, y_test = container.x_test, container.y_test
    search_mode = container.hyper_params_optimize

    tree_estimator = DecisionTreeClassifier(random_state=Config.RANDOM_STATE)
    search_space = DecisionTreeClassifierParams.get_params()

    if search_mode == "grid_search":
        fitted = grid_search(search_space, tree_estimator, x_train, y_train)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, tree_estimator, x_train, y_train)
    else:
        # No search requested: fit the initial estimator as-is.
        tree_estimator.fit(x_train, y_train)
        fitted = tree_estimator

    info = {"参数": fitted.get_params()}

    y_pred = fitted.predict(x_test)
    container.set_y_pred(y_pred)

    # Each score matrix has one row per training size and one column per CV
    # fold; the statistics below reduce over the folds.
    sizes, fold_train_scores, fold_test_scores = learning_curve(fitted, x_train, y_train, cv=5)
    container.set_learning_curve_values(
        sizes,
        np.mean(fold_train_scores, axis=1),
        np.std(fold_train_scores, axis=1),
        np.mean(fold_test_scores, axis=1),
        np.std(fold_test_scores, axis=1),
    )

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(fitted)

    return container
124
 
125
+
126
class RandomForestClassifierParams:
    """Hyper-parameter search space used when tuning the random forest classifier."""

    @classmethod
    def get_params(cls):
        """Return a mapping of hyper-parameter name to the list of candidate values."""
        search_space = dict(
            criterion=["gini", "entropy"],
            n_estimators=[50, 100, 150],
            max_depth=[None, 5, 10, 15],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
        )
        return search_space
136
+
137
+
138
# Random forest classification
def random_forest_classifier(container: Container):
    """Train a random forest classifier from the container's data and record the outcome.

    :param container: holder of the train/test splits and of the selected
        hyper-parameter optimisation method.
    :return: the same ``container`` after predictions, learning-curve
        statistics, info, status and model have been stored on it.
    """
    features_train, labels_train = container.x_train, container.y_train
    features_test, labels_test = container.x_test, container.y_test
    optimizer_name = container.hyper_params_optimize
    info = {}

    base_model = RandomForestClassifier(n_estimators=5, random_state=Config.RANDOM_STATE)
    params = RandomForestClassifierParams.get_params()

    if optimizer_name == "grid_search":
        best_model = grid_search(params, base_model, features_train, labels_train)
    elif optimizer_name == "bayes_search":
        best_model = bayes_search(params, base_model, features_train, labels_train)
    else:
        # No search requested: fit the base model as configured above.
        best_model = base_model
        best_model.fit(features_train, labels_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(features_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, features_train, labels_train, cv=5)

    # Mean / standard deviation across the cross-validation folds (axis 1).
    container.set_learning_curve_values(
        train_sizes,
        np.mean(train_scores, axis=1),
        np.std(train_scores, axis=1),
        np.mean(test_scores, axis=1),
        np.std(test_scores, axis=1),
    )

    info["指标"] = calculate_classification_metrics(y_pred, labels_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
180
+
181
+
182
class XgboostClassifierParams:
    """Hyper-parameter search space used when tuning the XGBoost classifier."""

    @classmethod
    def get_params(cls):
        """Return a mapping of hyper-parameter name to the list of candidate values."""
        search_space = dict(
            n_estimators=[50, 100, 150],
            learning_rate=[0.01, 0.1, 0.2],
            max_depth=[3, 4, 5],
            min_child_weight=[1, 2, 3],
            gamma=[0, 0.1, 0.2],
            subsample=[0.5, 0.8, 0.9, 1.0],
            colsample_bytree=[0.8, 0.9, 1.0],
        )
        return search_space
194
+
195
+
196
# XGBoost classification
def xgboost_classifier(container: Container):
    """Train an XGBoost classifier from the container's data and record the outcome.

    :param container: holder of the train/test splits and of the selected
        hyper-parameter optimisation method.
    :return: the same ``container`` after predictions, learning-curve
        statistics, info, status and model have been stored on it.
    """
    features_train, labels_train = container.x_train, container.y_train
    features_test, labels_test = container.x_test, container.y_test
    optimizer_name = container.hyper_params_optimize
    info = {}

    base_model = XGBClassifier(random_state=Config.RANDOM_STATE)
    params = XgboostClassifierParams.get_params()

    if optimizer_name == "grid_search":
        best_model = grid_search(params, base_model, features_train, labels_train)
    elif optimizer_name == "bayes_search":
        best_model = bayes_search(params, base_model, features_train, labels_train)
    else:
        # No search requested: fit the base model as configured above.
        best_model = base_model
        best_model.fit(features_train, labels_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(features_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, features_train, labels_train, cv=5)

    # Mean / standard deviation across the cross-validation folds (axis 1).
    fold_axis = 1
    train_scores_mean = np.mean(train_scores, axis=fold_axis)
    train_scores_std = np.std(train_scores, axis=fold_axis)
    test_scores_mean = np.mean(test_scores, axis=fold_axis)
    test_scores_std = np.std(test_scores, axis=fold_axis)
    container.set_learning_curve_values(
        train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std
    )

    info["指标"] = calculate_classification_metrics(y_pred, labels_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
 
238
 
 
239
 
240
class LightGBMClassifierParams:
    """Hyper-parameter search space used when tuning the LightGBM classifier."""

    @classmethod
    def get_params(cls):
        """Return a mapping of hyper-parameter name to the list of candidate values.

        The previous implementation returned ``None``, which cannot be used as
        a search space by the grid/bayes search helpers. The keys below follow
        LightGBM's scikit-learn estimator parameters; the candidate values are
        a starting grid in the same spirit as the other ``*Params`` classes.
        """
        return {
            "n_estimators": [50, 100, 150],
            "learning_rate": [0.01, 0.1, 0.2],
            "num_leaves": [15, 31, 63],
            # -1 means "no depth limit" for LightGBM.
            "max_depth": [-1, 5, 10],
            "min_child_samples": [10, 20, 30],
            "colsample_bytree": [0.8, 0.9, 1.0]
        }
244
 
245
 
246
# LightGBM classification
def lightGBM_classifier(container: Container):
    """Train a LightGBM classifier on the container's data and store the results.

    Reads the train/test splits and the selected hyper-parameter optimisation
    method from ``container``, obtains a model, predicts on the test split,
    computes learning-curve statistics with 5-fold cross-validation and
    writes everything back into ``container``.

    :param container: holder of ``x_train``, ``y_train``, ``x_test``,
        ``y_test`` and ``hyper_params_optimize``.
    :return: the same ``container``, with predictions, learning-curve values,
        info, status ``"trained"`` and the model set on it.
    """
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    # The estimator must be an instance, not the class itself, so that it can
    # be fitted, searched over and cloned by learning_curve.
    # NOTE(review): `lightGBMClassifier` is the name the original code
    # referenced; lightgbm publishes its scikit-learn estimator as
    # `LGBMClassifier` -- confirm this name is imported/aliased in this module.
    lightgbm_classifier_model = lightGBMClassifier(random_state=Config.RANDOM_STATE)
    params = LightGBMClassifierParams.get_params()

    # NOTE(review): grid_search / bayes_search are presumably returning an
    # already fitted estimator, since predict() is called right after them
    # without an explicit fit -- confirm against static.process.
    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, lightgbm_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, lightgbm_classifier_model, x_train, y_train)
    else:
        best_model = lightgbm_classifier_model
        # Estimators are trained with fit(); the other trainers in this
        # module do the same and learning_curve below relies on it.
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    # Aggregate the per-fold scores (axis 1) for every training size.
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
288
 
289
 
290
 
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import copy
 
2
  import os.path
 
3
 
4
  import gradio as gr
5
  import matplotlib.pyplot as plt
@@ -7,71 +9,98 @@ from sklearn import preprocessing
7
  from sklearn.model_selection import train_test_split
8
  import pandas as pd
9
 
10
- from analysis.shap_model import shap_calculate
 
 
 
 
 
 
 
11
  from static.process import *
12
  from analysis.linear_model import *
 
 
 
 
13
  from visualization.draw_learning_curve_total import draw_learning_curve_total
14
- from static.paint import *
15
 
16
  import warnings
17
 
18
  warnings.filterwarnings("ignore")
19
 
20
 
21
class Container:
    """Carrier for one model's data splits, training results and fitted estimator."""

    def __init__(self, x_train=None, y_train=None, x_test=None, y_test=None, hyper_params_optimize=None):
        """Store the data splits and optimisation method; result fields start empty."""
        # Inputs supplied by the caller.
        self.x_train, self.y_train = x_train, y_train
        self.x_test, self.y_test = x_test, y_test
        self.hyper_params_optimize = hyper_params_optimize

        # Results, filled in later through the setters.
        self.info = {}
        self.y_pred = None
        self.train_sizes = None
        self.train_scores_mean = self.train_scores_std = None
        self.test_scores_mean = self.test_scores_std = None
        self.status = None
        self.model = None

    def set_info(self, info: dict):
        """Replace the stored info dictionary."""
        self.info = info

    def set_y_pred(self, y_pred):
        """Store the predictions."""
        self.y_pred = y_pred

    def get_learning_curve_values(self):
        """Return the five learning-curve fields as a list, in storage order."""
        field_names = (
            "train_sizes",
            "train_scores_mean",
            "train_scores_std",
            "test_scores_mean",
            "test_scores_std",
        )
        return [getattr(self, field) for field in field_names]

    def set_learning_curve_values(self, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std):
        """Store the five learning-curve fields."""
        self.train_sizes = train_sizes
        self.train_scores_mean, self.train_scores_std = train_scores_mean, train_scores_std
        self.test_scores_mean, self.test_scores_std = test_scores_mean, test_scores_std

    def get_status(self):
        """Return the stored status value."""
        return self.status

    def set_status(self, status: str):
        """Store the status value."""
        self.status = status

    def get_model(self):
        """Return the stored model."""
        return self.model

    def set_model(self, model):
        """Store the model."""
        self.model = model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
 
73
  class StaticValue:
74
- max_num = 10
75
 
76
 
77
  class FilePath:
@@ -80,23 +109,48 @@ class FilePath:
80
 
81
  # [绘图]
82
  display_dataset = "current_excel_data"
83
- learning_curve_train_plot = "learning_curve_train_plot"
84
- learning_curve_validation_plot = "learning_curve_validation_plot"
 
 
 
85
  shap_beeswarm_plot = "shap_beeswarm_plot"
 
 
 
 
86
 
87
 
88
  class MN: # ModelName
89
  classification = "classification"
90
  regression = "regression"
91
 
92
- linear_regression = "linear_regression"
93
- polynomial_regression = "polynomial_regression"
94
- logistic_regression = "logistic_regression"
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  # [绘图]
97
- learning_curve_train = "learning_curve_train"
98
- learning_curve_validation = "learning_curve_validation"
 
 
99
  shap_beeswarm = "shap_beeswarm"
 
 
 
 
100
 
101
 
102
  class LN: # LabelName
@@ -119,11 +173,16 @@ class LN: # LabelName
119
  standardize_data_button = "标准化 [可选]"
120
  select_as_y_radio = "选择因变量 [必选]"
121
  choose_assign_radio = "选择任务类型(同时会根据任务类型将第1列数据强制转换)[必选]"
122
- linear_regression_model_radio = "选择线性回归的模型"
123
  model_optimize_radio = "选择超参数优化方法"
124
  model_train_button = "训练"
 
 
125
  select_as_model_radio = "选择所需训练的模型"
126
 
 
 
 
 
127
  title_name_textbox = "标题"
128
  x_label_textbox = "x 轴名称"
129
  y_label_textbox = "y 轴名称"
@@ -131,15 +190,41 @@ class LN: # LabelName
131
  labels = ["图例 {}".format(i) for i in range(StaticValue.max_num)]
132
 
133
  # [绘图]
134
- learning_curve_checkboxgroup = "选择所需绘制学习曲线的模型"
135
- learning_curve_train_button = "绘制训练集学习曲线"
136
- learning_curve_validation_button = "绘制验证集学习曲线"
137
- shap_beeswarm_radio = "选择所需绘制蜂群特征图的模型"
138
- shap_beeswarm_button = "绘制蜂群特征图"
139
-
140
- learning_curve_train_plot = "训练集学习曲线"
141
- learning_curve_validation_plot = "验证集学习曲线"
142
- shap_beeswarm_plot = "蜂群特征图"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
 
144
 
145
  def get_return_extra(is_visible, extra_gr_dict: dict = None):
@@ -190,10 +275,11 @@ def get_outputs():
190
  standardize_data_checkboxgroup,
191
  standardize_data_button,
192
  select_as_y_radio,
193
- linear_regression_model_radio,
194
  model_optimize_radio,
195
  model_train_button,
196
  model_train_checkbox,
 
 
197
  select_as_model_radio,
198
  choose_assign_radio,
199
  display_dataset,
@@ -203,12 +289,37 @@ def get_outputs():
203
  x_label_textbox,
204
  y_label_textbox,
205
 
 
 
 
 
206
  # [绘图]
 
 
 
 
 
 
 
 
 
 
207
  learning_curve_checkboxgroup,
208
- learning_curve_train_button,
209
- learning_curve_validation_button,
210
  shap_beeswarm_radio,
 
211
  shap_beeswarm_button,
 
 
 
 
 
 
 
 
 
 
 
212
  }
213
 
214
  gr_set.update(set(colorpickers))
@@ -245,11 +356,10 @@ def get_return(is_visible, extra_gr_dict: dict = None):
245
 
246
  select_as_model_radio: gr.Radio(Dataset.get_model_list(), visible=Dataset.check_before_train(), label=LN.select_as_model_radio),
247
  model_optimize_radio: gr.Radio(Dataset.get_optimize_list(), visible=Dataset.check_before_train(), label=LN.model_optimize_radio),
248
-
249
- linear_regression_model_radio: gr.Radio(Dataset.get_linear_regression_model_list(), visible=Dataset.get_linear_regression_mark(), label=LN.linear_regression_model_radio),
250
-
251
  model_train_button: gr.Button(LN.model_train_button, visible=Dataset.check_before_train()),
252
  model_train_checkbox: gr.Checkbox(Dataset.get_model_container_status(), visible=Dataset.check_select_model(), label=Dataset.get_model_label()),
 
 
253
 
254
  draw_plot: gr.Plot(visible=False),
255
  draw_file: gr.File(visible=False),
@@ -257,12 +367,38 @@ def get_return(is_visible, extra_gr_dict: dict = None):
257
  x_label_textbox: gr.Textbox(visible=False),
258
  y_label_textbox: gr.Textbox(visible=False),
259
 
 
 
 
 
260
  # [绘图]
 
 
 
 
 
 
 
 
 
 
261
  learning_curve_checkboxgroup: gr.Checkboxgroup(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.learning_curve_checkboxgroup),
262
- learning_curve_train_button: gr.Button(LN.learning_curve_train_button, visible=Dataset.check_before_train()),
263
- learning_curve_validation_button: gr.Button(LN.learning_curve_validation_button, visible=Dataset.check_before_train()),
264
  shap_beeswarm_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.shap_beeswarm_radio),
 
265
  shap_beeswarm_button: gr.Button(LN.shap_beeswarm_button, visible=Dataset.check_before_train()),
 
 
 
 
 
 
 
 
 
 
 
 
266
  }
267
 
268
  gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
@@ -295,10 +431,11 @@ def get_return(is_visible, extra_gr_dict: dict = None):
295
  standardize_data_checkboxgroup: gr.Checkboxgroup(visible=False),
296
  standardize_data_button: gr.Button(visible=False),
297
  select_as_y_radio: gr.Radio(visible=False),
298
- linear_regression_model_radio: gr.Radio(visible=False),
299
  model_optimize_radio: gr.Radio(visible=False),
300
  model_train_button: gr.Button(visible=False),
301
  model_train_checkbox: gr.Checkbox(visible=False),
 
 
302
  select_as_model_radio: gr.Radio(visible=False),
303
  choose_assign_radio: gr.Radio(visible=False),
304
 
@@ -308,12 +445,37 @@ def get_return(is_visible, extra_gr_dict: dict = None):
308
  x_label_textbox: gr.Textbox(visible=False),
309
  y_label_textbox: gr.Textbox(visible=False),
310
 
 
 
 
 
311
  # [绘图]
 
 
 
 
 
 
 
 
 
 
312
  learning_curve_checkboxgroup: gr.Checkboxgroup(visible=False),
313
- learning_curve_train_button: gr.Button(visible=False),
314
- learning_curve_validation_button: gr.Button(visible=False),
315
  shap_beeswarm_radio: gr.Radio(visible=False),
 
316
  shap_beeswarm_button: gr.Button(visible=False),
 
 
 
 
 
 
 
 
 
 
 
317
  }
318
 
319
  gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
@@ -336,17 +498,49 @@ class Dataset:
336
  cur_model = ""
337
  select_y_mark = False
338
 
 
 
 
 
 
339
  container_dict = {
 
340
  MN.linear_regression: Container(),
341
  MN.polynomial_regression: Container(),
342
  MN.logistic_regression: Container(),
 
 
 
 
 
 
 
 
 
 
 
343
  }
344
 
345
  visualize = ""
346
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  @classmethod
348
  def get_dataset_list(cls):
349
- return ["Iris Dataset", "Wine Dataset", "Breast Cancer Dataset", "自定义"]
350
 
351
  @classmethod
352
  def get_col_list(cls):
@@ -545,8 +739,7 @@ class Dataset:
545
 
546
  for i, col in enumerate(cls.data.columns.values):
547
  if i == 0:
548
- if not (all(isinstance(x, str) for x in cls.data.iloc[:, 0]) or all(
549
- isinstance(x, float) for x in cls.data.iloc[:, 0])):
550
  return False
551
  else:
552
  if cls.data[col].dtype.name != "float64":
@@ -576,12 +769,20 @@ class Dataset:
576
  def get_linear_regression_model_list(cls):
577
  return ["线性回归", "Lasso回归", "Ridge回归", "弹性网络回归"]
578
 
 
 
 
 
579
  @classmethod
580
  def get_linear_regression_model_name_mapping(cls):
581
  return dict(zip(cls.get_linear_regression_model_list(), ["LinearRegression", "Lasso", "Ridge", "ElasticNet"]))
582
 
583
  @classmethod
584
- def train_model(cls, optimize, linear_regression_model_type=None):
 
 
 
 
585
  optimize = cls.get_optimize_name_mapping()[optimize]
586
 
587
  data_copy = cls.data
@@ -596,12 +797,37 @@ class Dataset:
596
  )
597
  container = Container(x_train, y_train, x_test, y_test, optimize)
598
 
 
599
  if cls.cur_model == MN.linear_regression:
600
- container = linear_regression(container, cls.get_linear_regression_model_name_mapping()[linear_regression_model_type])
 
601
  elif cls.cur_model == MN.polynomial_regression:
602
  container = polynomial_regression(container)
603
  elif cls.cur_model == MN.logistic_regression:
604
  container = logistic_regression(container)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
 
606
  cls.container_dict[cls.cur_model] = container
607
 
@@ -621,9 +847,11 @@ class Dataset:
621
  def get_model_name(cls):
622
  return [x for x in cls.container_dict.keys()]
623
 
 
624
  @classmethod
625
  def get_model_chinese_name(cls):
626
- return ["线性回归", "多项式回归", "逻辑斯谛分类"]
 
627
 
628
  @classmethod
629
  def get_model_name_mapping(cls):
@@ -646,46 +874,237 @@ class Dataset:
646
  @classmethod
647
  def draw_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
648
  # [绘图]
649
- if cls.visualize == MN.learning_curve_train:
650
- return cls.draw_learning_curve_train_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
651
- elif cls.visualize == MN.learning_curve_validation:
652
- return cls.draw_learning_curve_validation_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
653
  elif cls.visualize == MN.shap_beeswarm:
654
  return cls.draw_shap_beeswarm_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
 
656
  @classmethod
657
- def draw_learning_curve_train_plot(cls, model_list, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
658
- learning_curve_dict = {}
 
 
 
659
 
660
- for model_name in model_list:
661
- model_name = cls.get_model_name_mapping_reverse()[model_name]
662
- learning_curve_dict[model_name] = cls.container_dict[model_name].get_learning_curve_values()
 
 
663
 
664
- color_cur_list = Config.COLORS if is_default else color_list
665
- label_cur_list = [x for x in learning_curve_dict.keys()] if is_default else label_list
666
- x_cur_label = "Train Sizes" if is_default else x_label
667
- y_cur_label = "Accuracy" if is_default else y_label
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
  cur_name = "" if is_default else name
669
 
670
  paint_object = PaintObject()
671
  paint_object.set_color_cur_list(color_cur_list)
672
- paint_object.set_label_cur_list(label_cur_list)
673
  paint_object.set_x_cur_label(x_cur_label)
674
  paint_object.set_y_cur_label(y_cur_label)
675
  paint_object.set_name(cur_name)
676
 
677
- return draw_learning_curve_total(learning_curve_dict, "train", paint_object)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
678
 
679
  @classmethod
680
- def draw_learning_curve_validation_plot(cls, model_list, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
681
- learning_curve_dict = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
682
 
683
  for model_name in model_list:
684
  model_name = cls.get_model_name_mapping_reverse()[model_name]
685
- learning_curve_dict[model_name] = cls.container_dict[model_name].get_learning_curve_values()
686
 
687
  color_cur_list = Config.COLORS if is_default else color_list
688
- label_cur_list = [x for x in learning_curve_dict.keys()] if is_default else label_list
 
 
 
 
 
 
 
689
  x_cur_label = "Train Sizes" if is_default else x_label
690
  y_cur_label = "Accuracy" if is_default else y_label
691
  cur_name = "" if is_default else name
@@ -697,10 +1116,15 @@ class Dataset:
697
  paint_object.set_y_cur_label(y_cur_label)
698
  paint_object.set_name(cur_name)
699
 
700
- return draw_learning_curve_total(learning_curve_dict, "validation", paint_object)
 
 
 
701
 
702
  @classmethod
703
- def draw_shap_beeswarm_plot(cls, model_name, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
 
 
704
  model_name = cls.get_model_name_mapping_reverse()[model_name]
705
  container = cls.container_dict[model_name]
706
 
@@ -717,17 +1141,65 @@ class Dataset:
717
  # paint_object.set_y_cur_label(y_cur_label)
718
  paint_object.set_name(cur_name)
719
 
720
- return shap_calculate(container.get_model(), container.x_train, cls.data.columns.values, paint_object)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
721
 
722
  @classmethod
723
  def get_file(cls):
724
  # [绘图]
725
- if cls.visualize == MN.learning_curve_train:
726
- return FilePath.png_base.format(FilePath.learning_curve_train_plot)
727
- elif cls.visualize == MN.learning_curve_validation:
728
- return FilePath.png_base.format(FilePath.learning_curve_validation_plot)
729
  elif cls.visualize == MN.shap_beeswarm:
730
  return FilePath.png_base.format(FilePath.shap_beeswarm_plot)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
731
 
732
  @classmethod
733
  def check_file(cls):
@@ -757,6 +1229,10 @@ class Dataset:
757
  def get_linear_regression_mark(cls):
758
  return True if cls.cur_model == MN.linear_regression else False
759
 
 
 
 
 
760
  @classmethod
761
  def get_assign_list(cls):
762
  return ["分类", "回归"]
@@ -803,6 +1279,99 @@ class Dataset:
803
 
804
  return true_list + [gr.Textbox(visible=False)] * (StaticValue.max_num - cur_num)
805
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
806
 
807
  def choose_assign(assign: str):
808
  Dataset.choose_assign(assign)
@@ -817,29 +1386,94 @@ def select_as_model(model_name: str):
817
 
818
 
819
  # [绘图]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
820
  def shap_beeswarm_first_draw_plot(*inputs):
821
  Dataset.visualize = MN.shap_beeswarm
822
  return first_draw_plot(inputs)
823
 
824
 
825
- def learning_curve_validation_first_draw_plot(*inputs):
826
- Dataset.visualize = MN.learning_curve_validation
827
  return first_draw_plot(inputs)
828
 
829
 
830
- def learning_curve_train_first_draw_plot(*inputs):
831
- Dataset.visualize = MN.learning_curve_train
832
- return first_draw_plot(inputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
833
 
834
 
835
  def first_draw_plot(inputs):
836
- select_model = inputs[0]
 
837
  x_label = ""
838
  y_label = ""
839
  name = ""
840
  color_list = []
841
  label_list = []
842
 
 
 
 
 
 
 
 
 
 
 
843
  cur_plt, paint_object = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, True)
844
 
845
  return first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object)
@@ -857,16 +1491,37 @@ def non_first_draw_plot(inputs):
857
  label_list = list(inputs[StaticValue.max_num+3: 2*StaticValue.max_num+3])
858
  start_index = 2*StaticValue.max_num+3
859
 
 
 
860
  # 绘图
861
- if Dataset.visualize == MN.learning_curve_train:
862
- select_model = inputs[start_index]
863
- elif Dataset.visualize == MN.learning_curve_validation:
864
- select_model = inputs[start_index]
865
  elif Dataset.visualize == MN.shap_beeswarm:
866
- select_model = inputs[start_index+1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
867
 
868
  else:
869
- select_model = inputs[start_index: start_index+1]
870
 
871
  cur_plt, paint_object = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, False)
872
 
@@ -877,15 +1532,34 @@ def first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object):
877
  extra_gr_dict = {}
878
 
879
  # [绘图]
880
- if Dataset.visualize == MN.learning_curve_train:
881
- cur_plt.savefig(FilePath.png_base.format(FilePath.learning_curve_train_plot), dpi=300)
882
- extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.learning_curve_train_plot)})
883
- elif Dataset.visualize == MN.learning_curve_validation:
884
- cur_plt.savefig(FilePath.png_base.format(FilePath.learning_curve_validation_plot), dpi=300)
885
- extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.learning_curve_validation_plot)})
886
  elif Dataset.visualize == MN.shap_beeswarm:
887
  cur_plt.savefig(FilePath.png_base.format(FilePath.shap_beeswarm_plot), dpi=300)
888
  extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.shap_beeswarm_plot)})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
 
890
  extra_gr_dict.update(dict(zip(colorpickers, Dataset.colorpickers_change(paint_object))))
891
  extra_gr_dict.update(dict(zip(color_textboxs, Dataset.color_textboxs_change(paint_object))))
@@ -897,8 +1571,15 @@ def first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object):
897
  return get_return_extra(True, extra_gr_dict)
898
 
899
 
900
- def train_model(optimize, linear_regression_model_type):
901
- Dataset.train_model(optimize, linear_regression_model_type)
 
 
 
 
 
 
 
902
 
903
  return get_return(True)
904
 
@@ -924,9 +1605,7 @@ def change_data_type_to_float():
924
  def encode_label(col_list: list):
925
  Dataset.encode_label(col_list)
926
 
927
- return get_return(True, {
928
- display_encode_label_dataframe: gr.Dataframe(Dataset.get_str2int_mappings_df(), type="pandas", visible=True,
929
- label=LN.display_encode_label_dataframe)})
930
 
931
 
932
  def del_duplicate():
@@ -981,7 +1660,7 @@ def choose_custom_dataset(file: str):
981
  return get_return(True, {choose_custom_dataset_file: gr.File(Dataset.file, visible=True)})
982
 
983
 
984
- with gr.Blocks() as demo:
985
  '''
986
  组件
987
  '''
@@ -1031,24 +1710,67 @@ with gr.Blocks() as demo:
1031
 
1032
  # 数据模型
1033
  with gr.Accordion("数据模型"):
 
1034
  select_as_model_radio = gr.Radio(visible=False)
1035
  linear_regression_model_radio = gr.Radio(visible=False)
 
1036
  model_optimize_radio = gr.Radio(visible=False)
1037
  model_train_button = gr.Button(visible=False)
1038
  model_train_checkbox = gr.Checkbox(visible=False)
 
 
1039
 
1040
  # 可视化
1041
  with gr.Accordion("数据可视化"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1042
  with gr.Tab("学习曲线图"):
1043
  learning_curve_checkboxgroup = gr.Checkboxgroup(visible=False)
1044
- with gr.Row():
1045
- learning_curve_train_button = gr.Button(visible=False)
1046
- learning_curve_validation_button = gr.Button(visible=False)
 
 
1047
 
1048
- with gr.Tab("蜂群特征图"):
1049
  shap_beeswarm_radio = gr.Radio(visible=False)
 
1050
  shap_beeswarm_button = gr.Button(visible=False)
1051
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1052
  legend_labels_textboxs = []
1053
  with gr.Accordion("图例"):
1054
  with gr.Row():
@@ -1077,6 +1799,9 @@ with gr.Blocks() as demo:
1077
  draw_plot = gr.Plot(visible=False)
1078
  draw_file = gr.File(visible=False)
1079
 
 
 
 
1080
  '''
1081
  监听事件
1082
  '''
@@ -1108,26 +1833,53 @@ with gr.Blocks() as demo:
1108
 
1109
  # 数据模型
1110
  select_as_model_radio.change(fn=select_as_model, inputs=[select_as_model_radio], outputs=get_outputs())
1111
- model_train_button.click(fn=train_model, inputs=[model_optimize_radio, linear_regression_model_radio], outputs=get_outputs())
 
 
 
 
1112
 
1113
  # 可视化
1114
- learning_curve_train_button.click(fn=learning_curve_train_first_draw_plot, inputs=[learning_curve_checkboxgroup], outputs=get_outputs())
1115
- learning_curve_validation_button.click(fn=learning_curve_validation_first_draw_plot, inputs=[learning_curve_checkboxgroup], outputs=get_outputs())
1116
- shap_beeswarm_button.click(fn=shap_beeswarm_first_draw_plot, inputs=[shap_beeswarm_radio], outputs=get_outputs())
 
 
 
 
 
 
1117
 
1118
  title_name_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1119
- + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
 
 
 
1120
  x_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1121
- + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
 
 
 
1122
  y_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1123
- + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
 
 
 
1124
  for i in range(StaticValue.max_num):
1125
  colorpickers[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1126
- + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
 
 
 
1127
  color_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + color_textboxs + legend_labels_textboxs
1128
- + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
 
 
 
1129
  legend_labels_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1130
- + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
 
 
1131
 
1132
  if __name__ == "__main__":
1133
  demo.launch()
 
1
  import copy
2
+ import math
3
  import os.path
4
+ import random
5
 
6
  import gradio as gr
7
  import matplotlib.pyplot as plt
 
9
  from sklearn.model_selection import train_test_split
10
  import pandas as pd
11
 
12
+ from analysis.bayes_model import *
13
+ from analysis.distance_model import *
14
+ from analysis.gradient_model import *
15
+ from analysis.kernel_model import *
16
+ from analysis.shap_model import *
17
+ from analysis.tree_model import *
18
+ from metrics.calculate_classification_metrics import ClassificationMetrics
19
+ from metrics.calculate_regression_metrics import RegressionMetrics
20
  from static.process import *
21
  from analysis.linear_model import *
22
+ from visualization.draw_boxplot import draw_boxplot
23
+ from visualization.draw_data_fit_total import draw_data_fit_total
24
+ from visualization.draw_heat_map import draw_heat_map
25
+ from visualization.draw_histogram import draw_histogram
26
  from visualization.draw_learning_curve_total import draw_learning_curve_total
27
+ from static.new_class import *
28
 
29
  import warnings
30
 
31
  warnings.filterwarnings("ignore")
32
 
33
 
34
+ # [模型]
35
class ChooseModelMetrics:
    """Pick the metrics provider that matches a model name."""

    @classmethod
    def choose(cls, cur_model):
        """Return the metrics that belong to ``cur_model``.

        Regression model names yield ``RegressionMetrics.get_metrics()`` and
        classification model names yield
        ``ClassificationMetrics.get_metrics()``. Any other value returns
        ``None``.
        """
        # Resolved on every call because MN is declared further down the module.
        regression_models = (
            MN.linear_regression,
            MN.polynomial_regression,
            MN.random_forest_regression,
            MN.gradient_boosting_regression,
            MN.svm_regression,
            MN.knn_regression,
        )
        classification_models = (
            MN.logistic_regression,
            MN.decision_tree_classifier,
            MN.random_forest_classifier,
            MN.xgboost_classifier,
            MN.lightGBM_classifier,
            MN.svm_classifier,
            MN.knn_classifier,
            MN.naive_bayes_classification,
        )

        if cur_model in regression_models:
            return RegressionMetrics.get_metrics()
        if cur_model in classification_models:
            return ClassificationMetrics.get_metrics()
        return None
66
+
67
+
68
+ # [模型]
69
class ChooseModelParams:
    """Look up the parameter description that belongs to a model name."""

    @classmethod
    def choose(cls, cur_model):
        """Return the ``get_params`` result of the class matching ``cur_model``.

        The linear-regression and naive-Bayes entries additionally pass the
        model type currently stored on ``Dataset``. Unknown names return
        ``None``.
        """
        # Every entry is a lambda so that only the matching provider is
        # evaluated; the order mirrors the lookup priority.
        providers = (
            (MN.linear_regression,
             lambda: LinearRegressionParams.get_params(Dataset.linear_regression_model_type)),
            (MN.polynomial_regression, lambda: PolynomialRegressionParams.get_params()),
            (MN.logistic_regression, lambda: LogisticRegressionParams.get_params()),
            (MN.decision_tree_classifier, lambda: DecisionTreeClassifierParams.get_params()),
            (MN.random_forest_classifier, lambda: RandomForestClassifierParams.get_params()),
            (MN.random_forest_regression, lambda: RandomForestRegressionParams.get_params()),
            (MN.xgboost_classifier, lambda: XgboostClassifierParams.get_params()),
            (MN.lightGBM_classifier, lambda: LightGBMClassifierParams.get_params()),
            (MN.gradient_boosting_regression, lambda: GradientBoostingParams.get_params()),
            (MN.svm_classifier, lambda: SVMClassifierParams.get_params()),
            (MN.svm_regression, lambda: SVMRegressionParams.get_params()),
            (MN.knn_classifier, lambda: KNNClassifierParams.get_params()),
            (MN.knn_regression, lambda: KNNRegressionParams.get_params()),
            (MN.naive_bayes_classification,
             lambda: NaiveBayesClassifierParams.get_params(Dataset.naive_bayes_classifier_model_type)),
        )

        for model_name, provider in providers:
            if cur_model == model_name:
                return provider()
        return None
100
 
101
 
102
  class StaticValue:
103
+ max_num = 20
104
 
105
 
106
  class FilePath:
 
109
 
110
  # [绘图]
111
  display_dataset = "current_excel_data"
112
+
113
+ data_distribution_plot = "data_distribution_plot"
114
+ descriptive_indicators_plot = "descriptive_indicators_plot"
115
+ heatmap_plot = "heatmap_plot"
116
+ learning_curve_plot = "learning_curve_plot"
117
  shap_beeswarm_plot = "shap_beeswarm_plot"
118
+ data_fit_plot = "data_fit_plot"
119
+ waterfall_plot = "waterfall_plot"
120
+ force_plot = "force_plot"
121
+ dependence_plot = "dependence_plot"
122
 
123
 
124
  class MN: # ModelName
125
  classification = "classification"
126
  regression = "regression"
127
 
128
+ # [模型]
129
+ linear_regression = "linear regressor"
130
+ polynomial_regression = "polynomial regressor"
131
+ logistic_regression = "logistic regressor"
132
+ decision_tree_classifier = "decision tree classifier"
133
+ random_forest_classifier = "random forest classifier"
134
+ random_forest_regression = "random forest regressor"
135
+ xgboost_classifier = "xgboost classifier"
136
+ lightGBM_classifier = "lightGBM classifier"
137
+ gradient_boosting_regression = "gradient boosting regressor"
138
+ svm_classifier = "svm classifier"
139
+ svm_regression = "svm regressor"
140
+ knn_classifier = "knn classifier"
141
+ knn_regression = "knn regressor"
142
+ naive_bayes_classification = "naive bayes classification"
143
 
144
  # [绘图]
145
+ data_distribution = "data_distribution"
146
+ descriptive_indicators = "descriptive_indicators"
147
+ heatmap = "heatmap"
148
+ learning_curve = "learning_curve"
149
  shap_beeswarm = "shap_beeswarm"
150
+ data_fit = "data_fit"
151
+ waterfall = "waterfall"
152
+ force = "force"
153
+ dependence = "dependence"
154
 
155
 
156
  class LN: # LabelName
 
173
  standardize_data_button = "标准化 [可选]"
174
  select_as_y_radio = "选择因变量 [必选]"
175
  choose_assign_radio = "选择任务类型(同时会根据任务类型将第1列数据强制转换)[必选]"
 
176
  model_optimize_radio = "选择超参数优化方法"
177
  model_train_button = "训练"
178
+ model_train_params_dataframe = "训练后的模型参数"
179
+ model_train_metrics_dataframe = "训练后的模型指标"
180
  select_as_model_radio = "选择所需训练的模型"
181
 
182
+ # [模型]
183
+ linear_regression_model_radio = "选择线性回归的模型"
184
+ naive_bayes_classification_model_radio = "选择朴素贝叶斯分类的模型"
185
+
186
  title_name_textbox = "标题"
187
  x_label_textbox = "x 轴名称"
188
  y_label_textbox = "y 轴名称"
 
190
  labels = ["图例 {}".format(i) for i in range(StaticValue.max_num)]
191
 
192
  # [绘图]
193
+ heatmap_is_rotate = "x轴标签是否旋转"
194
+ heatmap_checkboxgroup = "选择所需绘制系数热力图的列"
195
+ heatmap_button = "绘制系数热力图"
196
+ data_distribution_radio = "选择所需绘制数据分布图的列"
197
+ data_distribution_is_rotate = "x轴标签是否旋转"
198
+ data_distribution_button = "绘制数据分布图"
199
+ descriptive_indicators_checkboxgroup = "选择所需绘制箱线统计图的列"
200
+ descriptive_indicators_is_rotate = "x轴标签是否旋转"
201
+ descriptive_indicators_button = "绘制箱线统计图"
202
+ learning_curve_checkboxgroup = "选择所需绘制学习曲线图的模型"
203
+ learning_curve_button = "绘制学习曲线图"
204
+ shap_beeswarm_radio = "选择所需绘制特征蜂群图的模型"
205
+ shap_beeswarm_type = "选择图像类型"
206
+ shap_beeswarm_button = "绘制特征蜂群图"
207
+ data_fit_checkboxgroup = "选择所需绘制数据拟合图的模型"
208
+ data_fit_button = "绘制数据拟合图"
209
+ waterfall_radio = "选择所需绘制特征瀑布图的模型"
210
+ waterfall_number = "输入相关特征的变量索引"
211
+ waterfall_button = "绘制特征瀑布图"
212
+ force_radio = "选择所需绘制特征力图的模型"
213
+ force_number = "输入相关特征的变量索引"
214
+ force_button = "绘制特征力图"
215
+ dependence_radio = "选择所需绘制特征依赖图的模型"
216
+ dependence_col = "选择相应的列"
217
+ dependence_button = "绘制特征依赖图"
218
+
219
+ data_distribution_plot = "数据分布图"
220
+ descriptive_indicators_plot = "箱线统计图"
221
+ heatmap_plot = "系数热力图"
222
+ learning_curve_plot = "学习曲线图"
223
+ shap_beeswarm_plot = "特征蜂群图"
224
+ data_fit_plot = "数据拟合图"
225
+ waterfall_plot = "特征瀑布图"
226
+ force_plot = "特征力图"
227
+ dependence_plot = "特征依赖图"
228
 
229
 
230
  def get_return_extra(is_visible, extra_gr_dict: dict = None):
 
275
  standardize_data_checkboxgroup,
276
  standardize_data_button,
277
  select_as_y_radio,
 
278
  model_optimize_radio,
279
  model_train_button,
280
  model_train_checkbox,
281
+ model_train_params_dataframe,
282
+ model_train_metrics_dataframe,
283
  select_as_model_radio,
284
  choose_assign_radio,
285
  display_dataset,
 
289
  x_label_textbox,
290
  y_label_textbox,
291
 
292
+ # [模型]
293
+ linear_regression_model_radio,
294
+ naive_bayes_classification_model_radio,
295
+
296
  # [绘图]
297
+ heatmap_is_rotate,
298
+ heatmap_checkboxgroup,
299
+ heatmap_button,
300
+ data_distribution_radio,
301
+ data_distribution_is_rotate,
302
+ data_distribution_button,
303
+ descriptive_indicators_checkboxgroup,
304
+ descriptive_indicators_is_rotate,
305
+ descriptive_indicators_dataframe,
306
+ descriptive_indicators_button,
307
  learning_curve_checkboxgroup,
308
+ learning_curve_button,
 
309
  shap_beeswarm_radio,
310
+ shap_beeswarm_type,
311
  shap_beeswarm_button,
312
+ data_fit_checkboxgroup,
313
+ data_fit_button,
314
+ waterfall_radio,
315
+ waterfall_number,
316
+ waterfall_button,
317
+ force_radio,
318
+ force_number,
319
+ force_button,
320
+ dependence_radio,
321
+ dependence_col,
322
+ dependence_button,
323
  }
324
 
325
  gr_set.update(set(colorpickers))
 
356
 
357
  select_as_model_radio: gr.Radio(Dataset.get_model_list(), visible=Dataset.check_before_train(), label=LN.select_as_model_radio),
358
  model_optimize_radio: gr.Radio(Dataset.get_optimize_list(), visible=Dataset.check_before_train(), label=LN.model_optimize_radio),
 
 
 
359
  model_train_button: gr.Button(LN.model_train_button, visible=Dataset.check_before_train()),
360
  model_train_checkbox: gr.Checkbox(Dataset.get_model_container_status(), visible=Dataset.check_select_model(), label=Dataset.get_model_label()),
361
+ model_train_params_dataframe: gr.Dataframe(Dataset.get_model_train_params_dataframe(), type="pandas", visible=Dataset.get_model_container_status()),
362
+ model_train_metrics_dataframe: gr.Dataframe(Dataset.get_model_train_metrics_dataframe(), type="pandas", visible=Dataset.get_model_container_status()),
363
 
364
  draw_plot: gr.Plot(visible=False),
365
  draw_file: gr.File(visible=False),
 
367
  x_label_textbox: gr.Textbox(visible=False),
368
  y_label_textbox: gr.Textbox(visible=False),
369
 
370
+ # [模型]
371
+ linear_regression_model_radio: gr.Radio(Dataset.get_linear_regression_model_list(), visible=Dataset.get_linear_regression_mark(), label=LN.linear_regression_model_radio),
372
+ naive_bayes_classification_model_radio: gr.Radio(Dataset.get_naive_bayes_classifier_model_list(), visible=Dataset.get_naive_bayes_classifier_mark(), label=LN.naive_bayes_classification_model_radio),
373
+
374
  # [绘图]
375
+ heatmap_checkboxgroup: gr.Checkboxgroup(Dataset.get_float_col_list(), visible=True, label=LN.heatmap_checkboxgroup),
376
+ heatmap_is_rotate: gr.Checkbox(visible=True, label=LN.heatmap_is_rotate),
377
+ heatmap_button: gr.Button(LN.heatmap_button, visible=True),
378
+ descriptive_indicators_checkboxgroup: gr.Checkboxgroup(Dataset.get_float_col_list(), visible=True, label=LN.descriptive_indicators_checkboxgroup),
379
+ data_distribution_radio: gr.Radio(Dataset.get_str_col_list(), visible=True, label=LN.data_distribution_radio),
380
+ data_distribution_is_rotate: gr.Checkbox(visible=True, label=LN.data_distribution_is_rotate),
381
+ data_distribution_button: gr.Button(LN.data_distribution_button, visible=True),
382
+ descriptive_indicators_is_rotate: gr.Checkbox(visible=True, label=LN.descriptive_indicators_is_rotate),
383
+ descriptive_indicators_dataframe: gr.Dataframe(Dataset.get_descriptive_indicators_df(), type="pandas", visible=Dataset.check_descriptive_indicators_df()),
384
+ descriptive_indicators_button: gr.Button(LN.descriptive_indicators_button, visible=True),
385
  learning_curve_checkboxgroup: gr.Checkboxgroup(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.learning_curve_checkboxgroup),
386
+ learning_curve_button: gr.Button(LN.learning_curve_button, visible=Dataset.check_before_train()),
 
387
  shap_beeswarm_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.shap_beeswarm_radio),
388
+ shap_beeswarm_type: gr.Radio(Dataset.get_shap_beeswarm_plot_type(), visible=Dataset.check_before_train(), label=LN.shap_beeswarm_type),
389
  shap_beeswarm_button: gr.Button(LN.shap_beeswarm_button, visible=Dataset.check_before_train()),
390
+ data_fit_checkboxgroup: gr.Checkboxgroup(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.data_fit_checkboxgroup),
391
+ data_fit_button: gr.Button(LN.data_fit_button, visible=Dataset.check_before_train()),
392
+ waterfall_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.waterfall_radio),
393
+ waterfall_number: gr.Slider(0, Dataset.get_total_row_num(), value=0, step=1, visible=Dataset.check_before_train(), label=LN.waterfall_number),
394
+ waterfall_button: gr.Button(LN.waterfall_button, visible=Dataset.check_before_train()),
395
+ force_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.force_radio),
396
+ force_number: gr.Slider(0, Dataset.get_total_row_num(), value=0, step=1, visible=Dataset.check_before_train(), label=LN.force_number),
397
+ force_button: gr.Button(LN.force_button, visible=Dataset.check_before_train()),
398
+ dependence_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.dependence_radio),
399
+ dependence_col: gr.Radio(Dataset.get_col_list(), visible=Dataset.check_before_train(), label=LN.dependence_col),
400
+ dependence_button: gr.Button(LN.dependence_button, visible=Dataset.check_before_train()),
401
+
402
  }
403
 
404
  gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
 
431
  standardize_data_checkboxgroup: gr.Checkboxgroup(visible=False),
432
  standardize_data_button: gr.Button(visible=False),
433
  select_as_y_radio: gr.Radio(visible=False),
 
434
  model_optimize_radio: gr.Radio(visible=False),
435
  model_train_button: gr.Button(visible=False),
436
  model_train_checkbox: gr.Checkbox(visible=False),
437
+ model_train_metrics_dataframe: gr.Dataframe(visible=False),
438
+ model_train_params_dataframe: gr.Dataframe(visible=False),
439
  select_as_model_radio: gr.Radio(visible=False),
440
  choose_assign_radio: gr.Radio(visible=False),
441
 
 
445
  x_label_textbox: gr.Textbox(visible=False),
446
  y_label_textbox: gr.Textbox(visible=False),
447
 
448
+ # [模型]
449
+ linear_regression_model_radio: gr.Radio(visible=False),
450
+ naive_bayes_classification_model_radio: gr.Radio(visible=False),
451
+
452
  # [绘图]
453
+ heatmap_checkboxgroup: gr.Checkboxgroup(visible=False),
454
+ heatmap_is_rotate: gr.Checkbox(visible=False),
455
+ heatmap_button: gr.Button(visible=False),
456
+ data_distribution_radio: gr.Radio(visible=False),
457
+ data_distribution_is_rotate: gr.Checkbox(visible=False),
458
+ data_distribution_button: gr.Button(visible=False),
459
+ descriptive_indicators_checkboxgroup: gr.Checkboxgroup(visible=False),
460
+ descriptive_indicators_is_rotate: gr.Checkbox(visible=False),
461
+ descriptive_indicators_dataframe: gr.Dataframe(visible=False),
462
+ descriptive_indicators_button: gr.Button(visible=False),
463
  learning_curve_checkboxgroup: gr.Checkboxgroup(visible=False),
464
+ learning_curve_button: gr.Button(visible=False),
 
465
  shap_beeswarm_radio: gr.Radio(visible=False),
466
+ shap_beeswarm_type: gr.Radio(visible=False),
467
  shap_beeswarm_button: gr.Button(visible=False),
468
+ data_fit_checkboxgroup: gr.Checkboxgroup(visible=False),
469
+ data_fit_button: gr.Button(visible=False),
470
+ waterfall_radio: gr.Radio(visible=False),
471
+ waterfall_number: gr.Slider(visible=False),
472
+ waterfall_button: gr.Button(visible=False),
473
+ force_radio: gr.Radio(visible=False),
474
+ force_number: gr.Slider(visible=False),
475
+ force_button: gr.Button(visible=False),
476
+ dependence_radio: gr.Radio(visible=False),
477
+ dependence_col: gr.Radio(visible=False),
478
+ dependence_button: gr.Button(visible=False),
479
  }
480
 
481
  gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
 
498
  cur_model = ""
499
  select_y_mark = False
500
 
501
+ descriptive_indicators_df = pd.DataFrame()
502
+
503
+ linear_regression_model_type = ""
504
+ naive_bayes_classifier_model_type = ""
505
+
506
  container_dict = {
507
+ # [模型]
508
  MN.linear_regression: Container(),
509
  MN.polynomial_regression: Container(),
510
  MN.logistic_regression: Container(),
511
+ MN.decision_tree_classifier: Container(),
512
+ MN.random_forest_classifier: Container(),
513
+ MN.random_forest_regression: Container(),
514
+ MN.xgboost_classifier: Container(),
515
+ MN.lightGBM_classifier: Container(),
516
+ MN.gradient_boosting_regression: Container(),
517
+ MN.svm_classifier: Container(),
518
+ MN.svm_regression: Container(),
519
+ MN.knn_classifier: Container(),
520
+ MN.knn_regression: Container(),
521
+ MN.naive_bayes_classification: Container(),
522
  }
523
 
524
  visualize = ""
525
 
526
@classmethod
def check_descriptive_indicators_df(cls):
    """Return True when the stored descriptive-indicators frame is non-empty."""
    return not cls.descriptive_indicators_df.empty
529
+
530
@classmethod
def get_descriptive_indicators_df(cls):
    """Return the descriptive-indicators frame currently stored on the class."""
    return cls.descriptive_indicators_df
533
+
534
@classmethod
def get_notes(cls):
    """Read ``./data/notes.md`` as UTF-8 text and return its full content."""
    with open("./data/notes.md", "r", encoding="utf-8") as notes_file:
        return str(notes_file.read())
540
+
541
  @classmethod
542
  def get_dataset_list(cls):
543
+ return ["自定义", "Iris Dataset", "Wine Dataset", "Breast Cancer Dataset", "Diabetes Dataset", "California Housing Dataset"]
544
 
545
  @classmethod
546
  def get_col_list(cls):
 
739
 
740
  for i, col in enumerate(cls.data.columns.values):
741
  if i == 0:
742
+ if not (all(isinstance(x, str) for x in cls.data.iloc[:, 0]) or all(isinstance(x, float) for x in cls.data.iloc[:, 0])):
 
743
  return False
744
  else:
745
  if cls.data[col].dtype.name != "float64":
 
769
  def get_linear_regression_model_list(cls):
770
  return ["线性回归", "Lasso回归", "Ridge回归", "弹性网络回归"]
771
 
772
@classmethod
def get_naive_bayes_classifier_model_list(cls):
    """Return the display names of the selectable naive Bayes variants."""
    variant_names = [
        "多项式朴素贝叶斯分类",
        "高斯朴素贝叶斯分类",
        "补充朴素贝叶斯分类",
    ]
    return variant_names
775
+
776
  @classmethod
777
  def get_linear_regression_model_name_mapping(cls):
778
  return dict(zip(cls.get_linear_regression_model_list(), ["LinearRegression", "Lasso", "Ridge", "ElasticNet"]))
779
 
780
  @classmethod
781
+ def get_naive_bayes_classifier_model_name_mapping(cls):
782
+ return dict(zip(cls.get_naive_bayes_classifier_model_list(), ["MultinomialNB", "GaussianNB", "ComplementNB"]))
783
+
784
+ @classmethod
785
+ def train_model(cls, optimize, linear_regression_model_type=None, naive_bayes_classifier_model_type=None):
786
  optimize = cls.get_optimize_name_mapping()[optimize]
787
 
788
  data_copy = cls.data
 
797
  )
798
  container = Container(x_train, y_train, x_test, y_test, optimize)
799
 
800
+ # [模型]
801
  if cls.cur_model == MN.linear_regression:
802
+ cls.linear_regression_model_type = cls.get_linear_regression_model_name_mapping()[linear_regression_model_type]
803
+ container = linear_regression(container, cls.linear_regression_model_type)
804
  elif cls.cur_model == MN.polynomial_regression:
805
  container = polynomial_regression(container)
806
  elif cls.cur_model == MN.logistic_regression:
807
  container = logistic_regression(container)
808
+ elif cls.cur_model == MN.decision_tree_classifier:
809
+ container = decision_tree_classifier(container)
810
+ elif cls.cur_model == MN.random_forest_classifier:
811
+ container = random_forest_classifier(container)
812
+ elif cls.cur_model == MN.random_forest_regression:
813
+ container = random_forest_regression(container)
814
+ elif cls.cur_model == MN.xgboost_classifier:
815
+ container = xgboost_classifier(container)
816
+ elif cls.cur_model == MN.lightGBM_classifier:
817
+ container = lightGBM_classifier(container)
818
+ elif cls.cur_model == MN.gradient_boosting_regression:
819
+ container = gradient_boosting_regression(container)
820
+ elif cls.cur_model == MN.svm_classifier:
821
+ container = svm_classifier(container)
822
+ elif cls.cur_model == MN.svm_regression:
823
+ container = svm_regression(container)
824
+ elif cls.cur_model == MN.knn_classifier:
825
+ container = knn_classifier(container)
826
+ elif cls.cur_model == MN.knn_regression:
827
+ container = knn_regression(container)
828
+ elif cls.cur_model == MN.naive_bayes_classification:
829
+ cls.naive_bayes_classifier_model_type = cls.get_naive_bayes_classifier_model_name_mapping()[naive_bayes_classifier_model_type]
830
+ container = naive_bayes_classification(container, cls.naive_bayes_classifier_model_type)
831
 
832
  cls.container_dict[cls.cur_model] = container
833
 
 
847
  def get_model_name(cls):
848
  return [x for x in cls.container_dict.keys()]
849
 
850
+ # [模型]
851
  @classmethod
852
  def get_model_chinese_name(cls):
853
+ return ["线性回归", "多项式回归", "逻辑斯谛分类", "决策树分类", "随机森林分类", "随机森林回归", "XGBoost分类", "LightGBM分类",
854
+ "梯度提升回归", "支持向量机分类", "支持向量机回归", "K-最近邻分类", "K-最近邻回归", "朴素贝叶斯分类"]
855
 
856
  @classmethod
857
  def get_model_name_mapping(cls):
 
874
  @classmethod
875
  def draw_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
876
  # [绘图]
877
+ if cls.visualize == MN.learning_curve:
878
+ return cls.draw_learning_curve_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
 
 
879
  elif cls.visualize == MN.shap_beeswarm:
880
  return cls.draw_shap_beeswarm_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
881
+ elif cls.visualize == MN.data_fit:
882
+ return cls.draw_data_fit_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
883
+ elif cls.visualize == MN.waterfall:
884
+ return cls.draw_waterfall_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
885
+ elif cls.visualize == MN.force:
886
+ return cls.draw_force_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
887
+ elif cls.visualize == MN.dependence:
888
+ return cls.draw_dependence_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
889
+ elif cls.visualize == MN.data_distribution:
890
+ return cls.draw_data_distribution_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
891
+ elif cls.visualize == MN.descriptive_indicators:
892
+ return cls.draw_descriptive_indicators_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
893
+ elif cls.visualize == MN.heatmap:
894
+ return cls.draw_heatmap_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
895
 
896
  @classmethod
897
+ def draw_heatmap_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
898
+ color_cur_list = [] if is_default else color_list
899
+ x_cur_label = "Indicators" if is_default else x_label
900
+ y_cur_label = "Value" if is_default else y_label
901
+ cur_name = "" if is_default else name
902
 
903
+ paint_object = PaintObject()
904
+ paint_object.set_color_cur_list(color_cur_list)
905
+ paint_object.set_x_cur_label(x_cur_label)
906
+ paint_object.set_y_cur_label(y_cur_label)
907
+ paint_object.set_name(cur_name)
908
 
909
+ if cls.check_col_list(select_model.get_heatmap_col()):
910
+ return cls.error_return_draw(paint_object)
911
+
912
+ df = Dataset.data
913
+ heatmap_col = select_model.get_heatmap_col()
914
+
915
+ covX = np.around(np.corrcoef(df[heatmap_col].T), decimals=3)
916
+ std_dev = np.sqrt(np.diag(covX))
917
+ pearson_matrix = covX / np.outer(std_dev, std_dev)
918
+
919
+ return draw_heat_map(pearson_matrix, heatmap_col, paint_object, select_model.get_heatmap_is_rotate())
920
+
921
@classmethod
def draw_descriptive_indicators_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
    """Compute per-column descriptive statistics and draw their box plot.

    The statistics table (one row per selected column) is stored on
    ``cls.descriptive_indicators_df``; the box plot is produced by
    ``draw_boxplot`` from the selected columns cast to float.

    Args:
        select_model: Object providing ``get_descriptive_indicators_col()``
            and ``get_descriptive_indicators_is_rotate()``.
        color_list: Colours used when ``is_default`` is false.
        label_list: Not used by this plot.
        name: Plot title used when ``is_default`` is false.
        x_label: X-axis label used when ``is_default`` is false.
        y_label: Y-axis label used when ``is_default`` is false.
        is_default: When true, use one random colour from ``Config.COLORS``
            (repeated three times), the labels "Indicators" / "Value" and
            an empty title.

    Returns:
        The result of ``draw_boxplot``, or of ``cls.error_return_draw`` when
        ``cls.check_col_list`` returns a truthy value for the selected
        columns.
    """
    color_cur_list = [Config.COLORS[random.randint(0, 11)]] * 3 if is_default else color_list
    x_cur_label = "Indicators" if is_default else x_label
    y_cur_label = "Value" if is_default else y_label
    cur_name = "" if is_default else name

    paint_object = PaintObject()
    paint_object.set_color_cur_list(color_cur_list)
    paint_object.set_x_cur_label(x_cur_label)
    paint_object.set_y_cur_label(y_cur_label)
    paint_object.set_name(cur_name)

    if cls.check_col_list(select_model.get_descriptive_indicators_col()):
        return cls.error_return_draw(paint_object)

    df = Dataset.data
    descriptive_indicators_col = select_model.get_descriptive_indicators_col()

    stat_columns = [
        "Name",
        "Min",
        "Max",
        "Avg",
        "Standard Deviation",
        "Standard Error",
        "Upper Quartile",
        "Median",
        "Lower Quartile",
        "Interquartile Distance",
        "Kurtosis",
        "Skewness",
        "Coefficient of Variation",
    ]

    # Rows are collected first and the frame is built in one go. Filling a
    # pre-allocated frame through chained indexing (df[a][b] = v) does not
    # write through under pandas Copy-on-Write.
    records = []
    for col in descriptive_indicators_col:
        series = df[col]
        mean = series.mean()
        std = series.std()
        upper_quartile = series.quantile(0.75)
        lower_quartile = series.quantile(0.25)

        records.append({
            "Name": col,
            "Min": series.min(),
            "Max": series.max(),
            "Avg": mean,
            "Standard Deviation": std,
            "Standard Error": std / math.sqrt(len(series)),
            "Upper Quartile": upper_quartile,
            "Median": series.quantile(0.5),
            "Lower Quartile": lower_quartile,
            # Q3 - Q1, so the interquartile distance is never negative.
            "Interquartile Distance": upper_quartile - lower_quartile,
            "Kurtosis": series.kurt(),
            "Skewness": series.skew(),
            "Coefficient of Variation": std / mean,
        })

    descriptive_indicators_df = pd.DataFrame(
        records,
        index=list(descriptive_indicators_col),
        columns=stat_columns,
    )

    cls.descriptive_indicators_df = descriptive_indicators_df

    cur_df = df[descriptive_indicators_col].astype(float)

    return draw_boxplot(cur_df, paint_object, select_model.get_descriptive_indicators_is_rotate())
983
+
984
@classmethod
def error_return_draw(cls, paint_object):
    """Return a new ``plt.Figure`` with ``figsize=(10, 8)`` together with ``paint_object``.

    The draw methods of this class return this pair when their column check
    rejects the selection.
    """
    return plt.Figure(figsize=(10, 8)), paint_object
988
+
989
@classmethod
def draw_data_distribution_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
    """Draw the value-frequency histogram of the column chosen in ``select_model``.

    With ``is_default`` the plot uses one random colour from
    ``Config.COLORS``, the column name as x label, "Num" as y label and an
    empty title; otherwise the caller's values are used. Values are counted
    and ordered by descending frequency. The result of
    ``cls.error_return_draw`` is returned when ``cls.check_col_list``
    returns a truthy value for the column, or when
    ``Dataset.check_data_distribution_type`` does not report "histogram".
    ``label_list`` is not used here.
    """
    cur_col = select_model.get_data_distribution_col()

    if is_default:
        colors = [Config.COLORS[random.randint(0, 11)]]
        x_text, y_text, title = cur_col, "Num", ""
    else:
        colors = color_list
        x_text, y_text, title = x_label, y_label, name

    paint_object = PaintObject()
    paint_object.set_color_cur_list(colors)
    paint_object.set_x_cur_label(x_text)
    paint_object.set_y_cur_label(y_text)
    paint_object.set_name(title)

    if cls.check_col_list(select_model.get_data_distribution_col()):
        return cls.error_return_draw(paint_object)

    # Frequency of every distinct value, in first-seen order.
    counts_mapping = {}
    for value in Dataset.data.loc[:, cur_col].values:
        counts_mapping[value] = counts_mapping.get(value, 0) + 1

    # Most frequent first; ties keep their first-seen order.
    ranked = sorted(counts_mapping.items(), key=lambda item: item[1], reverse=True)
    nums = [count for _, count in ranked]
    labels = [value for value, _ in ranked]

    if Dataset.check_data_distribution_type(cur_col) != "histogram":
        return cls.error_return_draw(paint_object)
    return draw_histogram(nums, labels, paint_object, select_model.get_data_distribution_is_rotate())
1022
+
1023
@classmethod
def draw_dependence_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
    """Draw the dependence plot for the trained model chosen in ``select_model``.

    The display name from ``select_model.get_models()`` is translated with
    ``cls.get_model_name_mapping_reverse()`` to find the model's container.
    Only the title is configurable: it is empty when ``is_default`` is true
    and ``name`` otherwise. ``color_list``, ``label_list``, ``x_label`` and
    ``y_label`` are not used here.
    """
    shown_name = select_model.get_models()
    container = cls.container_dict[cls.get_model_name_mapping_reverse()[shown_name]]

    paint_object = PaintObject()
    paint_object.set_name("" if is_default else name)

    return draw_dependence(
        container.get_model(),
        container.x_train,
        # Every column name except the first (presumably the target column - confirm).
        cls.data.columns.values.tolist()[1:],
        select_model.get_dependence_col(),
        paint_object,
    )
1044
 
1045
  @classmethod
1046
def draw_force_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
    """Draw the force plot for the single model chosen in ``select_model``.

    Only ``name`` and ``is_default`` influence the paint object here;
    ``color_list``, ``label_list``, ``x_label`` and ``y_label`` are accepted
    but not used by this method.

    Returns whatever ``draw_force`` returns.
    """
    # Translate the displayed model name into the key used by container_dict.
    display_name = select_model.get_models()
    internal_name = cls.get_model_name_mapping_reverse()[display_name]
    container = cls.container_dict[internal_name]

    # With default styling the title is left empty.
    paint_object = PaintObject()
    paint_object.set_name(name if not is_default else "")

    return draw_force(
        container.get_model(),
        container.x_train,
        # Every data column except the first one is passed as a feature name.
        cls.data.columns.values.tolist()[1:],
        select_model.get_force_number(),
        paint_object,
    )
1066
+
1067
+ @classmethod
1068
def draw_waterfall_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
    """Draw the waterfall plot for the single model chosen in ``select_model``.

    Only ``name`` and ``is_default`` influence the paint object here;
    ``color_list``, ``label_list``, ``x_label`` and ``y_label`` are accepted
    but not used by this method.

    Returns whatever ``draw_waterfall`` returns.
    """
    # Translate the displayed model name into the key used by container_dict.
    display_name = select_model.get_models()
    internal_name = cls.get_model_name_mapping_reverse()[display_name]
    container = cls.container_dict[internal_name]

    # With default styling the title is left empty.
    paint_object = PaintObject()
    paint_object.set_name(name if not is_default else "")

    return draw_waterfall(
        container.get_model(),
        container.x_train,
        # Every data column except the first one is passed as a feature name.
        cls.data.columns.values.tolist()[1:],
        select_model.get_waterfall_number(),
        paint_object,
    )
1088
+
1089
+ @classmethod
1090
+ def draw_learning_curve_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
1091
+ cur_dict = {}
1092
+
1093
+ model_list = select_model.get_models()
1094
 
1095
  for model_name in model_list:
1096
  model_name = cls.get_model_name_mapping_reverse()[model_name]
1097
+ cur_dict[model_name] = cls.container_dict[model_name].get_learning_curve_values()
1098
 
1099
  color_cur_list = Config.COLORS if is_default else color_list
1100
+ if is_default:
1101
+ label_cur_list = []
1102
+ for x in cur_dict.keys():
1103
+ label_cur_list.append("train " + str(x))
1104
+ label_cur_list.append("validation " + str(x))
1105
+ else:
1106
+ label_cur_list = label_list
1107
+
1108
  x_cur_label = "Train Sizes" if is_default else x_label
1109
  y_cur_label = "Accuracy" if is_default else y_label
1110
  cur_name = "" if is_default else name
 
1116
  paint_object.set_y_cur_label(y_cur_label)
1117
  paint_object.set_name(cur_name)
1118
 
1119
+ if cls.check_cur_dict(cur_dict):
1120
+ return cls.error_return_draw(paint_object)
1121
+
1122
+ return draw_learning_curve_total(cur_dict, paint_object)
1123
 
1124
  @classmethod
1125
+ def draw_shap_beeswarm_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
1126
+ model_name = select_model.get_models()
1127
+
1128
  model_name = cls.get_model_name_mapping_reverse()[model_name]
1129
  container = cls.container_dict[model_name]
1130
 
 
1141
  # paint_object.set_y_cur_label(y_cur_label)
1142
  paint_object.set_name(cur_name)
1143
 
1144
+ return draw_shap_beeswarm(container.get_model(), container.x_train, cls.data.columns.values.tolist()[1:], select_model.get_beeswarm_plot_type(), paint_object)
1145
+
1146
+ @classmethod
1147
def draw_data_fit_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
    """Draw the data-fit plot for every model selected in ``select_model``.

    When ``is_default`` is true the colors come from ``Config.COLORS``, the
    legend is one "pred <model>" entry per model followed by "real data",
    the axis labels are "n value" / "y value" and the title is empty.
    Otherwise the caller-supplied ``color_list``, ``label_list``,
    ``x_label``, ``y_label`` and ``name`` are used.

    Returns the result of ``draw_data_fit_total``, or the result of
    ``cls.error_return_draw`` when no model is selected.
    """
    # The reverse mapping does not depend on the loop variable; fetch it once.
    reverse_mapping = cls.get_model_name_mapping_reverse()

    cur_dict = {}
    for display_name in select_model.get_models():
        model_name = reverse_mapping[display_name]
        cur_dict[model_name] = cls.container_dict[model_name].get_data_fit_values()

    if is_default:
        color_cur_list = Config.COLORS
        label_cur_list = ["pred " + str(x) for x in cur_dict]
        label_cur_list.append("real data")
        x_cur_label = "n value"
        y_cur_label = "y value"
        cur_name = ""
    else:
        color_cur_list = color_list
        label_cur_list = label_list
        x_cur_label = x_label
        y_cur_label = y_label
        cur_name = name

    paint_object = PaintObject()
    paint_object.set_color_cur_list(color_cur_list)
    paint_object.set_label_cur_list(label_cur_list)
    paint_object.set_x_cur_label(x_cur_label)
    paint_object.set_y_cur_label(y_cur_label)
    paint_object.set_name(cur_name)

    # Same guard as the learning-curve plot: warn and bail out when no
    # model was selected instead of drawing from an empty dictionary.
    if cls.check_cur_dict(cur_dict):
        return cls.error_return_draw(paint_object)

    return draw_data_fit_total(cur_dict, paint_object)
1177
+
1178
+ @classmethod
1179
def get_shap_beeswarm_plot_type(cls):
    """Return the selectable beeswarm plot types as a fresh list."""
    plot_types = ("bar", "violin")
    return list(plot_types)
1181
 
1182
  @classmethod
1183
def get_file(cls):
    """Return the PNG path for the plot kind stored in ``cls.visualize``.

    The path is ``FilePath.png_base`` formatted with the matching
    ``FilePath.<kind>_plot`` value. Returns ``None`` when ``cls.visualize``
    equals none of the known kinds.
    """
    # [plotting] Kinds are tested in this order; kind ``k`` pairs ``MN.k``
    # with ``FilePath.k_plot``. Attributes are looked up lazily so only the
    # entries actually reached are touched.
    plot_kinds = (
        "learning_curve",
        "shap_beeswarm",
        "data_fit",
        "waterfall",
        "force",
        "dependence",
        "data_distribution",
        "descriptive_indicators",
        "heatmap",
    )
    for kind in plot_kinds:
        if cls.visualize == getattr(MN, kind):
            return FilePath.png_base.format(getattr(FilePath, kind + "_plot"))
    return None
1203
 
1204
  @classmethod
1205
  def check_file(cls):
 
1229
def get_linear_regression_mark(cls):
    """Return True when the current model is the linear regression model."""
    is_linear_regression = cls.cur_model == MN.linear_regression
    return bool(is_linear_regression)
1231
 
1232
+ @classmethod
1233
def get_naive_bayes_classifier_mark(cls):
    """Return True when the current model is the naive Bayes classifier."""
    is_naive_bayes = cls.cur_model == MN.naive_bayes_classification
    return bool(is_naive_bayes)
1235
+
1236
  @classmethod
1237
  def get_assign_list(cls):
1238
  return ["分类", "回归"]
 
1279
 
1280
  return true_list + [gr.Textbox(visible=False)] * (StaticValue.max_num - cur_num)
1281
 
1282
+ @classmethod
1283
def get_model_train_metrics_dataframe(cls):
    """Build a two-column table of the current model's training metrics.

    Only metrics listed by ``ChooseModelMetrics.choose(cls.cur_model)`` are
    kept, in the order they appear in the container's info dictionary.

    Returns a DataFrame with columns "指标" and "数值", or ``None`` when no
    model is selected or ``cls.get_model_container_status()`` is falsy.
    """
    if cls.cur_model == "" or not cls.get_model_container_status():
        return None

    output_dict = cls.container_dict[cls.cur_model].get_info()["指标"]

    # The allowed metric names do not change per key; look them up once.
    allowed_metrics = ChooseModelMetrics.choose(cls.cur_model)
    metric_names = [x for x in output_dict if x in allowed_metrics]

    output_df = pd.DataFrame(columns=["指标", "数值"])
    output_df["指标"] = metric_names
    output_df["数值"] = [output_dict[x] for x in metric_names]

    return output_df
1294
+
1295
+ @classmethod
1296
def get_model_train_params_dataframe(cls):
    """Build a two-column table of the current model's training parameters.

    Only parameters that are keys of ``ChooseModelParams.choose(cls.cur_model)``
    are kept, in the order they appear in the container's info dictionary.

    Returns a DataFrame with columns "参数" and "数值", or ``None`` when no
    model is selected or ``cls.get_model_container_status()`` is falsy.
    """
    if cls.cur_model == "" or not cls.get_model_container_status():
        return None

    output_dict = cls.container_dict[cls.cur_model].get_info()["参数"]

    # The allowed parameter names do not change per key; look them up once.
    allowed_params = ChooseModelParams.choose(cls.cur_model).keys()
    param_names = [x for x in output_dict if x in allowed_params]

    output_df = pd.DataFrame(columns=["参数", "数值"])
    output_df["参数"] = param_names
    output_df["数值"] = [output_dict[x] for x in param_names]

    return output_df
1307
+
1308
+ @classmethod
1309
def get_str_col_list(cls):
    """Return the columns of ``cls.data`` whose values are all ``str``.

    Columns are taken from ``cls.get_col_list()`` and kept in that order.
    """
    return [
        col
        for col in cls.get_col_list()
        if all(isinstance(value, str) for value in cls.data.loc[:, col])
    ]
1316
+
1317
+ @classmethod
1318
def get_float_col_list(cls):
    """Return the columns of ``cls.data`` whose values are all ``float``.

    Columns are taken from ``cls.get_col_list()`` and kept in that order.
    """
    return [
        col
        for col in cls.get_col_list()
        if all(isinstance(value, float) for value in cls.data.loc[:, col])
    ]
1325
+
1326
+ @classmethod
1327
def check_data_distribution_type(cls, col):
    """Classify how column ``col`` of ``cls.data`` should be plotted.

    Returns "histogram" when every value in the column is a ``str``.
    Otherwise a Gradio warning is raised and ``None`` is returned.
    """
    column_values = cls.data.loc[:, col]
    only_strings = all(isinstance(value, str) for value in column_values)
    if not only_strings:
        gr.Warning("所选列的所有数据必须为字符型或浮点型")
        return None
    return "histogram"
1334
+
1335
+ @classmethod
1336
def check_col_list(cls, col):
    """Return True (after a Gradio warning) when ``col`` is empty/falsy.

    Returns False when a column selection is present.
    """
    if col:
        return False
    gr.Warning("请选择所需列")
    return True
1341
+
1342
+ @classmethod
1343
def check_train_model(cls, optimize):
    """Validate the training request; return True when it must be aborted.

    A Gradio warning is raised when no model is selected
    (``cls.cur_model == ""``) or, failing that, when ``optimize`` is falsy.
    Returns False when both checks pass.
    """
    problem = None
    if cls.cur_model == "":
        problem = "请选择所需训练的模型"
    elif not optimize:
        problem = "请选择超参数优化方法"

    if problem is None:
        return False
    gr.Warning(problem)
    return True
1351
+
1352
+ @classmethod
1353
def error_return_train(cls):
    """Return the standard ``get_return(True)`` result used when training is aborted."""
    outputs = get_return(True)
    return outputs
1355
+
1356
+ @classmethod
1357
def check_train_model_other_related(cls, linear_regression_model_type, naive_bayes_classifier_model_type):
    """Check the model-specific sub-type selection; return True to abort.

    For linear regression ``linear_regression_model_type`` must be truthy;
    for naive Bayes classification ``naive_bayes_classifier_model_type``
    must be truthy. A Gradio warning is raised when the required one is
    missing. Any other current model passes (returns False).
    """
    if cls.cur_model == MN.linear_regression:
        if linear_regression_model_type:
            return False
        gr.Warning("请选择线性回归对应的模型")
        return True

    if cls.cur_model == MN.naive_bayes_classification:
        if naive_bayes_classifier_model_type:
            return False
        gr.Warning("请选择朴素贝叶斯对应的模型")
        return True

    return False
1367
+
1368
+ @classmethod
1369
def check_cur_dict(cls, cur_dict):
    """Return True (after a Gradio warning) when ``cur_dict`` is empty/falsy.

    Returns False when at least one model entry is present.
    """
    if cur_dict:
        return False
    gr.Warning("请选择绘图所需的模型")
    return True
1374
+
1375
 
1376
  def choose_assign(assign: str):
1377
  Dataset.choose_assign(assign)
 
1386
 
1387
 
1388
  # [绘图]
1389
def heatmap_first_draw_plot(*inputs):
    """Select the heatmap plot kind and draw it with default styling.

    ``inputs`` is forwarded as one tuple to ``before_train_first_draw_plot``.
    """
    Dataset.visualize = MN.heatmap
    component_values = inputs
    return before_train_first_draw_plot(component_values)
1392
+
1393
+
1394
def descriptive_indicators_first_draw_plot(*inputs):
    """Select the descriptive-indicators plot kind and draw it with default styling.

    ``inputs`` is forwarded as one tuple to ``before_train_first_draw_plot``.
    """
    Dataset.visualize = MN.descriptive_indicators
    component_values = inputs
    return before_train_first_draw_plot(component_values)
1397
+
1398
+
1399
def data_distribution_first_draw_plot(*inputs):
    """Select the data-distribution plot kind and draw it with default styling.

    ``inputs`` is forwarded as one tuple to ``before_train_first_draw_plot``.
    """
    Dataset.visualize = MN.data_distribution
    component_values = inputs
    return before_train_first_draw_plot(component_values)
1402
+
1403
+
1404
def dependence_first_draw_plot(*inputs):
    """Select the dependence plot kind and draw it with default styling.

    ``inputs`` is forwarded as one tuple to ``first_draw_plot``.
    """
    Dataset.visualize = MN.dependence
    component_values = inputs
    return first_draw_plot(component_values)
1407
+
1408
+
1409
def force_first_draw_plot(*inputs):
    """Select the force plot kind and draw it with default styling.

    ``inputs`` is forwarded as one tuple to ``first_draw_plot``.
    """
    Dataset.visualize = MN.force
    component_values = inputs
    return first_draw_plot(component_values)
1412
+
1413
+
1414
def waterfall_first_draw_plot(*inputs):
    """Select the waterfall plot kind and draw it with default styling.

    ``inputs`` is forwarded as one tuple to ``first_draw_plot``.
    """
    Dataset.visualize = MN.waterfall
    component_values = inputs
    return first_draw_plot(component_values)
1417
+
1418
+
1419
def data_fit_first_draw_plot(*inputs):
    """Select the data-fit plot kind and draw it with default styling.

    ``inputs`` is forwarded as one tuple to ``first_draw_plot``.
    """
    Dataset.visualize = MN.data_fit
    component_values = inputs
    return first_draw_plot(component_values)
1422
+
1423
+
1424
def shap_beeswarm_first_draw_plot(*inputs):
    """Select the SHAP beeswarm plot kind and draw it with default styling.

    ``inputs`` is forwarded as one tuple to ``first_draw_plot``.
    """
    Dataset.visualize = MN.shap_beeswarm
    component_values = inputs
    return first_draw_plot(component_values)
1427
 
1428
 
1429
def learning_curve_first_draw_plot(*inputs):
    """Select the learning-curve plot kind and draw it with default styling.

    ``inputs`` is forwarded as one tuple to ``first_draw_plot``.
    """
    Dataset.visualize = MN.learning_curve
    component_values = inputs
    return first_draw_plot(component_values)
1432
 
1433
 
1434
def before_train_first_draw_plot(inputs):
    """Draw, with default styling, a plot that needs no trained model.

    ``inputs`` holds the raw component values for the plot kind currently
    stored in ``Dataset.visualize``; the first two entries are copied onto a
    fresh ``SelectModel`` according to that kind. The plot is then drawn
    with empty colors/labels/title and ``is_default=True``.

    Returns the result of ``first_draw_plot_with_non_first_draw_plot``.
    """
    select_model = SelectModel()

    # [plotting][no trained model]
    plot_kind = Dataset.visualize
    if plot_kind == MN.data_distribution:
        # inputs: (column, is_rotate)
        select_model.set_data_distribution_col(inputs[0])
        select_model.set_data_distribution_is_rotate(inputs[1])
    elif plot_kind == MN.descriptive_indicators:
        # inputs: (is_rotate, columns) -- note the reversed order
        select_model.set_descriptive_indicators_is_rotate(inputs[0])
        select_model.set_descriptive_indicators_col(inputs[1])
    elif plot_kind == MN.heatmap:
        # inputs: (columns, is_rotate)
        select_model.set_heatmap_col(inputs[0])
        select_model.set_heatmap_is_rotate(inputs[1])

    # Positional arguments: color_list, label_list, name, x_label, y_label, is_default.
    cur_plt, paint_object = Dataset.draw_plot(select_model, [], [], "", "", "", True)

    return first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object)
1456
 
1457
 
1458
def first_draw_plot(inputs):
    """Draw, with default styling, a plot that depends on trained model(s).

    ``inputs[0]`` is stored as the model selection. For the beeswarm,
    waterfall, force and dependence kinds ``inputs[1]`` is stored as the
    plot-specific option; other kinds never read ``inputs[1]``. The plot is
    then drawn with empty colors/labels/title and ``is_default=True``.

    Returns the result of ``first_draw_plot_with_non_first_draw_plot``.
    """
    select_model = SelectModel()
    select_model.set_models(inputs[0])

    # [plotting][trained model required]
    plot_kind = Dataset.visualize
    if plot_kind == MN.shap_beeswarm:
        select_model.set_beeswarm_plot_type(inputs[1])
    elif plot_kind == MN.waterfall:
        select_model.set_waterfall_number(inputs[1])
    elif plot_kind == MN.force:
        select_model.set_force_number(inputs[1])
    elif plot_kind == MN.dependence:
        select_model.set_dependence_col(inputs[1])

    # Positional arguments: color_list, label_list, name, x_label, y_label, is_default.
    cur_plt, paint_object = Dataset.draw_plot(select_model, [], [], "", "", "", True)

    return first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object)
 
1491
  label_list = list(inputs[StaticValue.max_num+3: 2*StaticValue.max_num+3])
1492
  start_index = 2*StaticValue.max_num+3
1493
 
1494
+ select_model = SelectModel()
1495
+
1496
  # 绘图
1497
+ if Dataset.visualize == MN.learning_curve:
1498
+ select_model.set_models(inputs[start_index+0])
1499
+ select_model.set_beeswarm_plot_type(inputs[start_index+1])
 
1500
  elif Dataset.visualize == MN.shap_beeswarm:
1501
+ select_model.set_models(inputs[start_index+2])
1502
+ elif Dataset.visualize == MN.data_fit:
1503
+ select_model.set_models(inputs[start_index+3])
1504
+ elif Dataset.visualize == MN.waterfall:
1505
+ select_model.set_models(inputs[start_index+4])
1506
+ select_model.set_waterfall_number(inputs[start_index+5])
1507
+ elif Dataset.visualize == MN.force:
1508
+ select_model.set_models(inputs[start_index+6])
1509
+ select_model.set_force_number(inputs[start_index+7])
1510
+ elif Dataset.visualize == MN.dependence:
1511
+ select_model.set_models(inputs[start_index+8])
1512
+ select_model.set_dependence_col(inputs[start_index+9])
1513
+ elif Dataset.visualize == MN.data_distribution:
1514
+ select_model.set_data_distribution_col(inputs[start_index+10])
1515
+ select_model.set_data_distribution_is_rotate(inputs[start_index+11])
1516
+ elif Dataset.visualize == MN.descriptive_indicators:
1517
+ select_model.set_descriptive_indicators_is_rotate(inputs[start_index+12])
1518
+ select_model.set_descriptive_indicators_col(inputs[start_index+13])
1519
+ elif Dataset.visualize == MN.descriptive_indicators:
1520
+ select_model.set_heatmap_col(inputs[start_index+14])
1521
+ select_model.set_heatmap_is_rotate(inputs[start_index+15])
1522
 
1523
  else:
1524
+ select_model.set_models(inputs[start_index])
1525
 
1526
  cur_plt, paint_object = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, False)
1527
 
 
1532
  extra_gr_dict = {}
1533
 
1534
  # [绘图]
1535
+ if Dataset.visualize == MN.learning_curve:
1536
+ cur_plt.savefig(FilePath.png_base.format(FilePath.learning_curve_plot), dpi=300)
1537
+ extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.learning_curve_plot)})
 
 
 
1538
  elif Dataset.visualize == MN.shap_beeswarm:
1539
  cur_plt.savefig(FilePath.png_base.format(FilePath.shap_beeswarm_plot), dpi=300)
1540
  extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.shap_beeswarm_plot)})
1541
+ elif Dataset.visualize == MN.data_fit:
1542
+ cur_plt.savefig(FilePath.png_base.format(FilePath.data_fit_plot), dpi=300)
1543
+ extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.data_fit_plot)})
1544
+ elif Dataset.visualize == MN.waterfall:
1545
+ cur_plt.savefig(FilePath.png_base.format(FilePath.waterfall_plot), dpi=300)
1546
+ extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.waterfall_plot)})
1547
+ elif Dataset.visualize == MN.force:
1548
+ cur_plt.savefig(FilePath.png_base.format(FilePath.force_plot), dpi=300)
1549
+ extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.force_plot)})
1550
+ elif Dataset.visualize == MN.dependence:
1551
+ cur_plt.savefig(FilePath.png_base.format(FilePath.dependence_plot), dpi=300)
1552
+ extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.dependence_plot)})
1553
+ elif Dataset.visualize == MN.data_distribution:
1554
+ cur_plt.savefig(FilePath.png_base.format(FilePath.data_distribution_plot), dpi=300)
1555
+ extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.data_distribution_plot)})
1556
+ elif Dataset.visualize == MN.descriptive_indicators:
1557
+ cur_plt.savefig(FilePath.png_base.format(FilePath.descriptive_indicators_plot), dpi=300)
1558
+ extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.descriptive_indicators_plot)})
1559
+ extra_gr_dict.update({descriptive_indicators_dataframe: gr.Dataframe(Dataset.get_descriptive_indicators_df(), type="pandas", visible=Dataset.check_descriptive_indicators_df())})
1560
+ elif Dataset.visualize == MN.heatmap:
1561
+ cur_plt.savefig(FilePath.png_base.format(FilePath.heatmap_plot), dpi=300)
1562
+ extra_gr_dict.update({draw_plot: gr.Plot(cur_plt, visible=True, label=LN.heatmap_plot)})
1563
 
1564
  extra_gr_dict.update(dict(zip(colorpickers, Dataset.colorpickers_change(paint_object))))
1565
  extra_gr_dict.update(dict(zip(color_textboxs, Dataset.color_textboxs_change(paint_object))))
 
1571
  return get_return_extra(True, extra_gr_dict)
1572
 
1573
 
1574
+ # [模型]
1575
def train_model(optimize, linear_regression_model_type, naive_bayes_classifier_model_type):
    """Validate the training request and, if valid, train the current model.

    When either ``Dataset.check_train_model`` or
    ``Dataset.check_train_model_other_related`` reports a problem, the
    result of ``Dataset.error_return_train()`` is returned and nothing is
    trained. Otherwise ``Dataset.train_model`` runs and ``get_return(True)``
    is returned.
    """
    # Short-circuit keeps the second check from running when the first fails.
    request_invalid = Dataset.check_train_model(optimize) or Dataset.check_train_model_other_related(
        linear_regression_model_type, naive_bayes_classifier_model_type
    )
    if request_invalid:
        return Dataset.error_return_train()

    Dataset.train_model(optimize, linear_regression_model_type, naive_bayes_classifier_model_type)

    return get_return(True)
1585
 
 
1605
def encode_label(col_list: list):
    """Label-encode the given columns and show the resulting mapping table.

    Calls ``Dataset.encode_label(col_list)`` and then returns
    ``get_return`` with the ``display_encode_label_dataframe`` component
    updated to a visible dataframe of ``Dataset.get_str2int_mappings_df()``.
    """
    Dataset.encode_label(col_list)

    mapping_table = gr.Dataframe(
        Dataset.get_str2int_mappings_df(),
        type="pandas",
        visible=True,
        label=LN.display_encode_label_dataframe,
    )
    return get_return(True, {display_encode_label_dataframe: mapping_table})
 
 
1609
 
1610
 
1611
  def del_duplicate():
 
1660
  return get_return(True, {choose_custom_dataset_file: gr.File(Dataset.file, visible=True)})
1661
 
1662
 
1663
+ with gr.Blocks(js=Config.JS_0) as demo:
1664
  '''
1665
  组件
1666
  '''
 
1710
 
1711
  # 数据模型
1712
  with gr.Accordion("数据模型"):
1713
+ # [模型]
1714
  select_as_model_radio = gr.Radio(visible=False)
1715
  linear_regression_model_radio = gr.Radio(visible=False)
1716
+ naive_bayes_classification_model_radio = gr.Radio(visible=False)
1717
  model_optimize_radio = gr.Radio(visible=False)
1718
  model_train_button = gr.Button(visible=False)
1719
  model_train_checkbox = gr.Checkbox(visible=False)
1720
+ model_train_params_dataframe = gr.Dataframe(visible=False)
1721
+ model_train_metrics_dataframe = gr.Dataframe(visible=False)
1722
 
1723
  # 可视化
1724
  with gr.Accordion("数据可视化"):
1725
+ with gr.Tab("数据分布图"):
1726
+ data_distribution_radio = gr.Radio(visible=False)
1727
+ data_distribution_is_rotate = gr.Checkbox(visible=False)
1728
+ data_distribution_button = gr.Button(visible=False)
1729
+
1730
+ with gr.Tab("箱线统计图"):
1731
+ descriptive_indicators_checkboxgroup = gr.Checkboxgroup(visible=False)
1732
+ descriptive_indicators_is_rotate = gr.Checkbox(visible=False)
1733
+ descriptive_indicators_button = gr.Button(visible=False)
1734
+ descriptive_indicators_dataframe = gr.Dataframe(visible=False)
1735
+
1736
+ with gr.Tab("系数热力图"):
1737
+ heatmap_checkboxgroup = gr.Checkboxgroup(visible=False)
1738
+ heatmap_is_rotate = gr.Checkbox(visible=False)
1739
+ heatmap_button = gr.Button(visible=False)
1740
+
1741
+ # with gr.Tab("主成分分析"):
1742
+ # pca_button = gr.Button(visible=False)
1743
+ # pca_replace_data_button = gr.Button(visible=False)
1744
+
1745
+
1746
  with gr.Tab("学习曲线图"):
1747
  learning_curve_checkboxgroup = gr.Checkboxgroup(visible=False)
1748
+ learning_curve_button = gr.Button(visible=False)
1749
+
1750
+ with gr.Tab("数据拟合图"):
1751
+ data_fit_checkboxgroup = gr.Checkboxgroup(visible=False)
1752
+ data_fit_button = gr.Button(visible=False)
1753
 
1754
+ with gr.Tab("特征蜂群图"):
1755
  shap_beeswarm_radio = gr.Radio(visible=False)
1756
+ shap_beeswarm_type = gr.Radio(visible=False)
1757
  shap_beeswarm_button = gr.Button(visible=False)
1758
 
1759
+ with gr.Tab("特征瀑布图"):
1760
+ waterfall_radio = gr.Radio(visible=False)
1761
+ waterfall_number = gr.Slider(visible=False)
1762
+ waterfall_button = gr.Button(visible=False)
1763
+
1764
+ with gr.Tab("特征力图"):
1765
+ force_radio = gr.Radio(visible=False)
1766
+ force_number = gr.Slider(visible=False)
1767
+ force_button = gr.Button(visible=False)
1768
+
1769
+ with gr.Tab("特征依赖图"):
1770
+ dependence_radio = gr.Radio(visible=False)
1771
+ dependence_col = gr.Radio(visible=False)
1772
+ dependence_button = gr.Button(visible=False)
1773
+
1774
  legend_labels_textboxs = []
1775
  with gr.Accordion("图例"):
1776
  with gr.Row():
 
1799
  draw_plot = gr.Plot(visible=False)
1800
  draw_file = gr.File(visible=False)
1801
 
1802
+ with gr.Tab("文字说明"):
1803
+ notes = gr.Markdown(Dataset.get_notes(), visible=True)
1804
+
1805
  '''
1806
  监听事件
1807
  '''
 
1833
 
1834
  # 数据模型
1835
  select_as_model_radio.change(fn=select_as_model, inputs=[select_as_model_radio], outputs=get_outputs())
1836
+
1837
+ # [模型]
1838
+ model_train_button.click(fn=train_model, inputs=[model_optimize_radio, linear_regression_model_radio, naive_bayes_classification_model_radio], outputs=get_outputs())
1839
+
1840
+ # [绘图]
1841
 
1842
  # 可视化
1843
+ data_distribution_button.click(fn=data_distribution_first_draw_plot, inputs=[data_distribution_radio] + [data_distribution_is_rotate], outputs=get_outputs())
1844
+ descriptive_indicators_button.click(fn=descriptive_indicators_first_draw_plot, inputs=[descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup], outputs=get_outputs())
1845
+ heatmap_button.click(fn=heatmap_first_draw_plot, inputs=[heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
1846
+ learning_curve_button.click(fn=learning_curve_first_draw_plot, inputs=[learning_curve_checkboxgroup], outputs=get_outputs())
1847
+ shap_beeswarm_button.click(fn=shap_beeswarm_first_draw_plot, inputs=[shap_beeswarm_radio] + [shap_beeswarm_type], outputs=get_outputs())
1848
+ data_fit_button.click(fn=data_fit_first_draw_plot, inputs=[data_fit_checkboxgroup], outputs=get_outputs())
1849
+ waterfall_button.click(fn=waterfall_first_draw_plot, inputs=[waterfall_radio] + [waterfall_number], outputs=get_outputs())
1850
+ force_button.click(fn=force_first_draw_plot, inputs=[force_radio] + [force_number], outputs=get_outputs())
1851
+ dependence_button.click(fn=dependence_first_draw_plot, inputs=[dependence_radio] + [dependence_col], outputs=get_outputs())
1852
 
1853
  title_name_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1854
+ + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
1855
+ + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
1856
+ + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
1857
+
1858
  x_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1859
+ + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
1860
+ + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
1861
+ + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
1862
+
1863
  y_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1864
+ + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
1865
+ + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
1866
+ + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
1867
+
1868
  for i in range(StaticValue.max_num):
1869
  colorpickers[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1870
+ + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
1871
+ + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
1872
+ + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
1873
+
1874
  color_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + color_textboxs + legend_labels_textboxs
1875
+ + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
1876
+ + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
1877
+ + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
1878
+
1879
  legend_labels_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
1880
+ + [learning_curve_checkboxgroup] + [shap_beeswarm_radio] + [shap_beeswarm_type] + [data_fit_checkboxgroup] + [waterfall_radio] + [waterfall_number]
1881
+ + [force_radio] + [force_number] + [dependence_radio] + [dependence_col] + [data_distribution_radio] + [data_distribution_is_rotate]
1882
+ + [descriptive_indicators_is_rotate] + [descriptive_indicators_checkboxgroup] + [heatmap_checkboxgroup] + [heatmap_is_rotate], outputs=get_outputs())
1883
 
1884
  if __name__ == "__main__":
1885
  demo.launch()
data/__init__.py ADDED
File without changes
data/fetch_california_housing.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/notes.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # EasyMachineLearning
2
+ ### 介绍
3
+ - 版本:v1.0
4
+ - 作者:李凌浩
5
+ - 有任何新功能的想法和已出现的问题请和作者联系 ~
6
+ - *( WX: llh13857750421 )*
7
+ ### 尚未实现的功能
8
+ 1. [困难] 模型训练的进度条可视化(sklearn模型训练函数无回调函数)
9
+ 2. 模型训练完毕后保存模型文件,后续可直接加载
10
+ 3. 数据分析AI助手(直接处理Excel数据)
11
+ 4. PCA主成分分析
12
+ 5. 聚类
metrics/calculate_classification_metrics.py CHANGED
@@ -5,20 +5,27 @@ from sklearn.preprocessing import label_binarize
5
  from visualization.draw_line_graph import draw_line_graph
6
 
7
 
8
- def calculate_classification_metrics(pred_data, real_data, model_name):
 
 
 
 
 
 
9
  info = {}
10
 
11
  real_data = np.round(real_data, 0).astype(int)
12
  pred_data = np.round(pred_data, 0).astype(int)
13
 
14
  cur_confusion_matrix = confusion_matrix(real_data[:, 0], pred_data)
15
- info["Confusion matrix of "+model_name] = cur_confusion_matrix
16
 
17
- info["Accuracy of "+model_name] = np.sum(cur_confusion_matrix.diagonal()) / np.sum(cur_confusion_matrix)
18
- info["Precision of "+model_name] = cur_confusion_matrix.diagonal() / np.sum(cur_confusion_matrix, axis=1)
19
- info["Recall of "+model_name] = cur_confusion_matrix.diagonal() / np.sum(cur_confusion_matrix, axis=0)
20
- info["F1-score of "+model_name] = np.mean(2 * np.multiply(info["Precision of "+model_name], info["Recall of "+model_name]) / \
21
- (info["Precision of "+model_name] + info["Recall of "+model_name]))
 
22
 
23
  max_class = max(real_data)[0]
24
  min_class = min(real_data)[0]
@@ -29,7 +36,3 @@ def calculate_classification_metrics(pred_data, real_data, model_name):
29
  fpr, tpr, thresholds = roc_curve(real_data_[:, i], pred_data_[:, i])
30
  # draw_line_graph(fpr, tpr, "ROC curve with AUC={:.2f}".format(auc(fpr, tpr)))
31
 
32
- info["AUC of "+model_name] = roc_auc_score(real_data_, pred_data_)
33
-
34
- return info
35
-
 
5
  from visualization.draw_line_graph import draw_line_graph
6
 
7
 
8
class ClassificationMetrics:
    """Holder for the names of the classification metrics."""

    @classmethod
    def get_metrics(cls):
        """Return the metric names as a fresh list."""
        metric_names = ("Accuracy", "Precision", "Recall", "F1-score")
        return list(metric_names)
12
+
13
+
14
def calculate_classification_metrics(pred_data, real_data):
    """Compute confusion-matrix based classification metrics.

    Both inputs are rounded to the nearest integer class label first.
    ``real_data`` is indexed as ``real_data[:, 0]``, so it must be
    two-dimensional; ``pred_data`` is passed on as is.

    Returns a dict with keys "Confusion matrix", "Accuracy", "Precision"
    (per-class array), "Recall" (per-class array) and "F1-score" (mean of
    the per-class F1 values).
    """
    info = {}

    real_data = np.round(real_data, 0).astype(int)
    pred_data = np.round(pred_data, 0).astype(int)

    # confusion_matrix(y_true, y_pred): rows are true classes, columns are
    # predicted classes.
    cur_confusion_matrix = confusion_matrix(real_data[:, 0], pred_data)
    info["Confusion matrix"] = cur_confusion_matrix

    true_positives = cur_confusion_matrix.diagonal()

    info["Accuracy"] = np.sum(true_positives) / np.sum(cur_confusion_matrix)
    # Precision divides by everything *predicted* as the class (column sums);
    # recall divides by everything that *truly is* the class (row sums).
    info["Precision"] = true_positives / np.sum(cur_confusion_matrix, axis=0)
    info["Recall"] = true_positives / np.sum(cur_confusion_matrix, axis=1)
    info["F1-score"] = np.mean(2 * np.multiply(info["Precision"], info["Recall"]) / (info["Precision"] + info["Recall"]))

    return info
29
 
30
  max_class = max(real_data)[0]
31
  min_class = min(real_data)[0]
 
36
  fpr, tpr, thresholds = roc_curve(real_data_[:, i], pred_data_[:, i])
37
  # draw_line_graph(fpr, tpr, "ROC curve with AUC={:.2f}".format(auc(fpr, tpr)))
38
 
 
 
 
 
metrics/calculate_regression_metrics.py CHANGED
@@ -2,44 +2,28 @@ import numpy as np
2
  from sklearn.metrics import *
3
 
4
 
5
- def calculate_ar2(real_data, pred_data):
6
- model_name = "a"
7
- info = {}
8
-
9
- info["MAE of "+model_name] = mean_absolute_error(real_data, pred_data)
10
- # mae = mean_absolute_error(real_data, pred_data)
11
- info["MSE of "+model_name] = mean_squared_error(real_data, pred_data)
12
- # mse = mean_squared_error(real_data, pred_data)
13
- info["RSME of "+model_name] = np.sqrt(info["MSE of "+model_name])
14
- # rsme = np.sqrt(info["MSE of "+model_name])
15
- info["R-Sqaure of "+model_name] = r2_score(real_data, pred_data)
16
- # r2 = r2_score(real_data, pred_data)
17
- if isinstance(max(real_data), np.ndarray):
18
- info["Adjusted R-Square of " + model_name] = 1 - (1 - info["R-Sqaure of "+model_name]) * (len(pred_data)-1) / (len(pred_data)-max(real_data)[0]-1)
19
- # ar2 = 1 - (1 - info["R-Sqaure of "+model_name]) * (len(pred_data)-1) / (len(pred_data)-max(real_data)[0]-1)
20
- else:
21
- info["Adjusted R-Square of " + model_name] = 1 - (1 - info["R-Sqaure of " + model_name]) * (len(pred_data) - 1) / (len(pred_data) - max(real_data) - 1)
22
- # ar2 = 1 - (1 - info["R-Sqaure of " + model_name]) * (len(pred_data) - 1) / (len(pred_data) - max(real_data) - 1)
23
-
24
- return info["Adjusted R-Square of " + model_name]
25
 
26
 
27
- def calculate_regression_metrics(pred_data, real_data, model_name):
28
  info = {}
29
 
30
- info["MAE of "+model_name] = mean_absolute_error(real_data, pred_data)
31
  # mae = mean_absolute_error(real_data, pred_data)
32
- info["MSE of "+model_name] = mean_squared_error(real_data, pred_data)
33
  # mse = mean_squared_error(real_data, pred_data)
34
- info["RSME of "+model_name] = np.sqrt(info["MSE of "+model_name])
35
  # rsme = np.sqrt(info["MSE of "+model_name])
36
- info["R-Sqaure of "+model_name] = r2_score(real_data, pred_data)
37
  # r2 = r2_score(real_data, pred_data)
38
  if isinstance(max(real_data), np.ndarray):
39
- info["Adjusted R-Square of " + model_name] = 1 - (1 - info["R-Sqaure of "+model_name]) * (len(pred_data)-1) / (len(pred_data)-max(real_data)[0]-1)
40
  # ar2 = 1 - (1 - info["R-Sqaure of "+model_name]) * (len(pred_data)-1) / (len(pred_data)-max(real_data)[0]-1)
41
  else:
42
- info["Adjusted R-Square of " + model_name] = 1 - (1 - info["R-Sqaure of " + model_name]) * (len(pred_data) - 1) / (len(pred_data) - max(real_data) - 1)
43
  # ar2 = 1 - (1 - info["R-Sqaure of " + model_name]) * (len(pred_data) - 1) / (len(pred_data) - max(real_data) - 1)
44
 
45
  return info
 
2
  from sklearn.metrics import *
3
 
4
 
5
class RegressionMetrics:
    """Names of the metrics stored by ``calculate_regression_metrics``."""

    @classmethod
    def get_metrics(cls):
        """Return the regression metric names as a new list.

        The spellings ("RSME", "R-Sqaure") are kept exactly as they are
        because they must match the keys of the ``info`` dictionary.
        """
        return ["MAE", "MSE", "RSME", "R-Sqaure", "Adjusted R-Square"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
def calculate_regression_metrics(pred_data, real_data):
    """Compute regression error metrics for a set of predictions.

    Args:
        pred_data: Predicted target values.
        real_data: Ground-truth target values.

    Returns:
        dict with the keys "MAE", "MSE", "RSME", "R-Sqaure" and
        "Adjusted R-Square" (key spellings are part of the interface).
    """
    info = {}

    info["MAE"] = mean_absolute_error(real_data, pred_data)
    info["MSE"] = mean_squared_error(real_data, pred_data)
    info["RSME"] = np.sqrt(info["MSE"])
    info["R-Sqaure"] = r2_score(real_data, pred_data)

    # NOTE(review): the adjusted R-square formula normally subtracts the
    # number of predictors; this code subtracts the largest target value
    # instead. Kept unchanged because the feature count is not available
    # here -- confirm the intent with the callers.
    largest_target = max(real_data)
    if isinstance(largest_target, np.ndarray):
        # Column-shaped targets: max() yields a one-element row, unwrap it.
        largest_target = largest_target[0]

    sample_num = len(pred_data)
    info["Adjusted R-Square"] = 1 - (1 - info["R-Sqaure"]) * (sample_num - 1) / (sample_num - largest_target - 1)

    return info
requirements.txt CHANGED
@@ -1,13 +1,15 @@
1
  numpy~=1.23.5
2
  pandas~=1.5.3
3
- scikit-learn~=1.2.1
4
  hmmlearn~=0.3.0
5
  matplotlib~=3.7.0
6
  scikit-fuzzy~=0.4.2
7
  gradio~=4.17.0
 
8
  networkx~=2.8.4
9
  scipy~=1.10.0
 
10
  xgboost~=2.0.3
 
11
  tqdm~=4.64.1
12
- shap~=0.44.1
13
- scikit-optimize~=0.9.0
 
1
  numpy~=1.23.5
2
  pandas~=1.5.3
3
+ scikit-learn~=1.4.1.post1
4
  hmmlearn~=0.3.0
5
  matplotlib~=3.7.0
6
  scikit-fuzzy~=0.4.2
7
  gradio~=4.17.0
8
+ shap~=0.44.1
9
  networkx~=2.8.4
10
  scipy~=1.10.0
11
+ lightgbm~=4.3.0
12
  xgboost~=2.0.3
13
+ torch==2.2.0+cu118
14
  tqdm~=4.64.1
15
+ scikit-optimize~=0.9.0
 
static/config.py CHANGED
@@ -1,8 +1,31 @@
1
  class Config:
2
  # 随机种子
3
  RANDOM_STATE = 123
 
 
 
 
4
  # 绘图颜色组
 
 
5
  COLORS = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  "#8074C8",
7
  "#7895C1",
8
  "#A8CBDF",
@@ -11,44 +34,103 @@ class Config:
11
  "#E3625D",
12
  "#EF8B67",
13
  "#F0C284"
14
- ]
15
 
 
 
 
 
 
 
 
 
16
 
 
 
 
 
 
 
 
 
 
17
 
18
- COLORS_1 = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "#91CCC0",
20
  "#7FABD1",
21
  "#F7AC53",
22
  "#EC6E66",
23
  "#B5CE4E",
24
  "#BD7795",
25
- "#B55384",
26
- "#474769",
27
- "#257D88",
28
- "#ED8D5A",
29
- "#BFDFD2",
30
- "#EFCE87"
31
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- COLORS_2 = [
34
- "#A21A54",
35
- "#E7724F",
36
- "#32183C"
37
- ]
38
 
39
- COLORS_3 = [
40
- "#ABD1BC",
41
- "#CCCC99",
42
- "#E3BBED"
43
- ]
44
 
45
 
46
- COLORS_4 = [
47
- "#CFCFD0",
48
- "#B6B3D6",
49
- "#F58F7A",
50
- "#E9687A",
51
- ]
52
 
53
- # 预测图展示的点个数
54
- DISPLAY_RANGE = 100
 
1
  class Config:
2
  # 随机种子
3
  RANDOM_STATE = 123
4
+
5
+ # 预测图展示的点个数
6
+ DISPLAY_RANGE = 100
7
+
8
  # 绘图颜色组
9
+ COLOR_ITER_NUM = 3
10
+
11
  COLORS = [
12
+ "#ca5353",
13
+ "#c874a5",
14
+ "#b674c8",
15
+ "#8274c8",
16
+ "#748dc8",
17
+ "#74acc8",
18
+ "#74c8b7",
19
+ "#74c88d",
20
+ "#a6c874",
21
+ "#e0e27e",
22
+ "#df9b77",
23
+ "#404040",
24
+ "#999999",
25
+ "#d4d4d4"
26
+ ] * COLOR_ITER_NUM
27
+
28
+ COLORS_0 = [
29
  "#8074C8",
30
  "#7895C1",
31
  "#A8CBDF",
 
34
  "#E3625D",
35
  "#EF8B67",
36
  "#F0C284"
37
+ ] * COLOR_ITER_NUM
38
 
39
+ COLORS_1 = [
40
+ "#4A5F7E",
41
+ "#719AAC",
42
+ "#72B063",
43
+ "#94C6CD",
44
+ "#B8DBB3",
45
+ "#E29135"
46
+ ] * COLOR_ITER_NUM
47
 
48
+ COLORS_2 = [
49
+ "#4485C7",
50
+ "#D4562E",
51
+ "#DBB428",
52
+ "#682487",
53
+ "#84BA42",
54
+ "#7ABBDB",
55
+ "#A51C36"
56
+ ] * COLOR_ITER_NUM
57
 
58
+ COLORS_3 = [
59
+ "#8074C8",
60
+ "#7895C1",
61
+ "#A8CBDF",
62
+ "#F5EBAE",
63
+ "#F0C284",
64
+ "#EF8B67",
65
+ "#E3625D",
66
+ "#B54764"
67
+ ] * COLOR_ITER_NUM
68
+
69
+ COLORS_4 = [
70
+ "#979998",
71
+ "#C69287",
72
+ "#E79A90",
73
+ "#EFBC91",
74
+ "#E4CD87",
75
+ "#FAE5BB",
76
+ "#DDDDDF"
77
+ ] * COLOR_ITER_NUM
78
+
79
+ COLORS_5 = [
80
  "#91CCC0",
81
  "#7FABD1",
82
  "#F7AC53",
83
  "#EC6E66",
84
  "#B5CE4E",
85
  "#BD7795",
86
+ "#7C7979"
87
+ ] * COLOR_ITER_NUM
88
+
89
+ COLORS_6 = [
90
+ "#E9687A",
91
+ "#F58F7A",
92
+ "#FDE2D8",
93
+ "#CFCFD0",
94
+ "#B6B3D6"
95
+ ] * COLOR_ITER_NUM
96
+
97
+ JS_0 = """
98
+ function createGradioAnimation() {
99
+ var container = document.createElement('div');
100
+ container.id = 'gradio-animation';
101
+ container.style.fontSize = '2em';
102
+ container.style.fontWeight = 'bold';
103
+ container.style.textAlign = 'center';
104
+ container.style.marginBottom = '20px';
105
+
106
+ var text = 'Welcome to EasyMachineLearning!';
107
+ for (var i = 0; i < text.length; i++) {
108
+ (function(i){
109
+ setTimeout(function(){
110
+ var letter = document.createElement('span');
111
+ letter.style.opacity = '0';
112
+ letter.style.transition = 'opacity 0.5s';
113
+ letter.innerText = text[i];
114
+
115
+ container.appendChild(letter);
116
+
117
+ setTimeout(function() {
118
+ letter.style.opacity = '1';
119
+ }, 50);
120
+ }, i * 250);
121
+ })(i);
122
+ }
123
+
124
+ var gradioContainer = document.querySelector('.gradio-container');
125
+ gradioContainer.insertBefore(container, gradioContainer.firstChild);
126
+
127
+ return 'Animation created';
128
+ }
129
+ """
130
+
131
+
132
 
 
 
 
 
 
133
 
 
 
 
 
 
134
 
135
 
 
 
 
 
 
 
136
 
 
 
static/new_class.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Container:
    """Hold the data splits, model and results belonging to one model run.

    All state lives in plain instance attributes; the ``get_*``/``set_*``
    methods are thin accessors over those attributes.
    """

    def __init__(self, x_train=None, y_train=None, x_test=None, y_test=None, hyper_params_optimize=None):
        """Store the given data splits and reset every result field.

        Args:
            x_train: Training features.
            y_train: Training targets.
            x_test: Test features.
            y_test: Test targets.
            hyper_params_optimize: Hyper-parameter search selection, stored
                as given (presumably a mode name or None -- confirm with
                callers).
        """
        self.x_train = x_train
        self.y_train = y_train
        self.x_test = x_test
        self.y_test = y_test
        self.hyper_params_optimize = hyper_params_optimize
        # Result dictionary with a "参数" (parameters) and a "指标" (metrics)
        # section; created per instance so containers never share it.
        self.info = {"参数": {}, "指标": {}}
        self.y_pred = None
        # Learning-curve values, filled by set_learning_curve_values().
        self.train_sizes = None
        self.train_scores_mean = None
        self.train_scores_std = None
        self.test_scores_mean = None
        self.test_scores_std = None
        self.status = None
        self.model = None

    def get_info(self):
        """Return the result dictionary (the live object, not a copy)."""
        return self.info

    def set_info(self, info: dict):
        """Replace the result dictionary."""
        self.info = info

    def set_y_pred(self, y_pred):
        """Store the predictions."""
        self.y_pred = y_pred

    def get_data_fit_values(self):
        """Return ``[y_pred, y_test]``."""
        return [
            self.y_pred,
            self.y_test,
        ]

    def get_learning_curve_values(self):
        """Return the five learning-curve values as a list.

        Order: train sizes, train score mean, train score std, test score
        mean, test score std.
        """
        return [
            self.train_sizes,
            self.train_scores_mean,
            self.train_scores_std,
            self.test_scores_mean,
            self.test_scores_std,
        ]

    def set_learning_curve_values(self, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std):
        """Store the five learning-curve values."""
        self.train_sizes = train_sizes
        self.train_scores_mean = train_scores_mean
        self.train_scores_std = train_scores_std
        self.test_scores_mean = test_scores_mean
        self.test_scores_std = test_scores_std

    def get_status(self):
        """Return the status value (None until set)."""
        return self.status

    def set_status(self, status: str):
        """Store the status value."""
        self.status = status

    def get_model(self):
        """Return the stored model (None until set)."""
        return self.model

    def set_model(self, model):
        """Store the model object."""
        self.model = model
60
+
61
+
62
class PaintObject:
    """Carry the styling state passed to the ``draw_*`` plotting functions.

    The plotting functions read the colour list, label list, axis labels
    and name from this object and write back how many colours/labels they
    consumed.
    """

    def __init__(self):
        """Start with empty lists, empty labels and zero counters."""
        self.color_cur_num = 0
        self.color_cur_list = []
        self.label_cur_num = 0
        self.label_cur_list = []
        self.x_cur_label = ""
        self.y_cur_label = ""
        self.name = ""

    def get_color_cur_num(self):
        """Return the current colour count."""
        return self.color_cur_num

    def set_color_cur_num(self, color_cur_num):
        """Store the current colour count."""
        self.color_cur_num = color_cur_num

    def get_color_cur_list(self):
        """Return the current colour list (the live object, not a copy)."""
        return self.color_cur_list

    def set_color_cur_list(self, color_cur_list):
        """Replace the current colour list."""
        self.color_cur_list = color_cur_list

    def get_label_cur_num(self):
        """Return the current label count."""
        return self.label_cur_num

    def set_label_cur_num(self, label_cur_num):
        """Store the current label count."""
        self.label_cur_num = label_cur_num

    def get_label_cur_list(self):
        """Return the current label list (the live object, not a copy)."""
        return self.label_cur_list

    def set_label_cur_list(self, label_cur_list):
        """Replace the current label list."""
        self.label_cur_list = label_cur_list

    def get_x_cur_label(self):
        """Return the x-axis label."""
        return self.x_cur_label

    def set_x_cur_label(self, x_cur_label):
        """Store the x-axis label."""
        self.x_cur_label = x_cur_label

    def get_y_cur_label(self):
        """Return the y-axis label."""
        return self.y_cur_label

    def set_y_cur_label(self, y_cur_label):
        """Store the y-axis label."""
        self.y_cur_label = y_cur_label

    def get_name(self):
        """Return the name (used as the plot title by the draw functions)."""
        return self.name

    def set_name(self, name):
        """Store the name."""
        self.name = name
113
+
114
+
115
class SelectModel:
    """Hold the current selections for models and for the various plots.

    Every field starts as None and is read/written through the matching
    ``get_*``/``set_*`` accessor. The field names suggest they mirror user
    choices for the waterfall, force, beeswarm, dependence, data
    distribution, descriptive indicator and heat-map views -- confirm
    against the callers.
    """

    def __init__(self):
        """Initialise every selection to None."""
        self.models = None
        self.waterfall_number = None
        self.force_number = None
        self.beeswarm_plot_type = None
        self.dependence_col = None
        self.data_distribution_col = None
        self.data_distribution_is_rotate = None
        self.descriptive_indicators_col = None
        self.descriptive_indicators_is_rotate = None
        self.heatmap_col = None
        self.heatmap_is_rotate = None

    def get_heatmap_col(self):
        """Return the heat-map column selection."""
        return self.heatmap_col

    def set_heatmap_col(self, heatmap_col):
        """Store the heat-map column selection."""
        self.heatmap_col = heatmap_col

    def get_heatmap_is_rotate(self):
        """Return the heat-map rotate flag."""
        return self.heatmap_is_rotate

    def set_heatmap_is_rotate(self, heatmap_is_rotate):
        """Store the heat-map rotate flag."""
        self.heatmap_is_rotate = heatmap_is_rotate

    def get_models(self):
        """Return the model selection."""
        return self.models

    def set_models(self, models):
        """Store the model selection."""
        self.models = models

    def get_waterfall_number(self):
        """Return the waterfall number."""
        return self.waterfall_number

    def set_waterfall_number(self, waterfall_number):
        """Store the waterfall number."""
        self.waterfall_number = waterfall_number

    def get_force_number(self):
        """Return the force number."""
        return self.force_number

    def set_force_number(self, force_number):
        """Store the force number."""
        self.force_number = force_number

    def get_beeswarm_plot_type(self):
        """Return the beeswarm plot type."""
        return self.beeswarm_plot_type

    def set_beeswarm_plot_type(self, beeswarm_plot_type):
        """Store the beeswarm plot type."""
        self.beeswarm_plot_type = beeswarm_plot_type

    def get_dependence_col(self):
        """Return the dependence column selection."""
        return self.dependence_col

    def set_dependence_col(self, dependence_col):
        """Store the dependence column selection."""
        self.dependence_col = dependence_col

    def get_data_distribution_col(self):
        """Return the data-distribution column selection."""
        return self.data_distribution_col

    def set_data_distribution_col(self, data_distribution_col):
        """Store the data-distribution column selection."""
        self.data_distribution_col = data_distribution_col

    def get_data_distribution_is_rotate(self):
        """Return the data-distribution rotate flag."""
        return self.data_distribution_is_rotate

    def set_data_distribution_is_rotate(self, data_distribution_is_rotate):
        """Store the data-distribution rotate flag."""
        self.data_distribution_is_rotate = data_distribution_is_rotate

    def get_descriptive_indicators_is_rotate(self):
        """Return the descriptive-indicators rotate flag."""
        return self.descriptive_indicators_is_rotate

    def set_descriptive_indicators_is_rotate(self, descriptive_indicators_is_rotate):
        """Store the descriptive-indicators rotate flag."""
        self.descriptive_indicators_is_rotate = descriptive_indicators_is_rotate

    def get_descriptive_indicators_col(self):
        """Return the descriptive-indicators column selection."""
        return self.descriptive_indicators_col

    def set_descriptive_indicators_col(self, descriptive_indicators_col):
        """Store the descriptive-indicators column selection."""
        self.descriptive_indicators_col = descriptive_indicators_col
194
+
195
+
static/process.py CHANGED
@@ -7,10 +7,10 @@ from skopt import BayesSearchCV
7
  import copy
8
  import pandas as pd
9
  from scipy.stats import spearmanr
 
 
10
 
11
- from sklearn.datasets import load_iris
12
- from sklearn.datasets import load_wine
13
- from sklearn.datasets import load_breast_cancer
14
  from scipy.linalg import eig
15
 
16
  from static.config import Config
@@ -175,20 +175,36 @@ def choose_y_col_in_dataframe(df: pd.DataFrame, y_col: str):
175
 
176
 
177
  def load_data(sort):
 
178
  if sort == "Iris Dataset":
179
  sk_data = load_iris()
 
180
  elif sort == "Wine Dataset":
181
  sk_data = load_wine()
 
182
  elif sort == "Breast Cancer Dataset":
183
  sk_data = load_breast_cancer()
 
 
 
 
 
 
 
 
 
 
184
 
185
- target_data = sk_data.target.astype(str)
186
- for i in range(len(sk_data.target_names)):
187
- target_data = np.where(target_data == str(i), sk_data.target_names[i], target_data)
 
 
 
188
 
189
- sk_feature_names = sk_data.feature_names
 
190
  sk_data = np.concatenate((target_data.reshape(-1, 1), sk_data.data), axis=1)
191
- sk_feature_names = np.insert(sk_feature_names, 0, "species")
192
 
193
  df = pd.DataFrame(data=sk_data, columns=sk_feature_names)
194
 
@@ -283,10 +299,7 @@ def k_fold_cross_validation_data_segmentation(x_train, y_train):
283
  def grid_search(params, model, x_train, y_train, scoring=None):
284
  info = {}
285
 
286
- if scoring == "neg_mean_squared_error":
287
- grid_search_model = GridSearchCV(model, params, cv=5, scoring="neg_mean_squared_error")
288
- else:
289
- grid_search_model = GridSearchCV(model, params, cv=5)
290
 
291
  grid_search_model.fit(x_train, y_train.ravel())
292
 
@@ -300,10 +313,7 @@ def grid_search(params, model, x_train, y_train, scoring=None):
300
  def bayes_search(params, model, x_train, y_train, scoring=None):
301
  info = {}
302
 
303
- if scoring == "neg_mean_squared_error":
304
- bayes_search_model = BayesSearchCV(model, params, cv=5, n_iter=50, scoring="neg_mean_squared_error")
305
- else:
306
- bayes_search_model = BayesSearchCV(model, params, cv=5, n_iter=50)
307
 
308
  bayes_search_model.fit(x_train, y_train)
309
 
 
7
  import copy
8
  import pandas as pd
9
  from scipy.stats import spearmanr
10
+ from io import StringIO
11
+ from contextlib import redirect_stdout
12
 
13
+ from sklearn.datasets import load_iris, load_wine, load_breast_cancer, load_diabetes
 
 
14
  from scipy.linalg import eig
15
 
16
  from static.config import Config
 
175
 
176
 
177
  def load_data(sort):
178
+ type = ""
179
  if sort == "Iris Dataset":
180
  sk_data = load_iris()
181
+ type = "classification"
182
  elif sort == "Wine Dataset":
183
  sk_data = load_wine()
184
+ type = "classification"
185
  elif sort == "Breast Cancer Dataset":
186
  sk_data = load_breast_cancer()
187
+ type = "classification"
188
+ elif sort == "Diabetes Dataset":
189
+ sk_data = load_diabetes()
190
+ type = "regression"
191
+ elif sort == "California Housing Dataset":
192
+ df = pd.read_csv("./data/fetch_california_housing.csv")
193
+ return df
194
+ else:
195
+ sk_data = load_iris()
196
+ type = "classification"
197
 
198
+ if type == "classification":
199
+ target_data = sk_data.target.astype(str)
200
+ for i in range(len(sk_data.target_names)):
201
+ target_data = np.where(target_data == str(i), sk_data.target_names[i], target_data)
202
+ else:
203
+ target_data = sk_data.target
204
 
205
+ feature_names = sk_data.feature_names
206
+ sk_feature_names = ["target"] + feature_names.tolist() if isinstance(feature_names, np.ndarray) else ["target"] + feature_names
207
  sk_data = np.concatenate((target_data.reshape(-1, 1), sk_data.data), axis=1)
 
208
 
209
  df = pd.DataFrame(data=sk_data, columns=sk_feature_names)
210
 
 
299
  def grid_search(params, model, x_train, y_train, scoring=None):
300
  info = {}
301
 
302
+ grid_search_model = GridSearchCV(model, params, cv=3, n_jobs=-1)
 
 
 
303
 
304
  grid_search_model.fit(x_train, y_train.ravel())
305
 
 
313
  def bayes_search(params, model, x_train, y_train, scoring=None):
314
  info = {}
315
 
316
+ bayes_search_model = BayesSearchCV(model, params, cv=3, n_iter=50, n_jobs=-1)
 
 
 
317
 
318
  bayes_search_model.fit(x_train, y_train)
319
 
visualization/draw_boxplot.py CHANGED
@@ -1,26 +1,33 @@
1
  import matplotlib.pyplot as plt
 
2
 
3
- from coding.llh.static.config import Config
4
 
5
 
6
- # draw boxplot
7
- def draw_boxplot(x_data, title):
8
- plt.figure(figsize=(10, 14))
9
  plt.grid(True)
10
 
11
  plt.boxplot(
12
  x_data,
13
  meanline=True,
14
  showmeans=True,
15
- medianprops={"color": Config.COLORS[0], "linewidth": 1.5},
16
- meanprops={"color": Config.COLORS[1], "ls": "--", "linewidth": 1.5},
17
- flierprops={"marker": "o", "markerfacecolor": Config.COLORS[2]},
18
  labels=x_data.columns.values
19
  )
20
 
21
- plt.xticks(rotation=-45)
22
- plt.title(title)
 
 
 
 
 
 
 
23
 
24
- plt.savefig("./diagram/{}.png".format(title), dpi=300)
25
 
26
- plt.show()
 
1
  import matplotlib.pyplot as plt
2
+ import numpy as np
3
 
4
+ from static.config import Config
5
 
6
 
7
def draw_boxplot(x_data, paint_object, will_rotate=False):
    """Draw a box plot of ``x_data`` on a new figure.

    Args:
        x_data: Data to plot; its ``columns.values`` supply the tick labels
            (assumed to be a pandas DataFrame -- TODO confirm).
        paint_object: Supplies the colour list (first three entries are
            used), the title and the axis labels. Its current colour count
            is set to 3 before returning.
        will_rotate: When true, rotate the x tick labels by -45 degrees.

    Returns:
        Tuple of the ``matplotlib.pyplot`` module and ``paint_object``.
    """
    plt.figure(figsize=(10, 8), dpi=300)

    plt.grid(True)

    # Median line, mean line and outlier markers each get their own colour.
    colors = paint_object.get_color_cur_list()

    plt.boxplot(
        x_data,
        meanline=True,
        showmeans=True,
        medianprops={"color": colors[0], "linewidth": 1.5},
        meanprops={"color": colors[1], "ls": "--", "linewidth": 1.5},
        flierprops={"marker": "o", "markerfacecolor": colors[2]},
        labels=x_data.columns.values,
    )

    if will_rotate:
        plt.xticks(rotation=-45)

    plt.title(paint_object.get_name())

    plt.xlabel(paint_object.get_x_cur_label())
    plt.ylabel(paint_object.get_y_cur_label())

    # Three colours were consumed above.
    paint_object.set_color_cur_num(3)

    return plt, paint_object
33
 
 
visualization/draw_data_fit_total.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from matplotlib import pyplot as plt
3
+
4
+ from static.new_class import PaintObject
5
+ from static.config import Config
6
+
7
+
8
def draw_data_fit_total(input_dict, paint_object: "PaintObject"):
    """Plot every model's fitted curve plus one reference curve.

    Args:
        input_dict: Mapping of model name to a two-element sequence; element
            0 is drawn as that model's solid line, and element 1 of the LAST
            entry is drawn once as the dashed line (presumably predictions
            and actual values, as in ``Container.get_data_fit_values`` --
            confirm with callers).
        paint_object: Supplies colours, legend labels, title and axis
            labels. Needs ``len(input_dict) + 1`` colours and labels: one
            per model followed by one for the dashed line. Its colour and
            label counters are set to that number before returning.

    Returns:
        Tuple of the ``matplotlib.pyplot`` module and ``paint_object``.

    Raises:
        ValueError: If ``input_dict`` is empty, since there is then no entry
            to take the dashed reference curve from.
    """
    # Reject empty input before a figure is opened; previously this case
    # failed later on an unbound local, leaving a stray figure behind.
    if not input_dict:
        raise ValueError("input_dict must contain at least one entry to plot")

    plt.figure(figsize=(10, 6), dpi=300)

    colors = paint_object.get_color_cur_list()
    labels = paint_object.get_label_cur_list()
    series_num = len(input_dict)

    final_list = None
    for i, cur_list in enumerate(input_dict.values()):
        # After the loop this holds the last entry, whose second element is
        # used for the dashed line.
        final_list = cur_list

        plt.plot(
            np.arange(len(cur_list[0])),
            cur_list[0],
            "-",
            color=colors[i],
            alpha=0.9,
            label=labels[i],
        )

    plt.plot(
        np.arange(len(final_list[1])),
        final_list[1],
        "--",
        color=colors[series_num],
        alpha=0.9,
        label=labels[series_num],
    )

    plt.title(paint_object.get_name())

    plt.xlabel(paint_object.get_x_cur_label())
    plt.ylabel(paint_object.get_y_cur_label())
    plt.legend()

    # One colour/label per model plus one for the dashed line.
    paint_object.set_color_cur_num(series_num + 1)
    paint_object.set_label_cur_num(series_num + 1)

    return plt, paint_object
48
+
visualization/draw_heat_map.py CHANGED
@@ -2,16 +2,13 @@ import numpy as np
2
  import matplotlib.pyplot as plt
3
  import pandas as pd
4
 
5
- from coding.llh.static.config import Config
6
 
7
 
8
- # Draw heat map
9
- def draw_heat_map(x_data, title, is_rotate, col_name):
10
- # col_name = np.delete(col_name, np.where(col_name == "swing"))
11
-
12
  plt.rcParams.update({'figure.autolayout': True})
13
 
14
- plt.figure(figsize=(16, 16))
15
 
16
  if isinstance(x_data, np.ndarray):
17
  np_data = np.around(x_data.astype("float64"), 2)
@@ -24,17 +21,22 @@ def draw_heat_map(x_data, title, is_rotate, col_name):
24
  for j in range(np_data.shape[1]):
25
  plt.text(j, i, np_data[i, j], ha="center", va="center", color="w")
26
 
27
- if is_rotate:
28
- plt.xticks(np.arange(len(pd_data.columns.values)), col_name, rotation=-90)
29
  else:
30
- plt.xticks(np.arange(len(pd_data.columns.values)), col_name)
31
 
32
- plt.yticks(np.arange(len(pd_data.index.values)), col_name)
33
  plt.imshow(np_data)
34
- # plt.colorbar(False)
35
  plt.tight_layout()
36
- # plt.title(title)
37
 
38
- plt.savefig("./diagram/{}.png".format(title), dpi=300)
 
 
 
 
 
 
 
39
 
40
- plt.show()
 
2
  import matplotlib.pyplot as plt
3
  import pandas as pd
4
 
5
+ from static.config import Config
6
 
7
 
8
+ def draw_heat_map(x_data, col_list, paint_object, will_rotate=False):
 
 
 
9
  plt.rcParams.update({'figure.autolayout': True})
10
 
11
+ plt.figure(figsize=(10, 8), dpi=300)
12
 
13
  if isinstance(x_data, np.ndarray):
14
  np_data = np.around(x_data.astype("float64"), 2)
 
21
  for j in range(np_data.shape[1]):
22
  plt.text(j, i, np_data[i, j], ha="center", va="center", color="w")
23
 
24
+ if will_rotate:
25
+ plt.xticks(np.arange(len(col_list)), col_list, rotation=-90)
26
  else:
27
+ plt.xticks(np.arange(len(col_list)), col_list)
28
 
29
+ plt.yticks(np.arange(len(col_list)), col_list)
30
  plt.imshow(np_data)
31
+ plt.colorbar(True)
32
  plt.tight_layout()
 
33
 
34
+ plt.title(paint_object.get_name())
35
+
36
+ plt.xlabel(paint_object.get_x_cur_label())
37
+ plt.ylabel(paint_object.get_y_cur_label())
38
+
39
+ paint_object.set_color_cur_num(0)
40
+
41
+ return plt, paint_object
42
 
 
visualization/draw_histogram.py CHANGED
@@ -1,26 +1,27 @@
 
 
1
  import numpy as np
2
  import matplotlib.pyplot as plt
3
 
4
- from coding.llh.static.config import Config
 
5
 
6
 
7
- # Plot bar charts
8
- def draw_histogram(x_data, y_data, will_rotate, will_show_text, title):
9
- fig, ax = plt.subplots(figsize=(10, 8))
10
 
11
  bars = plt.bar(
12
- np.arange(0, len(x_data)),
13
- x_data,
14
  align="center",
15
  alpha=1,
16
- color=Config.COLORS,
17
- tick_label=y_data
18
  )
19
 
20
- # Bar annotation
21
  if will_show_text:
22
  for bar in bars:
23
- ax.annotate(
24
  str(bar.get_height()),
25
  xy=(bar.get_x() + bar.get_width() / 2,
26
  bar.get_height()),
@@ -31,10 +32,14 @@ def draw_histogram(x_data, y_data, will_rotate, will_show_text, title):
31
  )
32
 
33
  if will_rotate:
34
- plt.xticks(rotation=-90)
 
 
 
 
 
35
 
36
- plt.title(title)
37
 
38
- plt.savefig("./diagram/{}.png".format(title), dpi=300)
39
 
40
- plt.show()
 
1
+ import random
2
+
3
  import numpy as np
4
  import matplotlib.pyplot as plt
5
 
6
+ from static.config import Config
7
+ from static.new_class import PaintObject
8
 
9
 
10
+ def draw_histogram(nums, labels, paint_object, will_rotate=False, will_show_text=True):
11
+ plt.figure(figsize=(10, 8), dpi=300)
 
12
 
13
  bars = plt.bar(
14
+ np.arange(0, len(nums)),
15
+ nums,
16
  align="center",
17
  alpha=1,
18
+ color=paint_object.get_color_cur_list()[0],
19
+ tick_label=labels
20
  )
21
 
 
22
  if will_show_text:
23
  for bar in bars:
24
+ plt.annotate(
25
  str(bar.get_height()),
26
  xy=(bar.get_x() + bar.get_width() / 2,
27
  bar.get_height()),
 
32
  )
33
 
34
  if will_rotate:
35
+ plt.xticks(rotation=-45)
36
+
37
+ plt.title(paint_object.get_name())
38
+
39
+ plt.xlabel(paint_object.get_x_cur_label())
40
+ plt.ylabel(paint_object.get_y_cur_label())
41
 
42
+ paint_object.set_color_cur_num(1)
43
 
44
+ return plt, paint_object
45
 
 
visualization/draw_histogram_line_subgraph.py CHANGED
@@ -1,7 +1,7 @@
1
  import numpy as np
2
  from matplotlib import pyplot as plt
3
 
4
- from coding.llh.static.config import Config
5
 
6
 
7
  def draw_histogram_line_subgraph(total_data_for_plot):
 
1
  import numpy as np
2
  from matplotlib import pyplot as plt
3
 
4
+ from static.config import Config
5
 
6
 
7
  def draw_histogram_line_subgraph(total_data_for_plot):
visualization/draw_learning_curve_total.py CHANGED
@@ -1,59 +1,48 @@
1
- import numpy as np
2
  from matplotlib import pyplot as plt
3
 
4
- from static.paint import PaintObject
5
- from static.config import Config
6
-
7
-
8
- def draw_learning_curve_total(input_dict, type, paint_object: PaintObject):
9
- plt.figure(figsize=(10, 6), dpi=300)
10
-
11
- if type == "train":
12
- for i, values in enumerate(input_dict.values()):
13
- train_sizes = values[0]
14
- train_scores_mean = values[1]
15
- train_scores_std = values[2]
16
- test_scores_mean = values[3]
17
- test_scores_std = values[4]
18
-
19
- plt.fill_between(
20
- train_sizes,
21
- train_scores_mean - train_scores_std,
22
- train_scores_mean + train_scores_std,
23
- alpha=0.1,
24
- color=paint_object.get_color_cur_list()[i]
25
- )
26
-
27
- plt.plot(
28
- train_sizes,
29
- train_scores_mean,
30
- "o-",
31
- color=paint_object.get_color_cur_list()[i],
32
- label=paint_object.get_label_cur_list()[i]
33
- )
34
-
35
- else:
36
- for i, values in enumerate(input_dict.values()):
37
- train_sizes = values[0]
38
- train_scores_mean = values[1]
39
- train_scores_std = values[2]
40
- test_scores_mean = values[3]
41
- test_scores_std = values[4]
42
-
43
- plt.fill_between(
44
- train_sizes,
45
- test_scores_mean - test_scores_std,
46
- test_scores_mean + test_scores_std,
47
- alpha=0.1,
48
- color=paint_object.get_color_cur_list()[i]
49
- )
50
- plt.plot(
51
- train_sizes,
52
- test_scores_mean,
53
- "o-",
54
- color=paint_object.get_color_cur_list()[i],
55
- label=paint_object.get_label_cur_list()[i]
56
- )
57
 
58
  plt.title(paint_object.get_name())
59
 
@@ -61,11 +50,8 @@ def draw_learning_curve_total(input_dict, type, paint_object: PaintObject):
61
  plt.ylabel(paint_object.get_y_cur_label())
62
  plt.legend()
63
 
64
- # plt.savefig("./diagram/{}.png".format(title), dpi=300)
65
- # plt.show()
66
-
67
- paint_object.set_color_cur_num(len(input_dict.keys()))
68
- paint_object.set_label_cur_num(len(input_dict.keys()))
69
 
70
  return plt, paint_object
71
 
 
 
1
  from matplotlib import pyplot as plt
2
 
3
+ from static.new_class import PaintObject
4
+
5
+
6
+ def draw_learning_curve_total(input_dict, paint_object: PaintObject):
7
+ plt.figure(figsize=(10, 8), dpi=300)
8
+
9
+ for i, values in enumerate(input_dict.values()):
10
+ train_sizes = values[0]
11
+ train_scores_mean = values[1]
12
+ train_scores_std = values[2]
13
+ test_scores_mean = values[3]
14
+ test_scores_std = values[4]
15
+
16
+ plt.fill_between(
17
+ train_sizes,
18
+ train_scores_mean - train_scores_std,
19
+ train_scores_mean + train_scores_std,
20
+ alpha=0.1,
21
+ color=paint_object.get_color_cur_list()[2*i]
22
+ )
23
+
24
+ plt.plot(
25
+ train_sizes,
26
+ train_scores_mean,
27
+ "o-",
28
+ color=paint_object.get_color_cur_list()[2*i],
29
+ label=paint_object.get_label_cur_list()[2*i]
30
+ )
31
+
32
+ plt.fill_between(
33
+ train_sizes,
34
+ test_scores_mean - test_scores_std,
35
+ test_scores_mean + test_scores_std,
36
+ alpha=0.1,
37
+ color=paint_object.get_color_cur_list()[2*i+1]
38
+ )
39
+ plt.plot(
40
+ train_sizes,
41
+ test_scores_mean,
42
+ "o-",
43
+ color=paint_object.get_color_cur_list()[2*i+1],
44
+ label=paint_object.get_label_cur_list()[2*i+1]
45
+ )
 
 
 
 
 
 
 
 
 
 
46
 
47
  plt.title(paint_object.get_name())
48
 
 
50
  plt.ylabel(paint_object.get_y_cur_label())
51
  plt.legend()
52
 
53
+ paint_object.set_color_cur_num(2*len(input_dict.values()))
54
+ paint_object.set_label_cur_num(2*len(input_dict.values()))
 
 
 
55
 
56
  return plt, paint_object
57
 
visualization/draw_line_graph.py CHANGED
@@ -4,37 +4,24 @@ import matplotlib.pyplot as plt
4
  from static.config import Config
5
 
6
 
7
- # draw line graph
8
- def draw_line_graph(x_data, y_data: list, title):
9
- plt.figure(figsize=(10, 8))
10
 
11
  plt.plot(
12
- x_data,
13
- y_data,
14
  "-o",
15
- color=Config.COLORS[0]
16
  )
17
 
18
- plt.title(title)
19
- plt.savefig("./diagram/{}.png".format(title), dpi=300)
20
 
21
- plt.show()
 
22
 
 
23
 
24
- def draw_line_graph_1(x_data, y_data: list, title, labels: list):
25
- plt.figure(figsize=(10, 8))
26
 
27
- for i, single_y_data in enumerate(y_data):
28
- plt.plot(
29
- x_data,
30
- single_y_data,
31
- "-o",
32
- color=Config.COLORS[i],
33
- label=labels[i]
34
- )
35
 
36
- plt.legend()
37
- plt.title(title)
38
- plt.savefig("./diagram/{}.png".format(title), dpi=300)
39
 
40
- plt.show()
 
4
  from static.config import Config
5
 
6
 
7
def draw_line_graph(nums, labels, paint_object):
    """Draw a single marked line on a new figure.

    Args:
        nums: Values passed to ``plt.plot`` as the x coordinates.
        labels: Values passed to ``plt.plot`` as the y coordinates.
        paint_object: Supplies the line colour (first entry of its colour
            list), the title and the axis labels. Its current colour count
            is set to 1 before returning.

    Returns:
        Tuple of the ``matplotlib.pyplot`` module and ``paint_object``.
    """
    plt.figure(figsize=(10, 8), dpi=300)

    line_color = paint_object.get_color_cur_list()[0]
    plt.plot(
        nums,
        labels,
        "-o",
        color=line_color,
    )

    plt.title(paint_object.get_name())

    plt.xlabel(paint_object.get_x_cur_label())
    plt.ylabel(paint_object.get_y_cur_label())

    # One colour was consumed above.
    paint_object.set_color_cur_num(1)

    return plt, paint_object
 
25
 
 
 
 
 
 
 
 
 
26
 
 
 
 
27
 
 
visualization/draw_pred_total.py CHANGED
@@ -7,30 +7,28 @@ from coding.llh.static.config import Config
7
  def draw_pred_total(input_dict):
8
  plt.figure(figsize=(10, 6))
9
 
10
- i = 0
11
- for name, cur_list in input_dict.items():
12
- mylist = cur_list
 
13
  plt.plot(
14
  np.array([x for x in range(len(cur_list[0]))]),
15
  cur_list[0],
16
  "-",
17
- color=Config.COLORS_4[i],
18
  alpha=0.9,
19
- label=name
20
  )
21
- i += 1
22
 
23
  plt.plot(
24
- np.array([x for x in range(len(mylist[1]))]),
25
- mylist[1],
26
  "--",
27
- color=Config.COLORS_4[1],
28
  alpha=0.9,
29
- label="actual data"
30
  )
31
 
32
- title = "pred curve"
33
-
34
  plt.xlabel("Sizes")
35
  plt.ylabel("Value")
36
  plt.legend()
 
7
  def draw_pred_total(input_dict):
8
  plt.figure(figsize=(10, 6))
9
 
10
+ for i, name, cur_list in enumerate(input_dict.items()):
11
+ if i == len(input_dict.keys())-1:
12
+ final_list = cur_list
13
+
14
  plt.plot(
15
  np.array([x for x in range(len(cur_list[0]))]),
16
  cur_list[0],
17
  "-",
18
+ color=paint_object.get_color_cur_list()[i],
19
  alpha=0.9,
20
+ label=paint_object.get_label_cur_list()[i]
21
  )
 
22
 
23
  plt.plot(
24
+ np.array([x for x in range(len(final_list[1]))]),
25
+ final_list[1],
26
  "--",
27
+ color=paint_object.get_color_cur_list()[len(input_dict.keys())],
28
  alpha=0.9,
29
+ label=paint_object.get_label_cur_list[len(input_dict.keys())]
30
  )
31
 
 
 
32
  plt.xlabel("Sizes")
33
  plt.ylabel("Value")
34
  plt.legend()
visualization/draw_scatter_line_graph.py CHANGED
@@ -1,7 +1,7 @@
1
  import numpy as np
2
  import matplotlib.pyplot as plt
3
 
4
- from coding.llh.static.config import Config
5
 
6
 
7
  # draw scatter line graph
 
1
  import numpy as np
2
  import matplotlib.pyplot as plt
3
 
4
+ from static.config import Config
5
 
6
 
7
  # draw scatter line graph