LLH committed on
Commit
8d94a86
·
1 Parent(s): 4a491db

2024/03/07/16:46

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ /programmer.md
2
+ /venv
3
+ /test
4
+ /old
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: EasyMachineLearning test
3
  emoji: 🔥
4
  colorFrom: red
5
  colorTo: red
 
1
  ---
2
+ title: EasyMachineLearning
3
  emoji: 🔥
4
  colorFrom: red
5
  colorTo: red
__init__.py ADDED
File without changes
analysis/model_train/__init__.py ADDED
File without changes
analysis/model_train/bayes_model.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.model_selection import learning_curve
3
+ from sklearn.naive_bayes import *
4
+ from analysis.others.hyperparam_optimize import *
5
+ from classes.static_custom_class import StaticValue
6
+ from functions.process import transform_params_list, get_values_from_container_class
7
+
8
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
9
+
10
+
11
class NaiveBayesClassifierParams:
    """Hyper-parameter metadata for the naive Bayes variants.

    ``sort`` is the estimator name: ``"MultinomialNB"``, ``"GaussianNB"`` or
    ``"ComplementNB"``. Any other value (including ``None``) is treated like
    ``"GaussianNB"``, so both methods always return a dict rather than
    falling through and returning ``None``.
    """

    @classmethod
    def get_params_type(cls, sort):
        """Return a mapping of parameter name to its ``StaticValue`` type tag.

        Args:
            sort: Name of the naive Bayes estimator.

        Returns:
            A dict; empty when the estimator exposes no tunable parameter here.
        """
        if sort == "MultinomialNB":
            return {"alpha": StaticValue.FLOAT}
        if sort == "ComplementNB":
            return {
                "alpha": StaticValue.FLOAT,
                "fit_prior": StaticValue.BOOL,
                "norm": StaticValue.BOOL,
            }
        # "GaussianNB" and unrecognised names: nothing to tune.
        return {}

    @classmethod
    def get_params(cls, sort):
        """Return a mapping of parameter name to its list of candidate values.

        Args:
            sort: Name of the naive Bayes estimator.

        Returns:
            A freshly built dict; empty when there is nothing to tune.
        """
        if sort == "MultinomialNB":
            return {"alpha": [0.1, 0.5, 1.0, 2.0]}
        if sort == "ComplementNB":
            return {
                "alpha": [0.1, 0.5, 1, 10],
                "fit_prior": [True, False],
                "norm": [True, False],
            }
        # "GaussianNB" and unrecognised names: nothing to tune.
        return {}
41
+
42
+
43
def naive_bayes_classifier(container, params_list, model=None):
    """Train a naive Bayes classifier and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the estimator is fitted
    directly and ``params_list`` does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.
        model: Estimator name, ``"MultinomialNB"``, ``"GaussianNB"`` or
            ``"ComplementNB"``. Anything else selects ``GaussianNB``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    x_train, y_train, x_test, y_test, search_mode = get_values_from_container_class(container)
    search_space = transform_params_list(NaiveBayesClassifierParams, params_list, model)

    # GaussianNB doubles as the fallback for unknown or missing names.
    if model == "MultinomialNB":
        estimator = MultinomialNB()
    elif model == "ComplementNB":
        estimator = ComplementNB()
    else:
        estimator = GaussianNB()

    if search_mode == "grid_search":
        fitted = grid_search(search_space, estimator, x_train, y_train)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, estimator, x_train, y_train)
    else:
        estimator.fit(x_train, y_train)
        fitted = estimator

    info = {"参数": fitted.get_params()}

    y_pred = fitted.predict(x_test)
    container.set_y_pred(y_pred)

    sizes, fold_train, fold_test = learning_curve(fitted, x_train, y_train, cv=5)
    # Aggregate across the CV folds (axis 1) for every training size.
    container.set_learning_curve_values(
        sizes,
        np.mean(fold_train, axis=1),
        np.std(fold_train, axis=1),
        np.mean(fold_test, axis=1),
        np.std(fold_test, axis=1),
    )

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(fitted)
    return container
93
+
analysis/model_train/distance_model.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.model_selection import learning_curve
2
+ from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
3
+
4
+ from analysis.others.shap_model import *
5
+ from classes.static_custom_class import StaticValue
6
+ from functions.process import get_values_from_container_class, transform_params_list
7
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
8
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
9
+ from analysis.others.hyperparam_optimize import *
10
+
11
+
12
class KNNClassifierParams:
    """Hyper-parameter metadata for the k-nearest-neighbours classifier."""

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        return dict(
            n_neighbors=StaticValue.INT,
            weights=StaticValue.STR,
            p=StaticValue.INT,
        )

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        return dict(
            n_neighbors=[3, 5, 7, 9],
            weights=["uniform", "distance"],
            p=[1, 2],
        )
28
+
29
+
30
def knn_classifier(container, params_list):
    """Train a ``KNeighborsClassifier`` and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the estimator is fitted
    directly and ``params_list`` does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    # This module has no explicit numpy import; importing here avoids relying
    # on a wildcard import happening to expose ``np``.
    import numpy as np

    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    params = transform_params_list(KNNClassifierParams, params_list)

    knn_classifier_model = KNeighborsClassifier()

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, knn_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, knn_classifier_model, x_train, y_train)
    else:
        best_model = knn_classifier_model
        best_model.fit(x_train, y_train)

    info = {"参数": best_model.get_params()}

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
    # Aggregate across the CV folds (axis 1) for every training size.
    container.set_learning_curve_values(
        train_sizes,
        np.mean(train_scores, axis=1),
        np.std(train_scores, axis=1),
        np.mean(test_scores, axis=1),
        np.std(test_scores, axis=1),
    )

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
69
+
70
+
71
class KNNRegressionParams:
    """Hyper-parameter metadata for the k-nearest-neighbours regressor."""

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        type_tags = {}
        type_tags["n_neighbors"] = StaticValue.INT
        type_tags["weights"] = StaticValue.STR
        type_tags["p"] = StaticValue.INT
        return type_tags

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        candidates = {}
        candidates["n_neighbors"] = [3, 5, 7, 9]
        candidates["weights"] = ["uniform", "distance"]
        candidates["p"] = [1, 2]
        return candidates
87
+
88
+
89
def knn_regressor(container, params_list):
    """Train a ``KNeighborsRegressor`` and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the estimator is fitted
    directly and ``params_list`` does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    # This module has no explicit numpy import; importing here avoids relying
    # on a wildcard import happening to expose ``np``.
    import numpy as np

    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    params = transform_params_list(KNNRegressionParams, params_list)

    knn_regression_model = KNeighborsRegressor()

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, knn_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, knn_regression_model, x_train, y_train)
    else:
        best_model = knn_regression_model
        best_model.fit(x_train, y_train)

    info = {"参数": best_model.get_params()}

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
    # Aggregate across the CV folds (axis 1) for every training size.
    container.set_learning_curve_values(
        train_sizes,
        np.mean(train_scores, axis=1),
        np.std(train_scores, axis=1),
        np.mean(test_scores, axis=1),
        np.std(test_scores, axis=1),
    )

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
analysis/model_train/gradient_model.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.ensemble import GradientBoostingRegressor
3
+ from sklearn.model_selection import learning_curve
4
+
5
+ from functions.process import transform_params_list, get_values_from_container_class
6
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
7
+ from analysis.others.hyperparam_optimize import *
8
+ from classes.static_custom_class import StaticValue
9
+
10
+
11
class GradientBoostingParams:
    """Hyper-parameter metadata for the gradient boosting regressor."""

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        return dict(
            n_estimators=StaticValue.INT,
            learning_rate=StaticValue.FLOAT,
            max_depth=StaticValue.INT,
            min_samples_split=StaticValue.INT,
            min_samples_leaf=StaticValue.INT,
            random_state=StaticValue.INT,
        )

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        return dict(
            n_estimators=[50, 100, 150],
            learning_rate=[0.01, 0.1, 0.2],
            max_depth=[3, 5, 7],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
            # Single candidate: the project-wide seed.
            random_state=[StaticValue.RANDOM_STATE],
        )
33
+
34
+
35
def gradient_boosting_regressor(container, params_list):
    """Train a ``GradientBoostingRegressor`` and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the estimator is fitted
    directly and ``params_list`` does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    x_train, y_train, x_test, y_test, search_mode = get_values_from_container_class(container)
    search_space = transform_params_list(GradientBoostingParams, params_list)

    estimator = GradientBoostingRegressor(random_state=StaticValue.RANDOM_STATE)

    if search_mode == "grid_search":
        fitted = grid_search(search_space, estimator, x_train, y_train)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, estimator, x_train, y_train)
    else:
        estimator.fit(x_train, y_train)
        fitted = estimator

    info = {"参数": fitted.get_params()}

    y_pred = fitted.predict(x_test)
    container.set_y_pred(y_pred)

    sizes, fold_train, fold_test = learning_curve(fitted, x_train, y_train, cv=5)
    # Order: train mean, train std, test mean, test std, each over the folds.
    curve_stats = [
        reduce_folds(scores, axis=1)
        for scores in (fold_train, fold_test)
        for reduce_folds in (np.mean, np.std)
    ]
    container.set_learning_curve_values(sizes, *curve_stats)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(fitted)
    return container
analysis/model_train/kernel_model.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.model_selection import learning_curve
3
+ from sklearn.svm import SVC
4
+ from sklearn.svm import SVR
5
+
6
+ from classes.static_custom_class import StaticValue
7
+ from functions.process import get_values_from_container_class, transform_params_list
8
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
9
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
10
+ from analysis.others.hyperparam_optimize import *
11
+
12
+
13
class SVMRegressionParams:
    """Hyper-parameter metadata for the support vector regressor."""

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        return dict(
            kernel=StaticValue.STR,
            C=StaticValue.FLOAT,
            gamma=StaticValue.FLOAT,
            epsilon=StaticValue.FLOAT,
        )

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        return dict(
            kernel=["linear", "rbf"],
            C=[0.1, 1, 10, 100],
            gamma=[0.01, 0.1, 1, 10],
            epsilon=[0.01, 0.1, 1],
        )
31
+
32
+
33
def svm_regressor(container, params_list):
    """Train an ``SVR`` model and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the preconfigured
    estimator is fitted directly and ``params_list`` does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    x_train, y_train, x_test, y_test, search_mode = get_values_from_container_class(container)
    search_space = transform_params_list(SVMRegressionParams, params_list)

    estimator = SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.1)

    if search_mode == "grid_search":
        fitted = grid_search(search_space, estimator, x_train, y_train)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, estimator, x_train, y_train)
    else:
        estimator.fit(x_train, y_train)
        fitted = estimator

    info = {"参数": fitted.get_params()}

    y_pred = fitted.predict(x_test)
    container.set_y_pred(y_pred)

    sizes, fold_train, fold_test = learning_curve(fitted, x_train, y_train, cv=5)
    # Aggregate across the CV folds (axis 1) for every training size.
    container.set_learning_curve_values(
        sizes,
        np.mean(fold_train, axis=1),
        np.std(fold_train, axis=1),
        np.mean(fold_test, axis=1),
        np.std(fold_test, axis=1),
    )

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(fitted)
    return container
73
+
74
+
75
class SVMClassifierParams:
    """Hyper-parameter metadata for the support vector classifier."""

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        return dict(
            C=StaticValue.FLOAT,
            kernel=StaticValue.STR,
            gamma=StaticValue.FLOAT,
            random_state=StaticValue.INT,
        )

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        return dict(
            C=[0.1, 1, 10, 100],
            kernel=["linear", "rbf", "poly"],
            gamma=[0.1, 1, 10],
            # Single candidate: the project-wide seed.
            random_state=[StaticValue.RANDOM_STATE],
        )
93
+
94
+
95
def svm_classifier(container, params_list):
    """Train an ``SVC`` model and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the estimator is fitted
    directly and ``params_list`` does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    x_train, y_train, x_test, y_test, search_mode = get_values_from_container_class(container)
    search_space = transform_params_list(SVMClassifierParams, params_list)

    estimator = SVC(kernel="rbf", random_state=StaticValue.RANDOM_STATE)

    if search_mode == "grid_search":
        fitted = grid_search(search_space, estimator, x_train, y_train)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, estimator, x_train, y_train)
    else:
        estimator.fit(x_train, y_train)
        fitted = estimator

    info = {"参数": fitted.get_params()}

    y_pred = fitted.predict(x_test)
    container.set_y_pred(y_pred)

    sizes, fold_train, fold_test = learning_curve(fitted, x_train, y_train, cv=5)
    # Order: train mean, train std, test mean, test std, each over the folds.
    curve_stats = [
        reduce_folds(scores, axis=1)
        for scores in (fold_train, fold_test)
        for reduce_folds in (np.mean, np.std)
    ]
    container.set_learning_curve_values(sizes, *curve_stats)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(fitted)
    return container
analysis/model_train/linear_model.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import gradio as gr
3
+ from sklearn.linear_model import ElasticNet
4
+ from sklearn.linear_model import Lasso
5
+ from sklearn.linear_model import LinearRegression
6
+ from sklearn.linear_model import LogisticRegression
7
+ from sklearn.linear_model import Ridge
8
+ from sklearn.model_selection import learning_curve
9
+ from sklearn.pipeline import Pipeline
10
+ from sklearn.preprocessing import PolynomialFeatures
11
+
12
+ from functions.process import get_values_from_container_class, transform_params_list
13
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
14
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
15
+ from analysis.others.hyperparam_optimize import *
16
+ from classes.static_custom_class import StaticValue
17
+
18
+
19
class LinearRegressionParams:
    """Hyper-parameter metadata for the linear regression family.

    ``sort`` is the estimator name. ``"Lasso"``, ``"Ridge"`` and
    ``"ElasticNet"`` add ``alpha`` and ``random_state`` to the
    ``fit_intercept`` entry that every other name gets on its own.
    """

    @classmethod
    def get_params_type(cls, sort):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        type_tags = {"fit_intercept": StaticValue.BOOL}
        if sort in ("Lasso", "Ridge", "ElasticNet"):
            type_tags["alpha"] = StaticValue.FLOAT
            type_tags["random_state"] = StaticValue.INT
        return type_tags

    @classmethod
    def get_params(cls, sort):
        """Return a fresh mapping of parameter name to its candidate values."""
        candidates = {"fit_intercept": [True, False]}
        if sort in ("Lasso", "Ridge", "ElasticNet"):
            candidates["alpha"] = [0.001, 0.01, 0.1, 1.0, 10.0]
            candidates["random_state"] = [StaticValue.RANDOM_STATE]
        return candidates
45
+
46
+
47
def linear_regressor(container, params_list, model=None):
    """Train a linear regression variant and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the estimator is fitted
    directly. If any of that raises, a Gradio warning is issued and a plain
    ``LinearRegression`` is fitted instead.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.
        model: Estimator name, ``"Lasso"``, ``"Ridge"``, ``"ElasticNet"`` or
            ``"LinearRegression"``. Anything else selects ``LinearRegression``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    x_train, y_train, x_test, y_test, search_mode = get_values_from_container_class(container)
    search_space = transform_params_list(LinearRegressionParams, params_list, model)

    # The regularised variants share one construction recipe; every other
    # name falls back to ordinary least squares.
    regularised = {"Lasso": Lasso, "Ridge": Ridge, "ElasticNet": ElasticNet}
    if model in regularised:
        estimator = regularised[model](alpha=0.1, random_state=StaticValue.RANDOM_STATE)
    else:
        estimator = LinearRegression()

    try:
        if search_mode == "grid_search":
            fitted = grid_search(search_space, estimator, x_train, y_train)
        elif search_mode == "bayes_search":
            fitted = bayes_search(search_space, estimator, x_train, y_train)
        else:
            estimator.fit(x_train, y_train)
            fitted = estimator
    except Exception:
        # Best-effort fallback: warn in the UI and train the default model.
        gr.Warning("超参数设置有误,将按照默认模型训练")
        fitted = LinearRegression()
        fitted.fit(x_train, y_train)

    info = {"参数": fitted.get_params()}

    y_pred = fitted.predict(x_test)
    container.set_y_pred(y_pred)

    sizes, fold_train, fold_test = learning_curve(fitted, x_train, y_train, cv=5)
    # Aggregate across the CV folds (axis 1) for every training size.
    container.set_learning_curve_values(
        sizes,
        np.mean(fold_train, axis=1),
        np.std(fold_train, axis=1),
        np.mean(fold_test, axis=1),
        np.std(fold_test, axis=1),
    )

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(fitted)
    return container
110
+
111
+
112
class PolynomialRegressionParams:
    """Hyper-parameter metadata for the polynomial regression pipeline.

    Keys use the ``<step>__<parameter>`` form, with the step names
    ``polynomial_features`` and ``linear_regression_model``.
    """

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        return dict(
            polynomial_features__degree=StaticValue.INT,
            linear_regression_model__fit_intercept=StaticValue.BOOL,
        )

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        return dict(
            polynomial_features__degree=[2, 3],
            linear_regression_model__fit_intercept=[True, False],
        )
126
+
127
+
128
def polynomial_regressor(container, params_list):
    """Train a polynomial regression pipeline and store the results on ``container``.

    The model is a ``Pipeline`` of ``PolynomialFeatures(degree=2)`` (step
    ``"polynomial_features"``) followed by ``LinearRegression`` (step
    ``"linear_regression_model"``). The train/test split and the search mode
    come from ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the pipeline is fitted
    directly and ``params_list`` does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    params = transform_params_list(PolynomialRegressionParams, params_list)

    polynomial_regression_model = Pipeline([
        ("polynomial_features", PolynomialFeatures(degree=2)),
        ("linear_regression_model", LinearRegression()),
    ])

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, polynomial_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, polynomial_regression_model, x_train, y_train)
    else:
        best_model = polynomial_regression_model
        best_model.fit(x_train, y_train)

    info = {"参数": best_model.get_params()}

    # Predict through the fitted pipeline. Calling fit_transform on the
    # feature step would re-fit it on the test data and mutate the model.
    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
    # Aggregate across the CV folds (axis 1) for every training size.
    container.set_learning_curve_values(
        train_sizes,
        np.mean(train_scores, axis=1),
        np.std(train_scores, axis=1),
        np.mean(test_scores, axis=1),
        np.std(test_scores, axis=1),
    )

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
180
+
181
+
182
class LogisticRegressionParams:
    """Hyper-parameter metadata for the logistic regression classifier."""

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        return dict(
            C=StaticValue.FLOAT,
            max_iter=StaticValue.INT,
            solver=StaticValue.STR,
            random_state=StaticValue.INT,
        )

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        return dict(
            C=[0.001, 0.01, 0.1, 1.0, 10.0],
            max_iter=[100, 200, 300],
            solver=["liblinear", "lbfgs", "newton-cg", "sag", "saga"],
            # Single candidate: the project-wide seed.
            random_state=[StaticValue.RANDOM_STATE],
        )
200
+
201
+
202
def logistic_classifier(container, params_list):
    """Train a ``LogisticRegression`` classifier and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the estimator is fitted
    directly and ``params_list`` does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    x_train, y_train, x_test, y_test, search_mode = get_values_from_container_class(container)
    search_space = transform_params_list(LogisticRegressionParams, params_list)

    estimator = LogisticRegression(random_state=StaticValue.RANDOM_STATE)

    if search_mode == "grid_search":
        fitted = grid_search(search_space, estimator, x_train, y_train)
    elif search_mode == "bayes_search":
        fitted = bayes_search(search_space, estimator, x_train, y_train)
    else:
        estimator.fit(x_train, y_train)
        fitted = estimator

    info = {"参数": fitted.get_params()}

    y_pred = fitted.predict(x_test)
    container.set_y_pred(y_pred)

    sizes, fold_train, fold_test = learning_curve(fitted, x_train, y_train, cv=5)
    # Aggregate across the CV folds (axis 1) for every training size.
    container.set_learning_curve_values(
        sizes,
        np.mean(fold_train, axis=1),
        np.std(fold_train, axis=1),
        np.mean(fold_test, axis=1),
        np.std(fold_test, axis=1),
    )

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(fitted)
    return container
analysis/model_train/tree_model.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import lightgbm as lightGBMClassifier
2
+ from sklearn.ensemble import RandomForestClassifier
3
+ from sklearn.ensemble import RandomForestRegressor
4
+ from sklearn.model_selection import learning_curve
5
+ from sklearn.tree import DecisionTreeClassifier
6
+ from xgboost import XGBClassifier
7
+
8
+ from analysis.others.shap_model import *
9
+ from functions.process import get_values_from_container_class, transform_params_list
10
+ from metrics.calculate_classification_metrics import calculate_classification_metrics
11
+ from metrics.calculate_regression_metrics import calculate_regression_metrics
12
+ from analysis.others.hyperparam_optimize import *
13
+ from classes.static_custom_class import StaticValue
14
+
15
+
16
class RandomForestRegressionParams:
    """Hyper-parameter metadata for the random forest regressor."""

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        names = (
            "n_estimators",
            "max_depth",
            "min_samples_split",
            "min_samples_leaf",
            "random_state",
        )
        # Every tunable parameter of this model is tagged as an integer.
        return dict.fromkeys(names, StaticValue.INT)

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        return dict(
            n_estimators=[10, 50, 100, 200],
            # NOTE(review): scikit-learn documents max_depth as a positive int
            # or None; 0 looks like a stand-in for "no limit". Confirm how the
            # search helpers treat it.
            max_depth=[0, 10, 20, 30],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
            # Single candidate: the project-wide seed.
            random_state=[StaticValue.RANDOM_STATE],
        )
36
+
37
+
38
def random_forest_regressor(container, params_list):
    """Train a ``RandomForestRegressor`` and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the preconfigured
    estimator (``n_estimators=5``) is fitted directly and ``params_list``
    does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    # This module has no explicit numpy import; importing here avoids relying
    # on a wildcard import happening to expose ``np``.
    import numpy as np

    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    params = transform_params_list(RandomForestRegressionParams, params_list)

    random_forest_regression_model = RandomForestRegressor(n_estimators=5, random_state=StaticValue.RANDOM_STATE)

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, random_forest_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, random_forest_regression_model, x_train, y_train)
    else:
        best_model = random_forest_regression_model
        best_model.fit(x_train, y_train)

    info = {"参数": best_model.get_params()}

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
    # Aggregate across the CV folds (axis 1) for every training size.
    container.set_learning_curve_values(
        train_sizes,
        np.mean(train_scores, axis=1),
        np.std(train_scores, axis=1),
        np.mean(test_scores, axis=1),
        np.std(test_scores, axis=1),
    )

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
78
+
79
+
80
class DecisionTreeClassifierParams:
    """Hyper-parameter metadata for the decision tree classifier."""

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        return dict(
            criterion=StaticValue.STR,
            splitter=StaticValue.STR,
            max_depth=StaticValue.INT,
            min_samples_split=StaticValue.INT,
            min_samples_leaf=StaticValue.INT,
            random_state=StaticValue.INT,
        )

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        return dict(
            criterion=["gini", "entropy"],
            splitter=["best", "random"],
            # NOTE(review): scikit-learn documents max_depth as a positive int
            # or None; 0 looks like a stand-in for "no limit". Confirm how the
            # search helpers treat it.
            max_depth=[0, 5, 10, 15],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
            # Single candidate: the project-wide seed.
            random_state=[StaticValue.RANDOM_STATE],
        )
102
+
103
+
104
def decision_tree_classifier(container, params_list):
    """Train a ``DecisionTreeClassifier`` and store the results on ``container``.

    The train/test split and the search mode come from
    ``get_values_from_container_class(container)``. With mode
    ``"grid_search"`` or ``"bayes_search"`` the matching helper receives the
    transformed ``params_list``; for any other mode the estimator is fitted
    directly and ``params_list`` does not influence it.

    Args:
        container: Project container object, read through
            ``get_values_from_container_class`` and updated through its
            ``set_*`` methods.
        params_list: Raw hyper-parameter specification, normalised with
            ``transform_params_list``.

    Returns:
        The same ``container`` after predictions, learning-curve statistics,
        the info dict (keys ``"参数"`` and ``"指标"``), the status
        ``"trained"`` and the model have been set on it.
    """
    # This module has no explicit numpy import; importing here avoids relying
    # on a wildcard import happening to expose ``np``.
    import numpy as np

    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    params = transform_params_list(DecisionTreeClassifierParams, params_list)

    # Named for what it is; the previous local name was copied from the
    # random-forest trainer.
    decision_tree_classifier_model = DecisionTreeClassifier(random_state=StaticValue.RANDOM_STATE)

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, decision_tree_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, decision_tree_classifier_model, x_train, y_train)
    else:
        best_model = decision_tree_classifier_model
        best_model.fit(x_train, y_train)

    info = {"参数": best_model.get_params()}

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
    # Aggregate across the CV folds (axis 1) for every training size.
    container.set_learning_curve_values(
        train_sizes,
        np.mean(train_scores, axis=1),
        np.std(train_scores, axis=1),
        np.mean(test_scores, axis=1),
        np.std(test_scores, axis=1),
    )

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
143
+
144
+
145
class RandomForestClassifierParams:
    """Hyper-parameter metadata for the random forest classifier."""

    @classmethod
    def get_params_type(cls):
        """Return a mapping of parameter name to its ``StaticValue`` type tag."""
        return dict(
            criterion=StaticValue.STR,
            n_estimators=StaticValue.INT,
            max_depth=StaticValue.INT,
            min_samples_split=StaticValue.INT,
            min_samples_leaf=StaticValue.INT,
            random_state=StaticValue.INT,
        )

    @classmethod
    def get_params(cls):
        """Return a fresh mapping of parameter name to its candidate values."""
        return dict(
            criterion=["gini", "entropy"],
            n_estimators=[50, 100, 150],
            # NOTE(review): scikit-learn documents max_depth as a positive int
            # or None; 0 looks like a stand-in for "no limit". Confirm how the
            # search helpers treat it.
            max_depth=[0, 5, 10, 15],
            min_samples_split=[2, 5, 10],
            min_samples_leaf=[1, 2, 4],
            # Single candidate: the project-wide seed.
            random_state=[StaticValue.RANDOM_STATE],
        )
167
+
168
+
169
# Random forest classification
def random_forest_classifier(container, params_list):
    """Train a random forest classifier and store the results on ``container``.

    The search space is built from ``params_list``; depending on the
    container's ``hyper_params_optimize`` setting the estimator is tuned with
    grid search, tuned with Bayesian search, or fitted as-is. Predictions,
    learning-curve statistics, parameters, metrics and the fitted model are
    written back to the container, which is then returned.
    """
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)

    candidate_grid = transform_params_list(RandomForestClassifierParams, params_list)
    forest = RandomForestClassifier(n_estimators=5, random_state=StaticValue.RANDOM_STATE)

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(candidate_grid, forest, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(candidate_grid, forest, x_train, y_train)
    else:
        # No search requested: fit the default estimator directly.
        best_model = forest
        best_model.fit(x_train, y_train)

    model_params = best_model.get_params()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
    # Collapse the fold axis so each training size has a single mean and std.
    curve_stats = [
        np.mean(train_scores, axis=1),
        np.std(train_scores, axis=1),
        np.mean(test_scores, axis=1),
        np.std(test_scores, axis=1),
    ]
    container.set_learning_curve_values(train_sizes, *curve_stats)

    info = {"参数": model_params}
    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
209
+
210
+
211
class XgboostClassifierParams:
    """Hyperparameter metadata for the XGBoost classifier."""

    @classmethod
    def get_params_type(cls):
        """Return the value-type tag of every tunable hyperparameter."""
        int_type = StaticValue.INT
        float_type = StaticValue.FLOAT
        return dict(
            n_estimators=int_type,
            learning_rate=float_type,
            max_depth=int_type,
            min_child_weight=int_type,
            gamma=float_type,
            subsample=float_type,
            colsample_bytree=float_type,
            random_state=int_type,
        )

    @classmethod
    def get_params(cls):
        """Return the default search grid (candidate values per hyperparameter)."""
        return dict(
            n_estimators=[50, 100, 150],
            learning_rate=[0.01, 0.1, 0.2],
            max_depth=[3, 4, 5],
            min_child_weight=[1, 2, 3],
            gamma=[0, 0.1, 0.2],
            subsample=[0.5, 0.8, 0.9, 1.0],
            colsample_bytree=[0.8, 0.9, 1.0],
            random_state=[StaticValue.RANDOM_STATE],
        )
237
+
238
+
239
# XGBoost classification
def xgboost_classifier(container, params_list):
    """Train an XGBoost classifier and store the results on ``container``.

    The search space is built from ``params_list``; depending on the
    container's ``hyper_params_optimize`` setting the estimator is tuned with
    grid search, tuned with Bayesian search, or fitted as-is. Predictions,
    learning-curve statistics, parameters, metrics and the fitted model are
    written back to the container, which is then returned.
    """
    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)

    tuning_space = transform_params_list(XgboostClassifierParams, params_list)
    booster = XGBClassifier(random_state=StaticValue.RANDOM_STATE)

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(tuning_space, booster, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(tuning_space, booster, x_train, y_train)
    else:
        # No search requested: fit the default estimator directly.
        best_model = booster
        best_model.fit(x_train, y_train)

    info = dict()
    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    # One mean/std pair per training size, computed across the CV folds.
    train_mean, train_std = np.mean(train_scores, axis=1), np.std(train_scores, axis=1)
    test_mean, test_std = np.mean(test_scores, axis=1), np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_mean, train_std, test_mean, test_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
279
+
280
+
281
class LightGBMClassifierParams:
    """Hyperparameter metadata for the LightGBM classifier.

    Mirrors the other ``*Params`` classes: ``get_params`` supplies the default
    search grid and ``get_params_type`` the type tag used to parse user input.
    Previously ``get_params`` returned ``None`` and ``get_params_type`` did not
    exist, so ``transform_params_list`` could not process this class.
    """

    @classmethod
    def get_params_type(cls):
        """Return the value-type tag of every tunable hyperparameter."""
        return {
            "n_estimators": StaticValue.INT,
            "learning_rate": StaticValue.FLOAT,
            "max_depth": StaticValue.INT,
            "num_leaves": StaticValue.INT,
            "min_child_samples": StaticValue.INT,
            "colsample_bytree": StaticValue.FLOAT,
            "random_state": StaticValue.INT,
        }

    @classmethod
    def get_params(cls):
        """Return the default search grid (candidate values per hyperparameter)."""
        return {
            "n_estimators": [50, 100, 150],
            "learning_rate": [0.01, 0.1, 0.2],
            "max_depth": [3, 5, 7],
            "num_leaves": [15, 31, 63],
            "min_child_samples": [10, 20, 30],
            "colsample_bytree": [0.8, 0.9, 1.0],
            "random_state": [StaticValue.RANDOM_STATE],
        }
285
+
286
+
287
# LightGBM classification
def lightGBM_classifier(container, params_list):
    """Train a LightGBM classifier and store the results on ``container``.

    The search space is built from ``params_list``; depending on the
    container's ``hyper_params_optimize`` setting the estimator is tuned with
    grid search, tuned with Bayesian search, or fitted as-is. Predictions,
    learning-curve statistics, parameters, metrics and the fitted model are
    written back to the container, which is then returned.
    """
    # Imported locally so this block brings its own estimator into scope; the
    # original referenced an undefined name (``lightGBMClassifier``).
    from lightgbm import LGBMClassifier

    x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
    info = {}

    params = transform_params_list(LightGBMClassifierParams, params_list)

    # Instantiate the estimator (the original passed the class object itself).
    lightgbm_classifier_model = LGBMClassifier(random_state=StaticValue.RANDOM_STATE)

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, lightgbm_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, lightgbm_classifier_model, x_train, y_train)
    else:
        best_model = lightgbm_classifier_model
        # scikit-learn style estimators are trained with fit(), not train().
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    # One mean/std pair per training size, computed across the CV folds.
    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
327
+
328
+
329
+
analysis/others/hyperparam_optimize.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.model_selection import GridSearchCV
2
+ from skopt import BayesSearchCV
3
+
4
+
5
def grid_search(params, model, x_train, y_train, scoring=None):
    """Tune ``model`` with an exhaustive grid search and return the best estimator.

    Args:
        params: Mapping of parameter name to the list of candidate values.
        model: Estimator to tune.
        x_train: Training features.
        y_train: Training targets; flattened with ``ravel()`` before fitting.
        scoring: Optional scoring specification forwarded to ``GridSearchCV``.
            ``None`` keeps the estimator's default scorer.

    Returns:
        The refitted best estimator found by 3-fold cross-validated search.
    """
    # ``scoring`` was previously accepted but never forwarded to the search.
    grid_search_model = GridSearchCV(model, params, cv=3, n_jobs=-1, scoring=scoring)
    grid_search_model.fit(x_train, y_train.ravel())

    return grid_search_model.best_estimator_
17
+
18
+
19
def bayes_search(params, model, x_train, y_train, scoring=None):
    """Tune ``model`` with Bayesian optimisation and return the best estimator.

    Args:
        params: Search space passed to ``BayesSearchCV``.
        model: Estimator to tune.
        x_train: Training features.
        y_train: Training targets; flattened with ``ravel()`` before fitting,
            matching ``grid_search``.
        scoring: Optional scoring specification forwarded to ``BayesSearchCV``.
            ``None`` keeps the estimator's default scorer.

    Returns:
        The refitted best estimator found after 50 iterations of 3-fold
        cross-validated search.
    """
    # ``scoring`` was previously accepted but never forwarded to the search.
    bayes_search_model = BayesSearchCV(model, params, cv=3, n_iter=50, n_jobs=-1, scoring=scoring)
    bayes_search_model.fit(x_train, y_train.ravel())

    return bayes_search_model.best_estimator_
analysis/others/shap_model.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import numpy as np
3
+ import shap
4
+
5
+
6
def draw_shap_beeswarm(model, x, feature_names, type, paint_object):
    """Draw a SHAP summary plot for ``model`` over the samples in ``x``.

    A kernel explainer is built around ``model.predict`` with ``x`` as the
    background data, ``type`` is forwarded as the summary plot's
    ``plot_type``, and the figure is titled with ``paint_object.get_name()``.

    Returns:
        The ``matplotlib.pyplot`` module and the unchanged ``paint_object``.
    """
    kernel_explainer = shap.KernelExplainer(model.predict, x)
    explanation = kernel_explainer(x)

    shap.summary_plot(
        explanation,
        x,
        feature_names=feature_names,
        plot_type=type,
        show=False,
    )

    figure_title = paint_object.get_name()
    plt.title(figure_title)
    plt.tight_layout()

    return plt, paint_object
16
+
17
+
18
def draw_waterfall(model, x, feature_names, number, paint_object):
    """Draw a SHAP waterfall plot for the explanation at index ``number``.

    A kernel explainer is built around ``model.predict`` with ``x`` as the
    background data, every row of ``x`` is explained, and the entry selected
    by ``number`` is plotted. The figure is titled with
    ``paint_object.get_name()``.

    Returns:
        The ``matplotlib.pyplot`` module and the unchanged ``paint_object``.
    """
    kernel_explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
    explanation = kernel_explainer(x)

    selected = explanation[number]
    shap.waterfall_plot(selected, show=False)

    figure_title = paint_object.get_name()
    plt.title(figure_title)
    plt.tight_layout()

    return plt, paint_object
28
+
29
+
30
def draw_force(model, x, feature_names, number, paint_object):
    """Draw a SHAP force plot for the sample ``x[number]``.

    A kernel explainer is built around ``model.predict`` with ``x`` as the
    background data; only the selected sample is explained and rendered with
    the matplotlib backend. The figure is titled with
    ``paint_object.get_name()``.

    Returns:
        The ``matplotlib.pyplot`` module and the unchanged ``paint_object``.
    """
    kernel_explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
    sample = x[number]
    explanation = kernel_explainer(sample)

    shap.force_plot(
        kernel_explainer.expected_value,
        explanation.values,
        feature_names=feature_names,
        show=False,
        matplotlib=True,
    )

    figure_title = paint_object.get_name()
    plt.title(figure_title)
    plt.tight_layout()

    return plt, paint_object
40
+
41
+
42
def draw_dependence(model, x, feature_names, col, paint_object):
    """Draw a SHAP dependence plot for the feature named ``col``.

    A kernel explainer is built around ``model.predict`` with ``x`` as the
    background data. The feature is located by its position in
    ``feature_names`` and the figure is titled with
    ``paint_object.get_name()``.

    Returns:
        The ``matplotlib.pyplot`` module and the unchanged ``paint_object``.
    """
    kernel_explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
    explanation = kernel_explainer(x)

    feature_position = feature_names.index(col)
    shap.dependence_plot(
        feature_position,
        explanation.values,
        x,
        feature_names=feature_names,
        show=False,
    )

    figure_title = paint_object.get_name()
    plt.title(figure_title)
    plt.tight_layout()

    return plt, paint_object
52
+
53
+
54
+
55
+
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
classes/__init__.py ADDED
File without changes
classes/static_custom_class.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Storage class for global static values
class StaticValue:
    """Namespace of project-wide constants: limits, seed, type tags and palettes."""

    # Maximum number of hyperparameter textbox components
    MAX_PARAMS_NUM = 60
    # Maximum number of components used to display colors and labels
    MAX_NUM = 20
    # Random seed (dataset split + model training)
    RANDOM_STATE = 123

    # Parameter type tags
    INT = "int"
    FLOAT = "float"
    BOOL = "bool"
    STR = "str"

    # Number of times each plotting color group is repeated
    COLOR_ITER_NUM = 3

    # Color groups (each list is repeated COLOR_ITER_NUM times)
    COLORS = [
        "#ca5353",
        "#c874a5",
        "#b674c8",
        "#8274c8",
        "#748dc8",
        "#74acc8",
        "#74c8b7",
        "#74c88d",
        "#a6c874",
        "#e0e27e",
        "#df9b77",
        "#404040",
        "#999999",
        "#d4d4d4"
    ] * COLOR_ITER_NUM

    COLORS_0 = [
        "#8074C8",
        "#7895C1",
        "#A8CBDF",
        "#992224",
        "#B54764",
        "#E3625D",
        "#EF8B67",
        "#F0C284"
    ] * COLOR_ITER_NUM

    COLORS_1 = [
        "#4A5F7E",
        "#719AAC",
        "#72B063",
        "#94C6CD",
        "#B8DBB3",
        "#E29135"
    ] * COLOR_ITER_NUM

    COLORS_2 = [
        "#4485C7",
        "#D4562E",
        "#DBB428",
        "#682487",
        "#84BA42",
        "#7ABBDB",
        "#A51C36"
    ] * COLOR_ITER_NUM

    COLORS_3 = [
        "#8074C8",
        "#7895C1",
        "#A8CBDF",
        "#F5EBAE",
        "#F0C284",
        "#EF8B67",
        "#E3625D",
        "#B54764"
    ] * COLOR_ITER_NUM

    COLORS_4 = [
        "#979998",
        "#C69287",
        "#E79A90",
        "#EFBC91",
        "#E4CD87",
        "#FAE5BB",
        "#DDDDDF"
    ] * COLOR_ITER_NUM

    COLORS_5 = [
        "#91CCC0",
        "#7FABD1",
        "#F7AC53",
        "#EC6E66",
        "#B5CE4E",
        "#BD7795",
        "#7C7979"
    ] * COLOR_ITER_NUM

    COLORS_6 = [
        "#E9687A",
        "#F58F7A",
        "#FDE2D8",
        "#CFCFD0",
        "#B6B3D6"
    ] * COLOR_ITER_NUM
105
+
106
+
107
# Storage class for file-path related static values
class FilePath:
    """Path templates and base file names for generated images and spreadsheets."""

    # Templates; the "{}" placeholder is filled with one of the names below
    png_base = "./buffer/{}.png"
    excel_base = "./buffer/{}.xlsx"

    # [Plots]
    display_dataset = "current_excel_data"

    data_distribution_plot = "data_distribution_plot"
    descriptive_indicators_plot = "descriptive_indicators_plot"
    heatmap_plot = "heatmap_plot"
    learning_curve_plot = "learning_curve_plot"
    shap_beeswarm_plot = "shap_beeswarm_plot"
    data_fit_plot = "data_fit_plot"
    waterfall_plot = "waterfall_plot"
    force_plot = "force_plot"
    dependence_plot = "dependence_plot"
    # Plot Step 15: add new plotting method names here
125
+
126
+
127
# Storage class for model-name static values
class MN:  # ModelName
    """Identifier strings for task types, models and plot kinds."""

    classification = "classification"
    regression = "regression"

    # [Models]
    linear_regressor = "linear regressor"
    polynomial_regressor = "polynomial regressor"
    logistic_classifier = "logistic classifier"
    decision_tree_classifier = "decision tree classifier"
    random_forest_classifier = "random forest classifier"
    random_forest_regressor = "random forest regressor"
    xgboost_classifier = "xgboost classifier"
    lightGBM_classifier = "lightGBM classifier"
    gradient_boosting_regressor = "gradient boosting regressor"
    svm_classifier = "svm classifier"
    svm_regressor = "svm regressor"
    knn_classifier = "knn classifier"
    knn_regressor = "knn regressor"
    naive_bayes_classifier = "naive bayes classifier"
    # Model Step 4: add new model names here

    # [Plots]
    data_distribution = "data_distribution"
    descriptive_indicators = "descriptive_indicators"
    heatmap = "heatmap"
    learning_curve = "learning_curve"
    shap_beeswarm = "shap_beeswarm"
    data_fit = "data_fit"
    waterfall = "waterfall"
    force = "force"
    dependence = "dependence"
    # Plot Step 4: add new plotting method names here
160
+
161
+
162
# Storage class for UI component label static values
class LN:  # LabelName
    """User-facing label text for the UI components."""

    choose_dataset_radio = "选择所需数据源 [必选]"
    display_total_col_num_text = "总列数"
    display_total_row_num_text = "总行数"
    display_na_list_text = "存在缺失值的列"
    del_all_na_col_button = "删除所有存在缺失值的列 [可选]"
    # The last character of this label was mis-encoded; restored to "数" so it
    # reads "number of duplicate rows", parallel to the row/column count labels.
    display_duplicate_num_text = "重复的行数"
    del_col_checkboxgroup = "选择所需删除的列"
    del_col_button = "删除 [可选]"
    remain_row_slider = "保留的行数"
    remain_row_button = "保留 [可选]"
    del_duplicate_button = "删除所有重复行 [可选]"
    encode_label_checkboxgroup = "选择所需标签编码的字符型数值列"
    display_encode_label_dataframe = "标签编码信息"
    encode_label_button = "字符型转数值型 [可选]"
    change_data_type_to_float_button = "将所有数据强制转换为浮点型(除第1列以外)[必选]"
    standardize_data_checkboxgroup = "选择所需标准化的列"
    standardize_data_button = "标准化 [可选]"
    select_as_y_radio = "选择因变量 [必选]"
    choose_assign_radio = "选择任务类型(同时会根据任务类型将第1列数据强制转换)[必选]"
    train_size_textbox = "分割出的训练集所占比例"
    model_optimize_radio = "选择超参数优化方法"
    model_train_input_params_dataframe = "超参数列表"
    model_train_button = "训练"
    model_train_params_dataframe = "训练后的模型参数"
    model_train_metrics_dataframe = "训练后的模型指标"
    select_as_model_radio = "选择所需训练的模型"

    # [Models]
    linear_regression_model_radio = "选择线性回归的模型"
    naive_bayes_classification_model_radio = "选择朴素贝叶斯分类的模型"
    # Model Step 5: add labels for new model-specific components here

    title_name_textbox = "标题"
    x_label_textbox = "x 轴名称"
    y_label_textbox = "y 轴名称"
    # One numbered label per color picker / legend slot
    colors = ["颜色 {}".format(i) for i in range(StaticValue.MAX_NUM)]
    labels = ["图例 {}".format(i) for i in range(StaticValue.MAX_NUM)]

    # [Plots]
    heatmap_is_rotate = "x轴标签是否旋转"
    heatmap_checkboxgroup = "选择所需绘制系数热力图的列"
    heatmap_button = "绘制系数热力图"
    data_distribution_radio = "选择所需绘制数据分布图的列"
    data_distribution_is_rotate = "x轴标签是否旋转"
    data_distribution_button = "绘制数据分布图"
    descriptive_indicators_checkboxgroup = "选择所需绘制箱线统计图的列"
    descriptive_indicators_is_rotate = "x轴标签是否旋转"
    descriptive_indicators_button = "绘制箱线统计图"
    learning_curve_checkboxgroup = "选择所需绘制学习曲线图的模型"
    learning_curve_button = "绘制学习曲线图"
    shap_beeswarm_radio = "选择所需绘制特征蜂群图的模型"
    shap_beeswarm_type = "选择图像类型"
    shap_beeswarm_button = "绘制特征蜂群图"
    data_fit_checkboxgroup = "选择所需绘制数据拟合图的模型"
    data_fit_button = "绘制数据拟合图"
    waterfall_radio = "选择所需绘制特征瀑布图的模型"
    waterfall_number = "输入相关特征的变量索引"
    waterfall_button = "绘制特征瀑布图"
    force_radio = "选择所需绘制特征力图的模型"
    force_number = "输入相关特征的变量索引"
    force_button = "绘制特征力图"
    dependence_radio = "选择所需绘制特征依赖图的模型"
    dependence_col = "选择相应的列"
    dependence_button = "绘制特征依赖图"
    # Plot Step 5: add labels for new plotting-method components here

    data_distribution_plot = "数据分布图"
    descriptive_indicators_plot = "箱线统计图"
    heatmap_plot = "系数热力图"
    learning_curve_plot = "学习曲线图"
    shap_beeswarm_plot = "特征蜂群图"
    data_fit_plot = "数据拟合图"
    waterfall_plot = "特征瀑布图"
    force_plot = "特征力图"
    dependence_plot = "特征依赖图"
    # Plot Step 6: add new plotting method names here
240
+
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
+
data/notes.md CHANGED
@@ -1,12 +1,198 @@
1
  # EasyMachineLearning
2
- ### 介绍
 
3
  - 版本:v1.0
4
  - 作者:李凌浩
5
- - 有任何新功能的想法和已出现的问题请和作者联系 ~
 
 
6
  - *( WX: llh13857750421 )*
7
- ### 尚未实现的功能
8
- 1. [困难] 模型训练的进度条可视化(sklearn模型训练函数无回调函数)
9
- 2. 模型训练完毕后保存模型文件,后续可直接加载
10
- 3. 数据分析AI助手(直接处理Excel数据)
11
- 4. PCA主成分分析
12
- 5. 聚类
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # EasyMachineLearning
2
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3
+ ## 介绍
4
  - 版本:v1.0
5
  - 作者:李凌浩
6
+ - 有任何出现的问题请私信 或 在Github上反馈 ~
7
+ - 有任何新功能的想法请加作者微信 ~
8
+ - 合作请联系作者 ~
9
  - *( WX: llh13857750421 )*
10
+ - ଘ(੭ˊ꒳​ˋ)੭
11
+
12
+ ## 注意事项
13
+ - 模型训练和可视化过程暂未实现进度条,后续版本可能会出该功能
14
+
15
+ ## 解释
16
+ ### 1.数据源
17
+ ##### *i.选择所需数据源 [必选]*
18
+ ```angular2html
19
+ ·自定义:上传自己的Excel表格作为数据源
20
+ ·Iris Dataset: 鸢尾花数据集 (用于分类任务) [sklearn内置数据集]
21
+ ·Wine Dataset: 酒的数据集 (用于分类任务) [sklearn内置数据集]
22
+ ·Breast Cancer Dataset: 乳腺癌数据集 (用于分类任务) [sklearn内置数据集]
23
+ ·Diabetes Dataset: 糖尿病数据集 (用于回归任务) [sklearn内置数据集]
24
+ ·California Housing Dataset: 加利福尼亚房价数据集 (用于回归任务) [程序内置数据集]
25
+
26
+ ·为机器学习任务的起始操作
27
+ ·更改数据源会自动清理所有已训练的模型缓存和其他部分缓存
28
+ ```
29
+ ### 2.当前数据信息
30
+ ```angular2html
31
+ ·总列数: 当前操作的数据的总列数
32
+ ·总行数: 当前操作的数据的总行数
33
+ ·保留的行数 [可选]: 拖动滑动条可以选择 当前操作的数据中所需保留的行数,其余行全部删除
34
+ ·存在缺失值的列: 显示当前操作的数据中有缺失值如Nan等的列名
35
+ ·删除所有存在缺失值的列 [可选]: 删除当前操作的数据中含有缺失值如Nan等的列
36
+ ·重复的行数: 当前操作的数据中一样的行数
37
+ ·删除所有重复行 [可选]: 删除当前操作的数据中一样的行
38
+
39
+ ·实时显示当前操作的数据 (只能查看,不能更改)
40
+ ·[*index]列为当前数据索引,非数据内的列 (自动添加该列是为了方便查看数据)
41
+ ·最左边第一列始终为因变量的列,其余为自变量的列
42
+ ·可随时将当前操作的数据下载到本地 (Excel格式)
43
+
44
+ ```
45
+ ### 3.数据处理
46
+ ##### *i.选择因变量 [必选]*
47
+ ```angular2html
48
+ ·当前操作数据中的所有列名
49
+
50
+ ·在显示的所有列名中选择作为任务因变量的列 (选中后自动将该列移动到当前数据的第一列)
51
+ ```
52
+ ##### *ii.将所有数据强制转换为浮点型 (除第1列以外) [必选]*
53
+ ```angular2html
54
+ ·"列名-数据类型"数据表: 当前操作中的数据的所有列对应的数据类型 (字符型为"object")
55
+
56
+ ·第1列因变量的列在这里不会成为被转换的对象
57
+ ```
58
+ ##### *iii.选择任务类型 (同时会根据任务类型将第1列数据强制转换) [必选]*
59
+ ```angular2html
60
+ ·分类
61
+ ·回归
62
+
63
+ ·选择分类会将第1列的数据强制转换为字符型数据
64
+ ·选择回归会将第1列的数据强制转换为浮点型数据
65
+ ```
66
+ ##### *iv.选择所需删除的列*
67
+ ```angular2html
68
+ ·当前操作数据中的所有列名
69
+
70
+ ·删除 [可选]: 在当前操作的数据中删除所选列
71
+ ```
72
+ ##### *v.选择所需标签编码的字符型数值列*
73
+ ```angular2html
74
+ ·当前操作数据中是字符型数据的所有列名
75
+
76
+ ·字符型转数值型 [可选]: 将选中的数据列强制转换为浮点型
77
+ ```
78
+ ##### *vi.选择所需标准化的列*
79
+ ```angular2html
80
+ ·当前操作数据中尚未标准化的所有列名
81
+
82
+ ·标准化 [可选]: 将选中的数据列根据各自列进行标准化 (变成0~1范围)
83
+ ```
84
+ ### 4.数据模型 (上述[必选]全部选择完毕后才会显示!)
85
+ ##### *i.选择所需训练的模型*
86
+ ```angular2html
87
+ ·当前可选择的所有模型名
88
+
89
+ ·选中后会展示当前选中模型的相关信息和选项
90
+ ```
91
+ ##### *ii.分割出的训练集所占比例*
92
+ ```angular2html
93
+ ·该比例为训练集占所有数据的比例
94
+ ·默认为0.8
95
+ ```
96
+ ##### *iii.选择超参数优化方法*
97
+ ```angular2html
98
+ ·无: 直接训练 (速度快) (如果其他超参数优化方法速度较慢可选择当前项看效果)
99
+ ·网格搜索: (速度相对适中) (请作为模型调参的首选项)
100
+ ·贝叶斯优化: (速度很慢)
101
+
102
+ ·请优先选择网格搜索
103
+ ·树模型如决策树、随机森林、XGBoost模型的超参数优化速度在数据量大的情况下非常慢,自行考虑是否需要等待
104
+ ·参数量越大速度越慢
105
+ ```
106
+ ##### *iv.超参数列表 (选择超参数优化方法后会显示)*
107
+ ```angular2html
108
+ ·橙色边框的文本框为超参数名称 (可修改)
109
+ ·每行无颜色边框的文本框为超参数对应的各个候选参数 (可修改)
110
+
111
+ ·将文本框中的参数删除后(空字符串),就相当于删除了该参数
112
+ ·只能修改和减少参数,不能增加
113
+ ·每次加载默认显示每个模型的默认参数字典
114
+ ```
115
+ ##### *v.模型是否完成训练*
116
+ ```angular2html
117
+ ·若选中的该模型训练完成后,会勾选
118
+ ```
119
+ ### 5.数据可视化
120
+ ##### *i.数据分布图*
121
+ ```angular2html
122
+ ·需要选择可视化的相应列
123
+ ·每列数据的数量统计的柱状图
124
+ ```
125
+ ##### *ii.箱线统计图*
126
+ ```angular2html
127
+ ·需要选择可视化的相应列
128
+ ·每列数据的常见统计量的箱线图
129
+ ```
130
+ ##### *iii.系数热力图*
131
+ ```angular2html
132
+ ·需要选择可视化的相应列
133
+ ·列与列之间的皮尔逊相关系数
134
+ ```
135
+ ##### *iv.学习曲线图*
136
+ ```angular2html
137
+ ·需要选择已训练的模型名称
138
+ ·训练集与验证集数据的拟合效果 (1个模型有2条曲线)
139
+ ```
140
+ ##### *v.数据拟合图*
141
+ ```angular2html
142
+ ·需要选择已训练的模型名称
143
+ ·测试集的真实数值曲线与模型预测数值曲线
144
+ ```
145
+ ##### *vi.特征蜂群图*
146
+ ```angular2html
147
+ ·需要选择已训练的模型名称
148
+ ·需要选择图像类型
149
+ ·特征对模型整体能力的表征程度
150
+ ```
151
+ ##### *vii.特征瀑布图*
152
+ ```angular2html
153
+ ·需要选择已训练的模型名称
154
+ ·需要选择相关特征的变量索引
155
+ ·特征对模型整体能力的表征程度
156
+ ```
157
+ ##### *viii.特征力图*
158
+ ```angular2html
159
+ ·需要选择已训练的模型名称
160
+ ·需要选择相关特征的变量索引
161
+ ·特征对模型整体能力的表征程度
162
+ ```
163
+ ##### *ix.特征依赖图*
164
+ ```angular2html
165
+ ·需要选择已训练的模型名称
166
+ ·需要选择对应的列
167
+ ·特征对模型整体能力的表征程度
168
+ ```
169
+ ##### *图例*
170
+ ```angular2html
171
+ ·图中每个图例的名称
172
+
173
+ ·图中有图例才会显示 (不支持中文)
174
+ ```
175
+ ##### *坐标轴*
176
+ ```angular2html
177
+ ·标题
178
+ ·x轴名称
179
+ ·y轴名称
180
+
181
+ ·不支持中文
182
+ ```
183
+ ##### *颜色*
184
+ ```angular2html
185
+ ·图中每个颜色对应的取色器和十六进制
186
+
187
+ ·图中有可更改的颜色才会显示
188
+ ```
189
+ ##### *图*
190
+ ```angular2html
191
+ ·可随时将当前操作的图片下载到本地 (png格式)
192
+ ```
193
+
194
+
195
+
196
+
197
+
198
+
design/__init__.py ADDED
File without changes
design/custom.css ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .params_name textarea {
2
+ font-weight: bold;
3
+ font-style: oblique;
4
+ border: solid #ee9900;
5
+ }
design/welcome.js ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Builds a banner that reveals the welcome text one letter at a time and
 * inserts it at the top of the Gradio container.
 *
 * @returns {string} A fixed confirmation message.
 */
function createGradioAnimation() {
    var banner = document.createElement('div');
    banner.id = 'gradio-animation';

    var bannerStyle = banner.style;
    bannerStyle.fontSize = '2em';
    bannerStyle.fontWeight = 'bold';
    bannerStyle.textAlign = 'center';
    bannerStyle.marginBottom = '20px';

    var greeting = 'Welcome to EasyMachineLearning!';

    // Schedule one span per character, 250 ms apart.
    greeting.split('').forEach(function (character, position) {
        setTimeout(function () {
            var span = document.createElement('span');
            span.style.opacity = '0';
            span.style.transition = 'opacity 0.5s';
            span.innerText = character;

            banner.appendChild(span);

            // Fade the letter in shortly after it has been attached.
            setTimeout(function () {
                span.style.opacity = '1';
            }, 50);
        }, position * 250);
    });

    var root = document.querySelector('.gradio-container');
    root.insertBefore(banner, root.firstChild);

    return 'Animation created';
}
functions/__init__.py ADDED
File without changes
functions/process.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def get_values_from_container_class(container):
    """Return the data splits and search mode stored on ``container``.

    Returns:
        A 5-tuple ``(x_train, y_train, x_test, y_test, hyper_params_optimize)``
        read from the attributes of the same names.
    """
    attribute_names = ("x_train", "y_train", "x_test", "y_test", "hyper_params_optimize")
    return tuple(getattr(container, attribute) for attribute in attribute_names)
3
+
4
+
5
def transform_params_list(params_class, params_list, model=None):
    """Turn a flat ``[name, value, value, name, value, ...]`` list into a grid.

    An entry counts as a parameter name when it is one of the keys returned by
    ``params_class.get_params``; every following entry up to the next name is
    a candidate value for it. Values are then converted according to the type
    tag from ``params_class.get_params_type``.

    Args:
        params_class: Class exposing ``get_params`` and ``get_params_type``.
        params_list: Flat sequence of parameter names and raw values.
        model: Optional variant name; when truthy it is passed to both
            ``get_params`` and ``get_params_type``.

    Returns:
        Dict mapping each recognised parameter name to its list of converted
        values. If any value of a parameter cannot be converted, all of that
        parameter's values are kept as strings.
    """
    keys = params_class.get_params(model).keys() if model else params_class.get_params().keys()

    input_params = {}
    current_values = None
    for param in params_list:
        if param in keys:
            current_values = []
            input_params[param] = current_values
        elif current_values is not None:
            current_values.append(param)
        # Entries before the first recognised name belong to no parameter.
        # They used to be attached to the first name and shifted every later
        # value list by one; they are now ignored.

    if not input_params:
        return input_params

    # Look the type table up once instead of once per parameter.
    param_types = params_class.get_params_type(model) if model else params_class.get_params_type()

    converters = {
        "int": int,
        "float": float,
        "bool": lambda raw: raw == "True",
        "str": str,
    }

    for name, raw_values in input_params.items():
        convert = converters.get(param_types[name])
        if convert is None:
            # Unknown type tag: leave the raw values untouched.
            continue
        try:
            input_params[name] = [convert(raw) for raw in raw_values]
        except (TypeError, ValueError, OverflowError):
            # Best-effort fallback: keep the whole list as strings.
            input_params[name] = [str(raw) for raw in raw_values]

    return input_params
visualization/draw_boxplot.py CHANGED
@@ -1,7 +1,4 @@
1
  import matplotlib.pyplot as plt
2
- import numpy as np
3
-
4
- from static.config import Config
5
 
6
 
7
  def draw_boxplot(x_data, paint_object, will_rotate=False):
 
1
  import matplotlib.pyplot as plt
 
 
 
2
 
3
 
4
  def draw_boxplot(x_data, paint_object, will_rotate=False):
visualization/draw_data_fit_total.py CHANGED
@@ -1,11 +1,8 @@
1
  import numpy as np
2
  from matplotlib import pyplot as plt
3
 
4
- from static.new_class import PaintObject
5
- from static.config import Config
6
 
7
-
8
- def draw_data_fit_total(input_dict, paint_object: PaintObject):
9
  plt.figure(figsize=(10, 6), dpi=300)
10
 
11
  for i, input_dict_items in enumerate(input_dict.items()):
 
1
  import numpy as np
2
  from matplotlib import pyplot as plt
3
 
 
 
4
 
5
+ def draw_data_fit_total(input_dict, paint_object):
 
6
  plt.figure(figsize=(10, 6), dpi=300)
7
 
8
  for i, input_dict_items in enumerate(input_dict.items()):
visualization/draw_heat_map.py CHANGED
@@ -1,9 +1,7 @@
1
- import numpy as np
2
  import matplotlib.pyplot as plt
 
3
  import pandas as pd
4
 
5
- from static.config import Config
6
-
7
 
8
  def draw_heat_map(x_data, col_list, paint_object, will_rotate=False):
9
  plt.rcParams.update({'figure.autolayout': True})
@@ -28,8 +26,8 @@ def draw_heat_map(x_data, col_list, paint_object, will_rotate=False):
28
 
29
  plt.yticks(np.arange(len(col_list)), col_list)
30
  plt.imshow(np_data)
31
- plt.colorbar(True)
32
- plt.tight_layout()
33
 
34
  plt.title(paint_object.get_name())
35
 
 
 
1
  import matplotlib.pyplot as plt
2
+ import numpy as np
3
  import pandas as pd
4
 
 
 
5
 
6
  def draw_heat_map(x_data, col_list, paint_object, will_rotate=False):
7
  plt.rcParams.update({'figure.autolayout': True})
 
26
 
27
  plt.yticks(np.arange(len(col_list)), col_list)
28
  plt.imshow(np_data)
29
+ plt.colorbar()
30
+ # plt.tight_layout()
31
 
32
  plt.title(paint_object.get_name())
33
 
visualization/draw_histogram.py CHANGED
@@ -1,10 +1,5 @@
1
- import random
2
-
3
- import numpy as np
4
  import matplotlib.pyplot as plt
5
-
6
- from static.config import Config
7
- from static.new_class import PaintObject
8
 
9
 
10
  def draw_histogram(nums, labels, paint_object, will_rotate=False, will_show_text=True):
 
 
 
 
1
  import matplotlib.pyplot as plt
2
+ import numpy as np
 
 
3
 
4
 
5
  def draw_histogram(nums, labels, paint_object, will_rotate=False, will_show_text=True):
visualization/draw_histogram_line_subgraph.py CHANGED
@@ -1,7 +1,7 @@
1
  import numpy as np
2
  from matplotlib import pyplot as plt
3
 
4
- from static.config import Config
5
 
6
 
7
  def draw_histogram_line_subgraph(total_data_for_plot):
@@ -22,7 +22,7 @@ def draw_histogram_line_subgraph(total_data_for_plot):
22
  data[1],
23
  data[2],
24
  "-o",
25
- color=Config.COLORS[0],
26
  markersize=4
27
  )
28
  ax[str(chr(i+65))].set_title(data[3])
@@ -33,7 +33,7 @@ def draw_histogram_line_subgraph(total_data_for_plot):
33
  data[1],
34
  align="center",
35
  alpha=1,
36
- color=Config.COLORS,
37
  tick_label=data[2]
38
  )
39
 
 
1
  import numpy as np
2
  from matplotlib import pyplot as plt
3
 
4
+ from classes.static_custom_class import *
5
 
6
 
7
  def draw_histogram_line_subgraph(total_data_for_plot):
 
22
  data[1],
23
  data[2],
24
  "-o",
25
+ color=StaticValue.COLORS[0],
26
  markersize=4
27
  )
28
  ax[str(chr(i+65))].set_title(data[3])
 
33
  data[1],
34
  align="center",
35
  alpha=1,
36
+ color=StaticValue.COLORS,
37
  tick_label=data[2]
38
  )
39
 
visualization/draw_learning_curve.py CHANGED
@@ -1,7 +1,6 @@
1
- import numpy as np
2
  from matplotlib import pyplot as plt
3
 
4
- from static.config import Config
5
 
6
 
7
  def draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std):
@@ -12,13 +11,13 @@ def draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_s
12
  train_scores_mean - train_scores_std,
13
  train_scores_mean + train_scores_std,
14
  alpha=0.1,
15
- color=Config.COLORS[0]
16
  )
17
  plt.plot(
18
  train_sizes,
19
  train_scores_mean,
20
  "o-",
21
- color=Config.COLORS[0],
22
  label="Training score"
23
  )
24
 
@@ -27,13 +26,13 @@ def draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_s
27
  test_scores_mean - test_scores_std,
28
  test_scores_mean + test_scores_std,
29
  alpha=0.1,
30
- color=Config.COLORS[1]
31
  )
32
  plt.plot(
33
  train_sizes,
34
  test_scores_mean,
35
  "o-",
36
- color=Config.COLORS[1],
37
  label="Cross-validation score"
38
  )
39
 
 
 
1
  from matplotlib import pyplot as plt
2
 
3
+ from classes.static_custom_class import *
4
 
5
 
6
  def draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std):
 
11
  train_scores_mean - train_scores_std,
12
  train_scores_mean + train_scores_std,
13
  alpha=0.1,
14
+ color=StaticValue.COLORS[0]
15
  )
16
  plt.plot(
17
  train_sizes,
18
  train_scores_mean,
19
  "o-",
20
+ color=StaticValue.COLORS[0],
21
  label="Training score"
22
  )
23
 
 
26
  test_scores_mean - test_scores_std,
27
  test_scores_mean + test_scores_std,
28
  alpha=0.1,
29
+ color=StaticValue.COLORS[1]
30
  )
31
  plt.plot(
32
  train_sizes,
33
  test_scores_mean,
34
  "o-",
35
+ color=StaticValue.COLORS[1],
36
  label="Cross-validation score"
37
  )
38
 
visualization/draw_learning_curve_total.py CHANGED
@@ -1,9 +1,7 @@
1
  from matplotlib import pyplot as plt
2
 
3
- from static.new_class import PaintObject
4
 
5
-
6
- def draw_learning_curve_total(input_dict, paint_object: PaintObject):
7
  plt.figure(figsize=(10, 8), dpi=300)
8
 
9
  for i, values in enumerate(input_dict.values()):
 
1
  from matplotlib import pyplot as plt
2
 
 
3
 
4
+ def draw_learning_curve_total(input_dict, paint_object):
 
5
  plt.figure(figsize=(10, 8), dpi=300)
6
 
7
  for i, values in enumerate(input_dict.values()):
visualization/draw_line_graph.py CHANGED
@@ -1,8 +1,5 @@
1
- import numpy as np
2
  import matplotlib.pyplot as plt
3
 
4
- from static.config import Config
5
-
6
 
7
  def draw_line_graph(nums, labels, paint_object):
8
  plt.figure(figsize=(10, 8), dpi=300)
 
 
1
  import matplotlib.pyplot as plt
2
 
 
 
3
 
4
  def draw_line_graph(nums, labels, paint_object):
5
  plt.figure(figsize=(10, 8), dpi=300)
visualization/draw_momentum.py CHANGED
@@ -1,9 +1,4 @@
1
- import numpy as np
2
  import matplotlib.pyplot as plt
3
- from sklearn.metrics import *
4
- from sklearn.preprocessing import label_binarize
5
-
6
- from coding.llh.static.config import Config
7
 
8
 
9
  def draw_momentum(df, p1_name, p2_name):
 
 
1
  import matplotlib.pyplot as plt
 
 
 
 
2
 
3
 
4
  def draw_momentum(df, p1_name, p2_name):
visualization/draw_parallel_coordinates.py CHANGED
@@ -1,7 +1,5 @@
1
- import pandas as pd
2
  import matplotlib.pyplot as plt
3
-
4
- from coding.llh.static.config import Config
5
 
6
 
7
  def draw_parallel_coordinates(df):
 
 
1
  import matplotlib.pyplot as plt
2
+ import pandas as pd
 
3
 
4
 
5
  def draw_parallel_coordinates(df):
visualization/draw_play_flow.py CHANGED
@@ -1,9 +1,4 @@
1
- import numpy as np
2
  import matplotlib.pyplot as plt
3
- from sklearn.metrics import *
4
- from sklearn.preprocessing import label_binarize
5
-
6
- from coding.llh.static.config import Config
7
 
8
 
9
  def draw_play_flow(df, p1_name, p2_name, p1_ace, p2_ace, p1_net_pt_won, p2_net_pt_won, p1_break_pt_won, p2_break_pt_won):
 
 
1
  import matplotlib.pyplot as plt
 
 
 
 
2
 
3
 
4
  def draw_play_flow(df, p1_name, p2_name, p1_ace, p2_ace, p1_net_pt_won, p2_net_pt_won, p1_break_pt_won, p2_break_pt_won):
visualization/draw_pred_total.py CHANGED
@@ -1,8 +1,6 @@
1
  import numpy as np
2
  from matplotlib import pyplot as plt
3
 
4
- from coding.llh.static.config import Config
5
-
6
 
7
  def draw_pred_total(input_dict):
8
  plt.figure(figsize=(10, 6))
 
1
  import numpy as np
2
  from matplotlib import pyplot as plt
3
 
 
 
4
 
5
  def draw_pred_total(input_dict):
6
  plt.figure(figsize=(10, 6))
visualization/draw_roc_auc_curve_total.py CHANGED
@@ -1,9 +1,7 @@
1
- import numpy as np
2
  import matplotlib.pyplot as plt
3
  from sklearn.metrics import *
4
- from sklearn.preprocessing import label_binarize
5
 
6
- from coding.llh.static.config import Config
7
 
8
 
9
  def draw_roc_auc_curve_total(input_dict, type):
@@ -20,7 +18,7 @@ def draw_roc_auc_curve_total(input_dict, type):
20
  fpr,
21
  tpr,
22
  "o-",
23
- color=Config.COLORS[i],
24
  label=label_name+str(round(auc(fpr, tpr), 2))
25
  )
26
 
@@ -40,7 +38,7 @@ def draw_roc_auc_curve_total(input_dict, type):
40
  fpr,
41
  tpr,
42
  "o-",
43
- color=Config.COLORS[i],
44
  label=label_name + str(round(auc(fpr, tpr), 2))
45
  )
46
 
 
 
1
  import matplotlib.pyplot as plt
2
  from sklearn.metrics import *
 
3
 
4
+ from classes.static_custom_class import *
5
 
6
 
7
  def draw_roc_auc_curve_total(input_dict, type):
 
18
  fpr,
19
  tpr,
20
  "o-",
21
+ color=StaticValue.COLORS[i],
22
  label=label_name+str(round(auc(fpr, tpr), 2))
23
  )
24
 
 
38
  fpr,
39
  tpr,
40
  "o-",
41
+ color=StaticValue.COLORS[i],
42
  label=label_name + str(round(auc(fpr, tpr), 2))
43
  )
44
 
visualization/draw_scatter.py CHANGED
@@ -1,9 +1,7 @@
1
- import numpy as np
2
  import matplotlib.pyplot as plt
 
3
  from mpl_toolkits.mplot3d import Axes3D
4
 
5
- from coding.llh.static.config import Config
6
-
7
 
8
  # Draw scatter
9
  def draw_scatter_2D(x_data, y_data, centers, title):
 
 
1
  import matplotlib.pyplot as plt
2
+ import numpy as np
3
  from mpl_toolkits.mplot3d import Axes3D
4
 
 
 
5
 
6
  # Draw scatter
7
  def draw_scatter_2D(x_data, y_data, centers, title):
visualization/draw_scatter_line_graph.py CHANGED
@@ -1,7 +1,7 @@
1
- import numpy as np
2
  import matplotlib.pyplot as plt
 
3
 
4
- from static.config import Config
5
 
6
 
7
  # draw scatter line graph
@@ -15,9 +15,9 @@ def draw_scatter_line_graph(x_data, y_pred_data, y_real_data, coef, intercept, l
15
  fig, ax = plt.subplot_mosaic(layout, figsize=(16, 16))
16
 
17
  for i in range(np.size(x_data, 1)):
18
- ax[str(chr(i+65))].scatter(x_data[:, i], y_pred_data.T, color=Config.COLORS[0], s=4, label=labels[0])
19
- ax[str(chr(i+65))].scatter(x_data[:, i], y_real_data, color=Config.COLORS[1], s=4, label=labels[1])
20
- ax[str(chr(i+65))].plot(x_data[:, i], x_data[:, i] * coef[i] + intercept, color=Config.COLORS[2], markersize=4)
21
  ax[str(chr(i + 65))].legend()
22
 
23
  plt.suptitle(title)
 
 
1
  import matplotlib.pyplot as plt
2
+ import numpy as np
3
 
4
+ from classes.static_custom_class import *
5
 
6
 
7
  # draw scatter line graph
 
15
  fig, ax = plt.subplot_mosaic(layout, figsize=(16, 16))
16
 
17
  for i in range(np.size(x_data, 1)):
18
+ ax[str(chr(i+65))].scatter(x_data[:, i], y_pred_data.T, color=StaticValue.COLORS[0], s=4, label=labels[0])
19
+ ax[str(chr(i+65))].scatter(x_data[:, i], y_real_data, color=StaticValue.COLORS[1], s=4, label=labels[1])
20
+ ax[str(chr(i+65))].plot(x_data[:, i], x_data[:, i] * coef[i] + intercept, color=StaticValue.COLORS[2], markersize=4)
21
  ax[str(chr(i + 65))].legend()
22
 
23
  plt.suptitle(title)
visualization/draw_swings_and_positives.py CHANGED
@@ -1,9 +1,4 @@
1
- import numpy as np
2
  import matplotlib.pyplot as plt
3
- from sklearn.metrics import *
4
- from sklearn.preprocessing import label_binarize
5
-
6
- from coding.llh.static.config import Config
7
 
8
 
9
  def draw_swings_and_positives(df, p1_name, p2_name):
 
 
1
  import matplotlib.pyplot as plt
 
 
 
 
2
 
3
 
4
  def draw_swings_and_positives(df, p1_name, p2_name):