LLH committed on 2024/03/09 11:50
Commit 3cfb9a3 · 1 Parent(s): 0c19671

analysis/bayes_model.py DELETED
@@ -1,82 +0,0 @@
from sklearn.model_selection import learning_curve
from sklearn.naive_bayes import *
import numpy as np

from static.new_class import Container
from static.process import grid_search, bayes_search
from visualization.draw_line_graph import draw_line_graph
from visualization.draw_scatter_line_graph import draw_scatter_line_graph
from metrics.calculate_classification_metrics import calculate_classification_metrics
from metrics.calculate_regression_metrics import calculate_regression_metrics


class NaiveBayesClassifierParams:
    @classmethod
    def get_params(cls, sort):
        if sort == "MultinomialNB":
            return {
                "alpha": [0.1, 0.5, 1.0, 2.0]
            }
        elif sort == "GaussianNB":
            return {}
        elif sort == "ComplementNB":
            return {
                "alpha": [0.1, 0.5, 1, 10],
                "fit_prior": [True, False],
                "norm": [True, False]
            }


# Naive Bayes classification
def naive_bayes_classification(container: Container, model=None):
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    if model == "MultinomialNB":
        naive_bayes_model = MultinomialNB()
        params = NaiveBayesClassifierParams.get_params(model)
    elif model == "GaussianNB":
        naive_bayes_model = GaussianNB()
        params = NaiveBayesClassifierParams.get_params(model)
    elif model == "ComplementNB":
        naive_bayes_model = ComplementNB()
        params = NaiveBayesClassifierParams.get_params(model)
    else:
        naive_bayes_model = GaussianNB()
        # The original passed the unmatched `model` value here, making
        # get_params return None; fall back to the GaussianNB grid instead.
        params = NaiveBayesClassifierParams.get_params("GaussianNB")

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, naive_bayes_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, naive_bayes_model, x_train, y_train)
    else:
        best_model = naive_bayes_model
        best_model.fit(x_train, y_train)

    # The "参数" (params) and "指标" (metrics) keys are consumed downstream
    # and are kept as-is.
    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
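
For reference, a minimal sketch of how one of these trainer functions is driven, assuming a Container built from pre-split data with the attributes read above (x_train, y_train, x_test, y_test, hyper_params_optimize); the real constructor and attribute names live in static/new_class.py and may differ:

# Hypothetical usage; Container's actual signature is defined in static/new_class.py.
from static.new_class import Container
from analysis.bayes_model import naive_bayes_classification

container = Container(x_train, y_train, x_test, y_test, hyper_params_optimize="grid_search")
trained = naive_bayes_classification(container, model="MultinomialNB")
print(trained.info["指标"])  # classification metrics on the test split (attribute name assumed)
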
analysis/descriptive_analysis.py DELETED
@@ -1,303 +0,0 @@
from datetime import datetime

import json
import sys
import numpy as np
import pandas as pd
import math
import time as sys_time

from coding.llh.visualization.draw_boxplot import draw_boxplot
from coding.llh.visualization.draw_heat_map import draw_heat_map
from coding.llh.visualization.draw_histogram import draw_histogram
from coding.llh.visualization.draw_histogram_line_subgraph import draw_histogram_line_subgraph
from coding.llh.visualization.draw_line_graph import draw_line_graph
from tqdm import tqdm


# 0202:
def data_transformation_extra(df: pd.DataFrame, str2int_mappings: dict) -> pd.DataFrame:

    # Delete "match_id" column
    # df.drop("match_id", axis=1, inplace=True)
    df["match_id"] = df["match_id"].apply(lambda x: x[-4:])

    # Dissolve the two-mode data mapping into two parts

    value_to_replace_dict = {
        "AD": "50"
    }

    value_to_replace = "AD"
    df["p1_score"].replace(value_to_replace, value_to_replace_dict[value_to_replace], inplace=True)
    df["p2_score"].replace(value_to_replace, value_to_replace_dict[value_to_replace], inplace=True)

    str2int_mappings_to_dissolve = {
        "p1_score": {"0": 0},
        "p2_score": {"0": 0}
    }

    df["p1_score_mark"] = 0
    df["p2_score_mark"] = 0

    # Mark the first point of each game: 1 for a normal game ("0" -> "15"),
    # 2 for a tiebreak ("0" -> "1")
    for key in str2int_mappings_to_dissolve.keys():
        for i in range(1, len(df)):
            if df.loc[i, key] == "15" and df.loc[i - 1, key] == "0":
                df.loc[i, key + "_mark"] = 1
            elif df.loc[i, key] == "1" and df.loc[i - 1, key] == "0":
                df.loc[i, key + "_mark"] = 2

    df["p1_score_normal"] = 0
    df["p1_score_tiebreak"] = 0
    df["p2_score_normal"] = 0
    df["p2_score_tiebreak"] = 0

    normal_counter = 0
    tiebreak_counter = 0
    for key in str2int_mappings_to_dissolve.keys():
        for i in range(0, len(df)):
            if df.loc[i, key] == "0":
                normal_counter = 0
                tiebreak_counter = 0
                continue

            if df.loc[i, key + "_mark"] == 1 or normal_counter > 0:
                if int(df.loc[i, key]) > int(df.loc[i - 1, key]):
                    normal_counter += 1
                    df.loc[i, key + "_normal"] = normal_counter
                    if df.loc[i, key] == value_to_replace_dict[value_to_replace]:
                        str2int_mappings_to_dissolve[key][value_to_replace] = normal_counter
                    else:
                        str2int_mappings_to_dissolve[key][df.loc[i, key]] = normal_counter

                elif int(df.loc[i, key]) < int(df.loc[i - 1, key]):
                    normal_counter -= 1
                    df.loc[i, key + "_normal"] = normal_counter

                else:
                    df.loc[i, key + "_normal"] = normal_counter

            elif df.loc[i, key + "_mark"] == 2 or tiebreak_counter > 0:
                if int(df.loc[i, key]) > int(df.loc[i - 1, key]):
                    tiebreak_counter += 1
                    df.loc[i, key + "_tiebreak"] = tiebreak_counter
                    if df.loc[i, key] == value_to_replace_dict[value_to_replace]:
                        str2int_mappings_to_dissolve[key][value_to_replace] = tiebreak_counter
                    else:
                        str2int_mappings_to_dissolve[key][df.loc[i, key]] = tiebreak_counter

                elif int(df.loc[i, key]) < int(df.loc[i - 1, key]):
                    tiebreak_counter -= 1
                    df.loc[i, key + "_tiebreak"] = tiebreak_counter

                else:
                    df.loc[i, key + "_tiebreak"] = tiebreak_counter

    str2int_mappings.update(str2int_mappings_to_dissolve)

    df.drop("p1_score_mark", axis=1, inplace=True)
    df.drop("p2_score_mark", axis=1, inplace=True)
    df.drop("p1_score", axis=1, inplace=True)
    df.drop("p2_score", axis=1, inplace=True)

    # Transform the "elapsed_time" column

    def transform_time_col(time: str):
        h, m, s = time.strip().split(":")
        seconds = int(h) * 3600 + int(m) * 60 + int(s)
        return seconds

    df["elapsed_time"] = df["elapsed_time"].apply(transform_time_col)

    # Calculate cumulative values for the "game_victor" and "set_victor" columns

    df["p1_game_victor"] = df.apply(lambda x: 1 if x["game_victor"] == 1 else 0, axis=1)
    df["p2_game_victor"] = df.apply(lambda x: 1 if x["game_victor"] == 2 else 0, axis=1)
    df["p1_set_victor"] = df.apply(lambda x: 1 if x["set_victor"] == 1 else 0, axis=1)
    df["p2_set_victor"] = df.apply(lambda x: 1 if x["set_victor"] == 2 else 0, axis=1)

    df["p1_game_victor"] = df.groupby(["player1", "player2"])["p1_game_victor"].cumsum()
    df["p2_game_victor"] = df.groupby(["player1", "player2"])["p2_game_victor"].cumsum()
    df["p1_set_victor"] = df.groupby(["player1", "player2"])["p1_set_victor"].cumsum()
    df["p2_set_victor"] = df.groupby(["player1", "player2"])["p2_set_victor"].cumsum()

    # Forced conversion of data types
    for col in df.columns.values:
        df[col] = df[col].astype("float")

    # Save the mappings to a JSON file
    with open("./data/mappings.json", "w", encoding="utf-8") as f:
        json.dump(str2int_mappings, f, indent=4, ensure_ascii=False)

    return df


def data_transformation(df: pd.DataFrame) -> (pd.DataFrame, dict):
    """
    0.
    1. Define mappings
    2. Create mappings
    3. Modify the original data according to the mappings
    4. Get type exceptions
    5. Forced conversion of data types
    """

    info = {}

    # Define mappings
    str2int_mappings = {
        "player1": {},
        "player2": {},
        "winner_shot_type": {},
        "serve_width": {},
        "serve_depth": {},
        "return_depth": {}
    }

    # Create mappings
    for col in str2int_mappings.copy():
        keys = np.array(df[col].drop_duplicates())
        values = [x for x in range(len(keys))]
        str2int_mappings[col] = dict(zip(keys, values))

    # Modify the original data according to the mappings
    for col, mapping in str2int_mappings.items():
        series = df[col]

        for k, v in mapping.items():
            series.replace(k, v, inplace=True)
        df[col] = series

    df.replace('Not A Number', 0, inplace=True)

    # Get type exceptions

    # abnormal_type_values = []
    #
    # for col in df.columns.values:
    #     if col not in str2int_mappings.keys():
    #         for row in df[col]:
    #             if not (0 <= row <= sys.maxsize):
    #                 abnormal_type_values.append(row)
    #
    # info["Number of abnormal type value"] = sorted(abnormal_type_values)

    # # Forced conversion of data types
    # for col in df.columns.values:
    #     df[col] = df[col].astype("float")
    #
    # # Save the mappings to a json format file
    # with open("./mappings.json", "w", encoding="utf-8") as f:
    #     json.dump(str2int_mappings, f, indent=4, ensure_ascii=False)

    # 0202:
    df = data_transformation_extra(df, str2int_mappings)

    return df, info


# Get descriptive indicators and filtered data based on a boxplot
def get_descriptive_indicators_related(df):
    info = {}

    descriptive_indicators_df = pd.DataFrame(
        index=list(df.columns.values),
        columns=[
            "Min",
            "Max",
            "Avg",
            "Standard Deviation",
            "Standard Error",
            "Upper Quartile",
            "Median",
            "Lower Quartile",
            "Interquartile Distance",
            "Kurtosis",
            "Skewness",
            "Coefficient of Variation"
        ]
    )

    for col in df.columns.values:
        descriptive_indicators_df.loc[col, "Min"] = df[col].min()
        descriptive_indicators_df.loc[col, "Max"] = df[col].max()
        descriptive_indicators_df.loc[col, "Avg"] = df[col].mean()
        descriptive_indicators_df.loc[col, "Standard Deviation"] = df[col].std()
        descriptive_indicators_df.loc[col, "Standard Error"] = \
            descriptive_indicators_df.loc[col, "Standard Deviation"] / math.sqrt(len(df[col]))
        descriptive_indicators_df.loc[col, "Upper Quartile"] = df[col].quantile(0.75)
        descriptive_indicators_df.loc[col, "Median"] = df[col].quantile(0.5)
        descriptive_indicators_df.loc[col, "Lower Quartile"] = df[col].quantile(0.25)
        # The original subtracted the upper quartile from the lower one, which
        # yields a negative IQR; corrected to upper minus lower.
        descriptive_indicators_df.loc[col, "Interquartile Distance"] = \
            descriptive_indicators_df.loc[col, "Upper Quartile"] - descriptive_indicators_df.loc[col, "Lower Quartile"]
        descriptive_indicators_df.loc[col, "Kurtosis"] = df[col].kurt()
        descriptive_indicators_df.loc[col, "Skewness"] = df[col].skew()
        descriptive_indicators_df.loc[col, "Coefficient of Variation"] = \
            descriptive_indicators_df.loc[col, "Standard Deviation"] / descriptive_indicators_df.loc[col, "Avg"]

    # draw_heat_map(descriptive_indicators_df.to_numpy(), "descriptive indicators", True)
    #
    # draw_boxplot(df, "descriptive indicators boxplot")

    len_0 = len(df)

    # tmp_df = \
    #     df[(df >= (descriptive_indicators_df["Lower Quartile"] - 1.5 * (descriptive_indicators_df["Upper Quartile"] -
    #         descriptive_indicators_df["Lower Quartile"])))
    #        & (df <= (descriptive_indicators_df["Upper Quartile"] + 1.5 * (descriptive_indicators_df["Upper Quartile"] -
    #           descriptive_indicators_df["Lower Quartile"])))][[
    #         "ProductChoice", "MembershipPoints", "ModeOfPayment", "ResidentCity", "PurchaseTenure", "IncomeClass",
    #         "CustomerPropensity", "CustomerAge", "LastPurchaseDuration"
    #     ]]

    # tmp_df.dropna(inplace=True)

    # df = pd.concat([tmp_df, df[["ProductChoice", "Channel", "MartialStatus"]]], axis=1, join="inner")

    # df = pd.concat([df.iloc[:, :9], df.iloc[:, 10:]], axis=1)

    # info["Number of offsetting value"] = len_0 - len(df)
    #
    # info["Total size of filtered data after descriptive analysis"] = len(df)

    return df, info


# Create images of the distribution of the number of each variable
def variable_distribution(df):
    counts_mappings = {}
    print("counts analysis")
    for col in tqdm(df.columns.values, desc='columns:'):
        counts_mapping = {}
        for x in tqdm(df[col], desc='cells'):
            if x in counts_mapping.keys():
                counts_mapping[x] += 1
            else:
                counts_mapping[x] = 1
        counts_mappings[col] = counts_mapping

    total_data_for_plot = []
    print("plotting")
    for col, mapping in tqdm(counts_mappings.items(), desc='columns'):
        if col in ["set_no", 'game_no']:
            sorting = sorted(mapping.items(), reverse=True, key=lambda m: m[0])
            data = [x[1] for x in sorting]
            labels = [x[0] for x in sorting]

            total_data_for_plot.append(["line_graph", labels, data, col])
            draw_line_graph(labels, data, col)
        else:
            sorting = sorted(mapping.items(), reverse=True, key=lambda m: m[1])
            data = [x[1] for x in sorting]
            labels = [x[0] for x in sorting]

            will_rotate = True if col in ["player1", "player2", "match_id"] else False
            will_show_text = False if col in ["ResidentCity"] else True

            total_data_for_plot.append(["histogram", data, labels, will_rotate, will_show_text, col])
            draw_histogram(data, labels, will_rotate, will_show_text, col)
    # draw_histogram_line_subgraph(total_data_for_plot)
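
As a quick sanity check of the elapsed-time transform above: "1:02:35" becomes 1*3600 + 2*60 + 35 = 3755 seconds. Restated standalone:

# Standalone restatement of transform_time_col from data_transformation_extra.
def to_seconds(time: str) -> int:
    h, m, s = time.strip().split(":")
    return int(h) * 3600 + int(m) * 60 + int(s)

assert to_seconds("1:02:35") == 3755
assert to_seconds("0:00:41") == 41
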
analysis/distance_model.py DELETED
@@ -1,115 +0,0 @@
from sklearn.model_selection import learning_curve
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

# np was previously only available here via the wildcard import below;
# imported explicitly so the module does not depend on shap_model's namespace.
import numpy as np

from analysis.shap_model import *
from metrics.calculate_classification_metrics import calculate_classification_metrics
from metrics.calculate_regression_metrics import calculate_regression_metrics
from static.new_class import *
from static.process import grid_search, bayes_search


class KNNClassifierParams:
    @classmethod
    def get_params(cls):
        return {
            "n_neighbors": [3, 5, 7, 9],
            "weights": ['uniform', 'distance'],
            "p": [1, 2]
        }


# KNN classification
def knn_classifier(container: Container):
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    knn_classifier_model = KNeighborsClassifier()
    params = KNNClassifierParams.get_params()

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, knn_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, knn_classifier_model, x_train, y_train)
    else:
        best_model = knn_classifier_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class KNNRegressionParams:
    @classmethod
    def get_params(cls):
        return {
            "n_neighbors": [3, 5, 7, 9],
            "weights": ['uniform', 'distance'],
            "p": [1, 2]
        }


# KNN regression
def knn_regression(container: Container):
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    knn_regression_model = KNeighborsRegressor()
    params = KNNRegressionParams.get_params()

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, knn_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, knn_regression_model, x_train, y_train)
    else:
        best_model = knn_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
analysis/evaluation_model.py DELETED
@@ -1,99 +0,0 @@
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl
import matplotlib.pyplot as plt


def fuzzy_comprehensive_evaluation_model():
    # Create fuzzy variables and fuzzy sets
    technical_skill = ctrl.Antecedent(np.arange(0, 101, 1), 'technical_skill')
    physical_condition = ctrl.Antecedent(np.arange(0, 101, 1), 'physical_condition')
    mental_toughness = ctrl.Antecedent(np.arange(0, 101, 1), 'mental_toughness')
    opponent_strength = ctrl.Antecedent(np.arange(0, 101, 1), 'opponent_strength')

    performance = ctrl.Consequent(np.arange(0, 101, 1), 'performance')

    # Define triangular membership functions
    technical_skill['low'] = fuzz.trimf(technical_skill.universe, [0, 0, 50])
    technical_skill['medium'] = fuzz.trimf(technical_skill.universe, [0, 50, 100])
    technical_skill['high'] = fuzz.trimf(technical_skill.universe, [50, 100, 100])

    physical_condition['low'] = fuzz.trimf(physical_condition.universe, [0, 0, 50])
    physical_condition['medium'] = fuzz.trimf(physical_condition.universe, [0, 50, 100])
    physical_condition['high'] = fuzz.trimf(physical_condition.universe, [50, 100, 100])

    mental_toughness['low'] = fuzz.trimf(mental_toughness.universe, [0, 0, 50])
    mental_toughness['medium'] = fuzz.trimf(mental_toughness.universe, [0, 50, 100])
    mental_toughness['high'] = fuzz.trimf(mental_toughness.universe, [50, 100, 100])

    opponent_strength['low'] = fuzz.trimf(opponent_strength.universe, [0, 0, 50])
    opponent_strength['medium'] = fuzz.trimf(opponent_strength.universe, [0, 50, 100])
    opponent_strength['high'] = fuzz.trimf(opponent_strength.universe, [50, 100, 100])

    performance['poor'] = fuzz.trimf(performance.universe, [0, 0, 50])
    performance['average'] = fuzz.trimf(performance.universe, [0, 50, 100])
    performance['excellent'] = fuzz.trimf(performance.universe, [50, 100, 100])

    # Defuzzification method for the output: centroid
    performance.defuzzify_method = 'centroid'

    # Define the rules
    rule1 = ctrl.Rule(
        technical_skill['low'] | physical_condition['low'] | mental_toughness['low'] | opponent_strength['low'],
        performance['poor']
    )
    rule2 = ctrl.Rule(
        technical_skill['medium'] | physical_condition['medium'] | mental_toughness['medium'] | opponent_strength['medium'],
        performance['average']
    )
    rule3 = ctrl.Rule(
        technical_skill['high'] | physical_condition['high'] | mental_toughness['high'] | opponent_strength['high'],
        performance['excellent']
    )

    # Create the control system
    performance_evaluation = ctrl.ControlSystem([rule1, rule2, rule3])
    performance_evaluator = ctrl.ControlSystemSimulation(performance_evaluation)

    # Input data
    performance_evaluator.input['technical_skill'] = 75
    performance_evaluator.input['physical_condition'] = 80
    performance_evaluator.input['mental_toughness'] = 85
    performance_evaluator.input['opponent_strength'] = 60

    # Compute the fuzzy comprehensive score
    performance_evaluator.compute()

    # Output the result
    print("Fuzzy comprehensive score:", performance_evaluator.output['performance'])

    # Plot the fuzzy sets
    technical_skill.view("technical_skill", sim=performance_evaluator)
    physical_condition.view("physical_condition", sim=performance_evaluator)
    mental_toughness.view("mental_toughness", sim=performance_evaluator)
    opponent_strength.view("opponent_strength", sim=performance_evaluator)
    performance.view("performance", sim=performance_evaluator)

    # Perform sensitivity analysis (by varying an input value)

    # input_var_1:

    # input_values = np.arange(0, 11, 1)
    # output_values = []
    #
    # for val in input_values:
    #     fuzzy_control_sys_simulation.input["input_var_1"] = val
    #     fuzzy_control_sys_simulation.compute()
    #     output_values.append(fuzzy_control_sys_simulation.output["output_var"])
    #
    # plt.plot(
    #     input_values,
    #     output_values,
    #     label="Sensitivity Analysis"
    # )
    # plt.xlabel("Input Variable 1")
    # plt.ylabel("Output Variable")
    # plt.legend()
    # plt.show()
    #
    # return fuzzy_control_sys_simulation.output["output_var"]
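
For intuition, the triangular membership used above is easy to evaluate by hand: for 'medium' = trimf([0, 50, 100]), an input of 75 sits halfway down the falling edge, giving membership (100 - 75) / (100 - 50) = 0.5. A plain-Python restatement of the same calculation:

# Plain restatement of the triangular membership function trimf([a, b, c]).
def tri_membership(x: float, a: float, b: float, c: float) -> float:
    if x <= a or x >= c:
        return 1.0 if x == b else 0.0   # peak may coincide with an endpoint
    if x <= b:
        return (x - a) / (b - a)        # rising edge
    return (c - x) / (c - b)            # falling edge

assert tri_membership(75, 0, 50, 100) == 0.5    # 'medium' at input 75
assert tri_membership(75, 50, 100, 100) == 0.5  # 'high' at input 75
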
analysis/exploratory_analysis.py DELETED
@@ -1,130 +0,0 @@
import numpy as np
import sklearn.metrics
from sklearn.cluster import KMeans
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
from factor_analyzer.factor_analyzer import calculate_kmo

from coding.llh.visualization.draw_heat_map import draw_heat_map
from coding.llh.visualization.draw_scatter import draw_scatter_2D, draw_scatter_2D_1, draw_scatter_3D_1, draw_scatter_3D


# K-means
def k_means(array: np.ndarray):
    info = {}

    draw_scatter_2D_1(array, "2D scatter data before k-means")
    draw_scatter_3D_1(array, "3D scatter data before k-means")

    K = 60

    info["Number of clustering centers"] = K

    k_means_model = KMeans(n_clusters=K, init='k-means++')

    k_means_model.fit(array)

    sum_of_squared_errors = k_means_model.inertia_

    info["SSE"] = sum_of_squared_errors

    draw_scatter_2D(array, k_means_model.labels_, k_means_model.cluster_centers_, "2D scatter data after k-means")
    draw_scatter_3D(array, k_means_model.labels_, k_means_model.cluster_centers_, "3D scatter data after k-means")

    result = k_means_model.fit_predict(array[:200])

    silhouette_score = sklearn.metrics.silhouette_score(array[:200], result)

    info["Silhouette score"] = silhouette_score

    return info


# Bartlett sphericity test
def bartlett_test(df):
    _, p_value = calculate_bartlett_sphericity(df)

    return p_value


# KMO test
def kmo_test(df):
    _, kmo_score = calculate_kmo(df)

    return kmo_score


# Principal component analysis
def pca(df):
    # Only consider the correlation of the independent variables
    info = {}

    # array_x = df.iloc[:, 1:]
    array_x = df.iloc[:, :]
    array_y = df.iloc[:, :1]

    # Bartlett sphericity test: p < 0.05 rejects the identity-correlation
    # hypothesis, i.e. the data are suitable for factor analysis
    p_value = bartlett_test(array_x)
    info["p value of bartlett sphericity test"] = p_value
    if p_value < 0.05:
        info["Result of bartlett sphericity test"] = "Accept"
    else:
        info["Result of bartlett sphericity test"] = "Reject"

    # KMO test
    kmo_score = kmo_test(array_x)
    info["Score of KMO test"] = kmo_score
    if kmo_score > 0.5:
        info["Result of KMO test"] = "Accept"
    else:
        info["Result of KMO test"] = "Reject"

    # Get the matrix of correlation coefficients
    covX = np.around(np.corrcoef(array_x.T), decimals=3)

    # Standard deviation of the diagonal elements of the covariance matrix
    std_dev = np.sqrt(np.diag(covX))

    # Pearson correlation coefficient matrix
    pearson_matrix = covX / np.outer(std_dev, std_dev)

    # draw_heat_map(pearson_matrix, "pearson matrix", True, df.columns.values)

    # Solve the eigenvalues and eigenvectors of the correlation matrix
    eigenvalues, eigenvectors = np.linalg.eig(covX.T)

    eigenvalues = np.around(eigenvalues, decimals=3)

    eigenvalues_dict = dict(zip(eigenvalues.tolist(), list(range(0, len(eigenvalues)))))

    # Sort eigenvalues in descending order
    eigenvalues = sorted(eigenvalues, reverse=True)

    for i, value in enumerate(eigenvalues):
        if i == 0:
            sorted_eigenvectors = eigenvectors[:, eigenvalues_dict[value]].reshape(-1, 1)
        else:
            sorted_eigenvectors = np.concatenate((sorted_eigenvectors, eigenvectors[:, eigenvalues_dict[value]].reshape(-1, 1)), axis=1)

    # draw_line_graph(range(1, len(eigenvalues) + 1), eigenvalues, "Eigenvalue")

    # Get the contribution of each eigenvalue
    contribution = eigenvalues / np.sum(eigenvalues)

    # Get the cumulative contribution of the eigenvalues
    cumulative_contribution = np.cumsum(contribution)

    # Selection of principal components
    main_factors_index = [i for i in range(len(cumulative_contribution)) if cumulative_contribution[i] < 0.80]

    main_factor_num = len(main_factors_index)

    info["Main factor num"] = main_factor_num

    # Get the projection matrix
    projected_array = array_x.dot(sorted_eigenvectors[:, :main_factor_num])
    projected_array = np.concatenate((array_y.values, projected_array), axis=1)

    return projected_array, info
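
The component-selection rule in pca() is easy to check by hand: with eigenvalues [2.0, 1.0, 0.6, 0.4] the contributions are [0.5, 0.25, 0.15, 0.1], the cumulative sums [0.5, 0.75, 0.9, 1.0], and the `< 0.80` criterion keeps the first two components. A minimal sketch with made-up eigenvalues:

# Worked example of the selection rule used in pca() above.
import numpy as np

eigenvalues = np.array([2.0, 1.0, 0.6, 0.4])
contribution = eigenvalues / eigenvalues.sum()   # [0.5, 0.25, 0.15, 0.1]
cumulative = np.cumsum(contribution)             # [0.5, 0.75, 0.9, 1.0]
kept = [i for i in range(len(cumulative)) if cumulative[i] < 0.80]
print(kept)  # [0, 1] -> two principal components kept under the < 0.80 criterion
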
analysis/gaussian_model.py DELETED
@@ -1,28 +0,0 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture


def gaussian_mix(x):
    x = x.reshape(-1, 1)
    n_components = 2000  # the number of mixture components; adjust as needed
    gmm = GaussianMixture(n_components=n_components, covariance_type='full')

    # Fit the model
    gmm.fit(x)

    # Draw new samples from the fitted mixture (the original comment claimed
    # this predicts each point's component, but the code samples instead)
    continuous_data = gmm.sample(len(x))[0].reshape(-1)

    return continuous_data

    # Fit the data with a Gaussian mixture model
    # gmm = GaussianMixture(n_components=50)  # choose the number of mixture components
    # gmm.fit(x.reshape(-1, 1))

    # Generate continuous data
    # return np.linspace(min(x), max(x), len(x)).flatten()

    # z = np.exp(gmm.score_samples(y.reshape(-1, 1)))

    # return z
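
Worth noting when reusing this helper: GaussianMixture requires at least as many samples as components, so n_components=2000 only fits inputs with 2000 or more points. A hypothetical guard (an assumption, not part of the original code) would cap the count:

# Hypothetical guard around gaussian_mix's component count.
import numpy as np
from sklearn.mixture import GaussianMixture

def safe_gaussian_mix(x: np.ndarray, n_components: int = 2000) -> np.ndarray:
    x = x.reshape(-1, 1)
    # sklearn raises if n_components > n_samples, so cap it at the sample count.
    gmm = GaussianMixture(n_components=min(n_components, len(x)), covariance_type='full')
    gmm.fit(x)
    samples, _ = gmm.sample(len(x))
    return samples.reshape(-1)
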
analysis/gradient_model.py DELETED
@@ -1,65 +0,0 @@
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import learning_curve

from analysis.shap_model import draw_shap_beeswarm
from metrics.calculate_regression_metrics import calculate_regression_metrics
from static.config import Config
from static.new_class import Container
from static.process import grid_search, bayes_search


class GradientBoostingParams:
    @classmethod
    def get_params(cls):
        return {
            'n_estimators': [50, 100, 150],
            'learning_rate': [0.01, 0.1, 0.2],
            'max_depth': [3, 5, 7],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4]
        }


# Gradient boosting regression
def gradient_boosting_regression(container: Container):
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    gradient_boosting_regression_model = GradientBoostingRegressor(random_state=Config.RANDOM_STATE)
    params = GradientBoostingParams.get_params()

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, gradient_boosting_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, gradient_boosting_regression_model, x_train, y_train)
    else:
        best_model = gradient_boosting_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
analysis/kernel_model.py DELETED
@@ -1,119 +0,0 @@
import numpy as np
from sklearn.model_selection import learning_curve
from sklearn.svm import SVC
from sklearn.svm import SVR

from metrics.calculate_classification_metrics import calculate_classification_metrics
from metrics.calculate_regression_metrics import calculate_regression_metrics
from static.config import Config
from static.new_class import Container
from static.process import grid_search, bayes_search


class SVMRegressionParams:
    @classmethod
    def get_params(cls):
        return {
            'kernel': ['linear', 'rbf'],
            'C': [0.1, 1, 10, 100],
            'gamma': [0.01, 0.1, 1, 10],
            'epsilon': [0.01, 0.1, 1]
        }


# Support vector machine regression
def svm_regression(container: Container):
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    svm_regression_model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
    params = SVMRegressionParams.get_params()

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, svm_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, svm_regression_model, x_train, y_train)
    else:
        best_model = svm_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class SVMClassifierParams:
    @classmethod
    def get_params(cls):
        return {
            "C": [0.1, 1, 10, 100],
            "kernel": ['linear', 'rbf', 'poly'],
            "gamma": [0.1, 1, 10]
        }


# Support vector machine classification
def svm_classifier(container: Container):
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    svm_classifier_model = SVC(kernel="rbf")
    params = SVMClassifierParams.get_params()

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, svm_classifier_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, svm_classifier_model, x_train, y_train)
    else:
        best_model = svm_classifier_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    y_pred = best_model.predict(x_test)
    # y_pred = best_model.predict(x_test).reshape(-1, 1)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
analysis/linear_model.py DELETED
@@ -1,217 +0,0 @@
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import learning_curve

from static.process import grid_search, bayes_search
from metrics.calculate_classification_metrics import calculate_classification_metrics
from metrics.calculate_regression_metrics import calculate_regression_metrics
from static.new_class import *
from static.config import Config


class LinearRegressionParams:
    @classmethod
    def get_params(cls, sort):
        if sort in ["Lasso", "Ridge", "ElasticNet"]:
            return {
                "fit_intercept": [True, False],
                "alpha": [0.001, 0.01, 0.1, 1.0, 10.0],
                "random_state": [Config.RANDOM_STATE]
            }
        else:
            return {
                "fit_intercept": [True, False]
            }


# Linear regression
def linear_regression(container: Container, model=None):
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    if model == "Lasso":
        linear_regression_model = Lasso(alpha=0.1, random_state=Config.RANDOM_STATE)
        params = LinearRegressionParams.get_params(model)
    elif model == "Ridge":
        linear_regression_model = Ridge(alpha=0.1, random_state=Config.RANDOM_STATE)
        params = LinearRegressionParams.get_params(model)
    elif model == "ElasticNet":
        linear_regression_model = ElasticNet(alpha=0.1, random_state=Config.RANDOM_STATE)
        params = LinearRegressionParams.get_params(model)
    elif model == "LinearRegression":
        linear_regression_model = LinearRegression()
        params = LinearRegressionParams.get_params(model)
    else:
        linear_regression_model = LinearRegression()
        params = LinearRegressionParams.get_params(model)

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, linear_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, linear_regression_model, x_train, y_train)
    else:
        best_model = linear_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    # lr_intercept = best_model.intercept_
    # info["Intercept of linear regression equation"] = lr_intercept
    #
    # lr_coef = best_model.coef_
    # info["Coefficients of linear regression equation"] = lr_coef

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    # The original stored the metrics under the "参数" (params) key, clobbering
    # the hyper-parameters saved above; corrected to the "指标" (metrics) key
    # used everywhere else.
    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class PolynomialRegressionParams:
    @classmethod
    def get_params(cls):
        return {
            "polynomial_features__degree": [2, 3],
            "linear_regression_model__fit_intercept": [True, False]
        }


# Polynomial regression
def polynomial_regression(container: Container):
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    polynomial_features = PolynomialFeatures(degree=2)
    linear_regression_model = LinearRegression()

    polynomial_regression_model = Pipeline([("polynomial_features", polynomial_features),
                                            ("linear_regression_model", linear_regression_model)])
    params = PolynomialRegressionParams.get_params()

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, polynomial_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, polynomial_regression_model, x_train, y_train)
    else:
        best_model = polynomial_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    # feature_names = best_model["polynomial_features"].get_feature_names_out()
    # info["Feature names of polynomial regression"] = feature_names
    #
    # lr_intercept = best_model["linear_regression_model"].intercept_
    # info["Intercept of polynomial regression equation"] = lr_intercept
    #
    # lr_coef = best_model["linear_regression_model"].coef_
    # info["Coefficients of polynomial regression equation"] = lr_coef

    # Expand the test features with the polynomial step, then predict with the
    # final estimator
    x_test_ = best_model["polynomial_features"].fit_transform(x_test)
    y_pred = best_model["linear_regression_model"].predict(x_test_)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)

    info["指标"] = calculate_regression_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container


class LogisticRegressionParams:
    @classmethod
    def get_params(cls):
        return {
            "C": [0.001, 0.01, 0.1, 1.0, 10.0],
            "max_iter": [100, 200, 300],
            "solver": ["liblinear", "lbfgs", "newton-cg", "sag", "saga"],
            "random_state": [Config.RANDOM_STATE]
        }


# Logistic regression classification
def logistic_regression(container: Container):
    x_train = container.x_train
    y_train = container.y_train
    x_test = container.x_test
    y_test = container.y_test
    hyper_params_optimize = container.hyper_params_optimize
    info = {}

    logistic_regression_model = LogisticRegression(random_state=Config.RANDOM_STATE)
    params = LogisticRegressionParams.get_params()

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, logistic_regression_model, x_train, y_train)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, logistic_regression_model, x_train, y_train)
    else:
        best_model = logistic_regression_model
        best_model.fit(x_train, y_train)

    info["参数"] = best_model.get_params()

    # lr_intercept = best_model.intercept_
    # info["Intercept of logistic regression equation"] = lr_intercept.tolist()
    #
    # lr_coef = best_model.coef_
    # info["Coefficients of logistic regression equation"] = lr_coef.tolist()

    y_pred = best_model.predict(x_test)
    container.set_y_pred(y_pred)

    train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)
    container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
                                        test_scores_std)

    info["指标"] = calculate_classification_metrics(y_pred, y_test)

    container.set_info(info)
    container.set_status("trained")
    container.set_model(best_model)

    return container
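
A note on the grid keys in PolynomialRegressionParams: scikit-learn addresses a Pipeline step's parameters as <step name>__<parameter>, so the keys must match the strings given when the Pipeline was built. A minimal self-contained sketch of the same pattern (with a tiny made-up dataset):

# Minimal sketch of Pipeline step/parameter naming with GridSearchCV.
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

pipe = Pipeline([("polynomial_features", PolynomialFeatures()),
                 ("linear_regression_model", LinearRegression())])
param_grid = {"polynomial_features__degree": [2, 3],
              "linear_regression_model__fit_intercept": [True, False]}

x = np.arange(20, dtype=float).reshape(-1, 1)
y = (x ** 2).ravel()  # exactly quadratic, so any degree >= 2 fits perfectly
search = GridSearchCV(pipe, param_grid, cv=5)
search.fit(x, y)
print(search.best_params_)
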
analysis/markov_model.py DELETED
@@ -1,98 +0,0 @@
import numpy as np
import pandas as pd
from hmmlearn import hmm


def train_and_predict_hidden_markov_model(df):
    window_size = 10

    # train_df = df[['point_won', 'point_loss', 'ace', 'winner', 'double_fault', 'unf_err', 'net_point', 'net_point_won', 'break_pt', 'break_pt_won', 'break_pt_miss']]

    train_df = df
    # "p1_winner",
    # "p2_winner",
    # "winner_shot_type",
    # "p1_double_fault",
    # "p2_double_fault",
    # "p1_unf_err",
    # "p2_unf_err",
    # "p1_net_pt_won",
    # "p2_net_pt_won",
    # "p1_break_pt_won",
    # "p2_break_pt_won",
    # "rally_count",
    # "serve_width",
    # "serve_depth",
    # "return_depth"
    df["observation"] = 0

    # mapping = {}
    # counter = 0
    # for i in range(len(train_df)):
    #     cur_combination = train_df.iloc[i].to_list()
    #
    #     if str(cur_combination) not in mapping.keys():
    #         mapping[str(cur_combination)] = counter
    #         df.loc[i, "observation"] = counter
    #         counter += 1
    #     else:
    #         df.loc[i, "observation"] = mapping[str(cur_combination)]

    observation_list = df["observation"].to_list()

    # value_separated_observation_list = [observation_list[i - window_size: i] for i in range(window_size, len(observation_list))]
    # value_separated_observation_list = [[0] * window_size] * window_size + value_separated_observation_list

    # Row i of the observation matrix is the column-wise sum of the previous
    # `window_size` rows of the training frame
    observations = np.array([np.sum(np.array([train_df.iloc[j].to_list() for j in range(i - window_size, i)]).astype(int), axis=0) for i in range(window_size, len(train_df))])

    # Shift so all entries are non-negative
    observations = abs(np.min(observations)) + observations

    observations = observations.astype(int)

    # Pad the first `window_size` rows by repeating the first observation
    m_observations = np.concatenate(
        (np.array([observations[0].tolist()] * window_size), observations),
        axis=0
    )

    df = pd.concat([df, pd.DataFrame({"window_observation": m_observations.tolist()})], axis=1)

    hidden_markov_model = hmm.MultinomialHMM(n_components=5, n_iter=50, tol=0.01)

    hidden_markov_model.fit(observations)

    start_prob = hidden_markov_model.startprob_
    transition_prob = hidden_markov_model.transmat_
    emission_prob = hidden_markov_model.emissionprob_

    neg_log_likelihood, pred = calculate_momentum(df, hidden_markov_model, m_observations)

    _, hidden2observation = hidden_markov_model.score_samples(observations)

    state_impacts = np.sum(hidden2observation, axis=0)

    return state_impacts, neg_log_likelihood, pred, start_prob, transition_prob, emission_prob

    # Unreachable leftover from an earlier version: it sat after the return
    # above and references num_states, num_obs, forward_prob and backward_prob,
    # none of which are defined in this function. Kept commented out.
    # state_impacts = np.zeros((num_states, num_obs))
    #
    # for t in range(num_obs):
    #     for i in range(num_states):
    #         state_impacts[i, t] = (forward_prob[t, i] * backward_prob[t, i]) / np.sum(
    #             forward_prob[t, :] * backward_prob[t, :])
    #
    # return neg_log_likelihood, pred, start_prob, transition_prob, emission_prob


def calculate_momentum(df, hidden_markov_model, m_observations):
    # pred_list = []
    # neg_log_likelihood_list = []
    # for i in range(len(df)):
    #     neg_log_likelihood, pred = hidden_markov_model.decode(np.array([df.loc[i, "window_observation"]]))
    #     pred_list.append(pred[0])
    #     neg_log_likelihood_list.append(neg_log_likelihood)
    #
    # return pred_list, neg_log_likelihood_list

    neg_log_likelihood, pred = hidden_markov_model.decode(m_observations)

    return neg_log_likelihood, pred
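
The sliding-window construction above is the core of the feature engineering: row i of `observations` is the element-wise sum of rows i-10 through i-1. The same idea on a toy array:

# Toy version of the windowed-sum observation matrix (window_size = 3).
import numpy as np

data = np.arange(12).reshape(6, 2)   # 6 rows, 2 columns
window_size = 3
obs = np.array([data[i - window_size:i].sum(axis=0)
                for i in range(window_size, len(data))])
print(obs)
# [[ 6  9]
#  [12 15]
#  [18 21]]
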
analysis/model_train/tree_model.py CHANGED
@@ -99,21 +99,12 @@ class DecisionTreeClassifierParams:
 
 # Decision tree classification
 def decision_tree_classifier(container, params):
-    import logging
-    logging.basicConfig(level=logging.NOTSET)
-    logging.info(str(params), logging.getLevelName(logging.INFO))
-    print(str(params))
-
-
     x_train, y_train, x_test, y_test, hyper_params_optimize = get_values_from_container_class(container)
     info = {}
 
     params = transform_params_list(DecisionTreeClassifierParams, params)
     params['random_state'] = [StaticValue.RANDOM_STATE]
 
-    logging.info(str(params), logging.getLevelName(logging.INFO))
-    print(str(params))
-
     random_forest_regression_model = DecisionTreeClassifier(random_state=StaticValue.RANDOM_STATE)
 
     if hyper_params_optimize == "grid_search":
analysis/my_learning_curve.py DELETED
@@ -1,33 +0,0 @@
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from coding.llh.metrics.calculate_regression_metrics import calculate_ar2


def my_learning_curve(estimator, X, y, cv=5):
    train_sizes = np.linspace(0.1, 1.0, 10)[:-1]
    train_scores = []
    val_scores = []

    for train_size in train_sizes:
        # Split the dataset into training and validation sets
        X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=train_size, random_state=42)

        # Train the model on the training set (this fit was commented out in
        # the original, so every score came from the same pre-fitted estimator;
        # re-enabled so the curve actually reflects the training-set size)
        estimator.fit(X_train, y_train)

        # Evaluate the model on the training set
        y_train_pred = estimator.predict(X_train)
        train_accuracy = r2_score(y_train, y_train_pred)
        train_scores.append(train_accuracy)

        # Evaluate the model on the validation set
        y_val_pred = estimator.predict(X_val)
        val_accuracy = r2_score(y_val, y_val_pred)
        val_scores.append(val_accuracy)

    return train_sizes, train_scores, val_scores
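
A quick sanity check of this helper, assuming any scikit-learn regressor (the synthetic data below is illustrative only):

# Illustrative usage of my_learning_curve with a toy regression problem.
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=200)

sizes, train_r2, val_r2 = my_learning_curve(LinearRegression(), X, y)
for s, t, v in zip(sizes, train_r2, val_r2):
    print(f"train_size={s:.1f}  train R2={t:.3f}  val R2={v:.3f}")
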
analysis/neural_model.py DELETED
@@ -1,321 +0,0 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn import preprocessing
from torch.utils.data import TensorDataset
from tqdm import tqdm
import json
import os
import warnings
from sklearn.neural_network import MLPRegressor

from coding.llh.analysis.shap_model import shap_calculate
from coding.llh.static.process import grid_search, bayes_search
from coding.llh.visualization.draw_line_graph import draw_line_graph
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import learning_curve

from coding.llh.static.config import Config
from coding.llh.visualization.draw_learning_curve import draw_learning_curve
from coding.llh.visualization.draw_scatter_line_graph import draw_scatter_line_graph
from coding.llh.metrics.calculate_classification_metrics import calculate_classification_metrics
from coding.llh.metrics.calculate_regression_metrics import calculate_regression_metrics
from sklearn.ensemble import RandomForestRegressor

warnings.filterwarnings("ignore")


def mlp_regression(feature_names, x, y, x_train_and_validate, y_train_and_validate, x_test, y_test, train_and_validate_data_list=None, hyper_params_optimize=None):
    info = {}
    model_name = "mlp regression model"

    model = MLPRegressor()
    params = {
        'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],
        'activation': ['relu', 'tanh', 'logistic'],
        'alpha': [0.0001, 0.001, 0.01],
        'learning_rate': ['constant', 'invscaling', 'adaptive'],
        'max_iter': [100, 200, 300]
    }

    if hyper_params_optimize == "grid_search":
        best_model = grid_search(params, model, x_train_and_validate, y_train_and_validate)
    elif hyper_params_optimize == "bayes_search":
        best_model = bayes_search(params, model, x_train_and_validate, y_train_and_validate)
    else:
        best_model = model
        best_model.fit(x, y)

    info["{} Params".format(model_name)] = best_model.get_params()

    y_pred = best_model.predict(x_test).reshape(-1, 1)

    # 0202:

    train_sizes, train_scores, test_scores = learning_curve(best_model, x[:500], y[:500], cv=5, scoring="r2")

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    # draw_learning_curve(train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std)

    # draw_scatter_line_graph(x_test, y_pred, y_test, lr_coef, lr_intercept, ["pred", "real"], "logistic regression model residual plot")

    info.update(calculate_regression_metrics(y_pred, y_test, model_name))
    # info.update(calculate_classification_metrics(y_pred, y_test, "logistic regression"))
    # mae, mse, rsme, r2, ar2 = calculate_regression_metrics(y_pred, y_test, model_name)

    # shap_calculate(best_model, x_test, feature_names)

    return info, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std


def ann(df):
    # Parameter initialization
    lr = 0.0001
    batch_size = 32
    input_dim = 10
    output_dim = 4
    epochs = 40
    best_acc = 0
    save_path = "./model/model.pth"

    # Device selection
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device loaded for training: [{}]".format(device))

    # Dataset split
    def split_data(data: pd.DataFrame):
        data = np.array(data)

        dataX = data[:, 1:]
        dataY = data[:, :1]

        dataX = np.array(dataX)
        dataY = np.array(dataY)

        total_size = dataX.shape[0]
        train_size = int(np.round(0.8 * total_size))

        x_train = dataX[: train_size, :]
        y_train = dataY[: train_size]

        x_test = dataX[train_size:, :]
        y_test = dataY[train_size:]

        return x_train, y_train, x_test, y_test, total_size, train_size

    x_train, y_train, x_test, y_test, total_size, train_size = split_data(df)

    # Data preprocessing
    x_train = preprocessing.scale(x_train)
    x_test = preprocessing.scale(x_test)

    y_train = y_train - 1
    y_test = y_test - 1

    # Convert data formats
    x_train_tensor = torch.from_numpy(x_train).to(torch.float32)
    y_train_tensor = torch.from_numpy(y_train).to(torch.float32)
    x_test_tensor = torch.from_numpy(x_test).to(torch.float32)
    y_test_tensor = torch.from_numpy(y_test).to(torch.float32)

    train_data = TensorDataset(x_train_tensor, y_train_tensor)
    test_data = TensorDataset(x_test_tensor, y_test_tensor)

    train_loader = torch.utils.data.DataLoader(train_data, batch_size, True)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size, False)

    print("Data loaded for training: [{}]".format(len(train_data)))
    print("Data loaded for testing: [{}]".format(len(test_data)))

    # Model definition
    class ANN(nn.Module):
        def __init__(self, input_dim, output_dim):
            super(ANN, self).__init__()

            self.hidden1 = nn.Sequential(
                nn.Linear(input_dim, 16, bias=True),
                nn.ReLU()
            )
            self.hidden2 = nn.Sequential(
                nn.Linear(16, 32, bias=True),
                nn.ReLU()
            )
            self.hidden3 = nn.Sequential(
                nn.Linear(32, 64, bias=True),
                nn.ReLU()
            )
            self.hidden4 = nn.Sequential(
                nn.Linear(64, 128, bias=True),
                nn.ReLU()
            )
            self.hidden5 = nn.Sequential(
                nn.Linear(128, 256, bias=True),
                nn.ReLU()
            )
            self.hidden6 = nn.Sequential(
                nn.Linear(256, 512, bias=True),
                nn.ReLU()
            )
            self.hidden7 = nn.Sequential(
                nn.Linear(512, 1024, bias=True),
                nn.ReLU()
            )
            self.hidden8 = nn.Sequential(
                nn.Linear(1024, output_dim, bias=True),
                nn.Softmax()
            )

        def forward(self, x):
            x = self.hidden1(x)
            x = self.hidden2(x)
            x = self.hidden3(x)
            x = self.hidden4(x)
            x = self.hidden5(x)
            x = self.hidden6(x)
            x = self.hidden7(x)
            x = self.hidden8(x)

            return x

    model = ANN(input_dim, output_dim).to(device)
    print("Model set: [{}]".format(model))

    # Loss function
    criterion = nn.CrossEntropyLoss()
    print("Criterion set: [{}]".format(type(criterion)))

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr)
    print("Optimizer set: [{}]".format(type(optimizer)))
    print()

    if os.path.isfile(save_path):
        # Load the saved model
        state_dict = torch.load(save_path)
        model.load_state_dict(state_dict, strict=False)
        print("!Model loaded")

        with open("./model/best_acc.json", "r") as f:
            print("Best accuracy of current model: [{}]".format(json.load(f)))

    else:
        print("!Training starting\n")

        train_loss_list = []
        train_acc_list = []
        test_loss_list = []
        test_acc_list = []

        y_pred_list = []
        y_real_list = []

        for epoch in range(epochs):
            # Training
            model.train()

            train_loss = 0
            train_acc = 0
            train_acc_count = 0
            train_count = 0
            train_bar = tqdm(train_loader)
            for data in train_bar:
                x_train, y_train = data
                x_train = x_train.to(device)
                y_train = y_train.to(device)
                # Reset the optimizer's gradients
                optimizer.zero_grad()
                # Forward pass
                output = model(x_train)
                # Compute the loss
                loss = criterion(output, y_train.reshape(-1).long())
                # Backward pass: compute gradients
                loss.backward()
                # Backward pass: update parameters
                optimizer.step()

                train_loss += loss.item()
                train_bar.desc = "Train epoch[{}/{}] loss: {:.3f}".format(epoch + 1, epochs, loss)
                train_acc_count += (output.argmax(axis=1) == y_train.view(-1).int()).sum().item()
                train_count += len(x_train)

            train_acc = train_acc_count / train_count

            # Evaluation
            model.eval()

            test_loss = 0
            test_acc = 0
            test_acc_count = 0
            test_count = 0
            with torch.no_grad():
                test_bar = tqdm(test_loader)
                for data in test_bar:
                    x_test, y_test = data
                    x_test = x_test.to(device)
                    y_test = y_test.to(device)
                    # Forward pass
                    output = model(x_test)

                    y_pred_list.append(output.tolist())
                    y_real_list.append(y_test.tolist())

                    # Compute the loss
                    loss = criterion(output, y_test.reshape(-1).long())

                    test_loss += loss.item()
                    test_bar.desc = "Test epoch[{}/{}] loss: {:.3f}".format(epoch + 1, epochs, loss)
                    test_acc_count += (output.argmax(axis=1) == y_test.view(-1).int()).sum().item()
                    test_count += len(x_test)

            test_acc = test_acc_count / test_count

            print("\nEpoch: {}".format(epoch + 1))
            print("Train_loss: {:.4f}".format(train_loss))
            print("Train_accuracy: {:.4f}".format(train_acc))
            print("Test_loss: {:.4f}".format(test_loss))
287
- print("Test_accuracy: {:.4f}".format(test_acc))
288
- print("\n")
289
-
290
- train_loss_list.append(train_loss)
291
- train_acc_list.append(train_acc)
292
- test_loss_list.append(test_loss)
293
- test_acc_list.append(test_acc)
294
-
295
- # 保存当前最优模型和最优准确率值
296
- if test_acc > best_acc:
297
- best_acc = test_acc
298
- with open("./model/info.json", "w") as f:
299
- json.dump({
300
- "best_acc": [best_acc],
301
- "train_loss_list": train_loss_list,
302
- "train_acc_list": train_acc_list,
303
- "test_loss_list": test_loss_list,
304
- "test_acc_list": test_acc_list,
305
- "y_pred_list": y_pred_list,
306
- "y_real_list": y_real_list
307
- }, f)
308
-
309
- torch.save(model.state_dict(), save_path)
310
-
311
- print("\n!Training finished")
312
- print("Best accuracy: {:.4f}".format(best_acc))
313
-
314
- # 数据可视化
315
- draw_line_graph(
316
- range(len(y_pred_list)),
317
- [y_pred_list, y_real_list],
318
- "ANN prediction",
319
- ["predict, real"]
320
- )
321
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
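Note on ann above: it assumes a frame whose first column is a 1-indexed class label (hence the y - 1 shift) and whose remaining ten columns are features. A minimal invocation sketch on synthetic data, assuming torch and the helpers imported at the top of this file are available:

    import numpy as np
    import pandas as pd

    # Hypothetical data: column 0 is a label in {1, ..., 4}; columns 1-10 are
    # features, matching input_dim=10 and output_dim=4 above.
    rng = np.random.default_rng(0)
    df = pd.DataFrame(np.column_stack([rng.integers(1, 5, size=1000),
                                       rng.normal(size=(1000, 10))]))
    ann(df)
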
analysis/others/evaluation_model.py DELETED
@@ -1,99 +0,0 @@
- import numpy as np
- import skfuzzy as fuzz
- from skfuzzy import control as ctrl
- import matplotlib.pyplot as plt
-
-
- def fuzzy_comprehensive_evaluation_model():
-     # Create the fuzzy variables and fuzzy sets
-     technical_skill = ctrl.Antecedent(np.arange(0, 101, 1), 'technical_skill')
-     physical_condition = ctrl.Antecedent(np.arange(0, 101, 1), 'physical_condition')
-     mental_toughness = ctrl.Antecedent(np.arange(0, 101, 1), 'mental_toughness')
-     opponent_strength = ctrl.Antecedent(np.arange(0, 101, 1), 'opponent_strength')
-
-     performance = ctrl.Consequent(np.arange(0, 101, 1), 'performance')
-
-     # Define the fuzzy membership functions
-     technical_skill['low'] = fuzz.trimf(technical_skill.universe, [0, 0, 50])
-     technical_skill['medium'] = fuzz.trimf(technical_skill.universe, [0, 50, 100])
-     technical_skill['high'] = fuzz.trimf(technical_skill.universe, [50, 100, 100])
-
-     physical_condition['low'] = fuzz.trimf(physical_condition.universe, [0, 0, 50])
-     physical_condition['medium'] = fuzz.trimf(physical_condition.universe, [0, 50, 100])
-     physical_condition['high'] = fuzz.trimf(physical_condition.universe, [50, 100, 100])
-
-     mental_toughness['low'] = fuzz.trimf(mental_toughness.universe, [0, 0, 50])
-     mental_toughness['medium'] = fuzz.trimf(mental_toughness.universe, [0, 50, 100])
-     mental_toughness['high'] = fuzz.trimf(mental_toughness.universe, [50, 100, 100])
-
-     opponent_strength['low'] = fuzz.trimf(opponent_strength.universe, [0, 0, 50])
-     opponent_strength['medium'] = fuzz.trimf(opponent_strength.universe, [0, 50, 100])
-     opponent_strength['high'] = fuzz.trimf(opponent_strength.universe, [50, 100, 100])
-
-     performance['poor'] = fuzz.trimf(performance.universe, [0, 0, 50])
-     performance['average'] = fuzz.trimf(performance.universe, [0, 50, 100])
-     performance['excellent'] = fuzz.trimf(performance.universe, [50, 100, 100])
-
-     # Set the defuzzification method for the output: centroid
-     performance.defuzzify_method = 'centroid'
-
-     # Define the rules
-     rule1 = ctrl.Rule(
-         technical_skill['low'] | physical_condition['low'] | mental_toughness['low'] | opponent_strength['low'],
-         performance['poor']
-     )
-     rule2 = ctrl.Rule(
-         technical_skill['medium'] | physical_condition['medium'] | mental_toughness['medium'] | opponent_strength['medium'],
-         performance['average']
-     )
-     rule3 = ctrl.Rule(
-         technical_skill['high'] | physical_condition['high'] | mental_toughness['high'] | opponent_strength['high'],
-         performance['excellent']
-     )
-
-     # Build the control system
-     performance_evaluation = ctrl.ControlSystem([rule1, rule2, rule3])
-     performance_evaluator = ctrl.ControlSystemSimulation(performance_evaluation)
-
-     # Input data
-     performance_evaluator.input['technical_skill'] = 75
-     performance_evaluator.input['physical_condition'] = 80
-     performance_evaluator.input['mental_toughness'] = 85
-     performance_evaluator.input['opponent_strength'] = 60
-
-     # Compute the fuzzy comprehensive score
-     performance_evaluator.compute()
-
-     # Output the result
-     print("Fuzzy comprehensive score:", performance_evaluator.output['performance'])
-
-     # Plot the fuzzy sets
-     technical_skill.view("technical_skill", sim=performance_evaluator)
-     physical_condition.view("physical_condition", sim=performance_evaluator)
-     mental_toughness.view("mental_toughness", sim=performance_evaluator)
-     opponent_strength.view("opponent_strength", sim=performance_evaluator)
-     performance.view("performance", sim=performance_evaluator)
-
-     # Perform sensitivity analysis (vary the input values)
-
-     # input_var_1:
-
-     # input_values = np.arange(0, 11, 1)
-     # output_values = []
-     #
-     # for val in input_values:
-     #     fuzzy_control_sys_simulation.input["input_var_1"] = val
-     #     fuzzy_control_sys_simulation.compute()
-     #     output_values.append(fuzzy_control_sys_simulation.output["output_var"])
-     #
-     # plt.plot(
-     #     input_values,
-     #     output_values,
-     #     label="Sensitivity Analysis"
-     # )
-     # plt.xlabel("Input Variable 1")
-     # plt.ylabel("Output Variable")
-     # plt.legend()
-     # plt.show()
-     #
-     # return fuzzy_control_sys_simulation.output["output_var"]
-
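Note: a runnable version of the sensitivity-analysis loop left commented out above, sweeping one antecedent while holding the others at fixed values (the fixed values here are illustrative assumptions, not taken from the model):

    import numpy as np

    def sweep_technical_skill(evaluator):
        # evaluator is the ControlSystemSimulation built above
        outputs = []
        for val in np.arange(0, 101, 10):
            evaluator.input['technical_skill'] = val
            evaluator.input['physical_condition'] = 80
            evaluator.input['mental_toughness'] = 85
            evaluator.input['opponent_strength'] = 60
            evaluator.compute()
            outputs.append(evaluator.output['performance'])
        return outputs
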
analysis/others/gaussian_model.py DELETED
@@ -1,28 +0,0 @@
- import numpy as np
- import matplotlib.pyplot as plt
- from sklearn.mixture import GaussianMixture
-
-
- def gaussian_mix(x):
-     x = x.reshape(-1, 1)
-     n_components = 2000  # adjust the number of mixture components as needed
-     gmm = GaussianMixture(n_components=n_components, covariance_type='full')
-
-     # Fit the model
-     gmm.fit(x)
-
-     # Sample continuous data from the fitted mixture
-     continuous_data = gmm.sample(len(x))[0].reshape(-1)
-
-     return continuous_data
-
-     # Fit the data with a Gaussian mixture model
-     # gmm = GaussianMixture(n_components=50)  # choose the number of mixture components
-     # gmm.fit(x.reshape(-1, 1))
-
-     # Generate continuous data
-     # return np.linspace(min(x), max(x), len(x)).flatten()
-
-     # z = np.exp(gmm.score_samples(y.reshape(-1, 1)))
-
-     # return z
-
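Note: gaussian_mix resamples from the fitted mixture rather than transforming the input, so the result is a fresh draw of the same length, not a point-for-point smoothing (sklearn's sample() also returns draws grouped by component). A usage sketch on synthetic data:

    import numpy as np

    x = np.random.default_rng(0).integers(0, 10, size=5000).astype(float)
    continuous = gaussian_mix(x)  # shape (5000,)
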
analysis/others/markov_model.py DELETED
@@ -1,98 +0,0 @@
- import numpy as np
- import pandas as pd
- from hmmlearn import hmm
-
-
- def train_and_predict_hidden_markov_model(df):
-     window_size = 10
-
-     # train_df = df[['point_won', 'point_loss', 'ace', 'winner', 'double_fault', 'unf_err', 'net_point', 'net_point_won', 'break_pt', 'break_pt_won', 'break_pt_miss']]
-
-     train_df = df
-     # "p1_winner",
-     # "p2_winner",
-     # "winner_shot_type",
-     # "p1_double_fault",
-     # "p2_double_fault",
-     # "p1_unf_err",
-     # "p2_unf_err",
-     # "p1_net_pt_won",
-     # "p2_net_pt_won",
-     # "p1_break_pt_won",
-     # "p2_break_pt_won",
-     # "rally_count",
-     # "serve_width",
-     # "serve_depth",
-     # "return_depth"
-     df["observation"] = 0
-
-     # mapping = {}
-     # counter = 0
-     # for i in range(len(train_df)):
-     #     cur_combination = train_df.iloc[i].to_list()
-     #
-     #     if str(cur_combination) not in mapping.keys():
-     #         mapping[str(cur_combination)] = counter
-     #         df.loc[i, "observation"] = counter
-     #         counter += 1
-     #     else:
-     #         df.loc[i, "observation"] = mapping[str(cur_combination)]
-
-     observation_list = df["observation"].to_list()
-
-     # value_separated_observation_list = [observation_list[i - window_size: i] for i in range(window_size, len(observation_list))]
-     # value_separated_observation_list = [[0] * window_size] * window_size + value_separated_observation_list
-
-     # Sliding-window sums of the raw features form the observation vectors
-     observations = np.array([np.sum(np.array([train_df.iloc[j].to_list() for j in range(i - window_size, i)]).astype(int), axis=0) for i in range(window_size, len(train_df))])
-
-     # Shift so that every observation count is non-negative
-     observations = abs(np.min(observations)) + observations
-
-     observations = observations.astype(int)
-
-     m_observations = np.concatenate(
-         (np.array([observations[0].tolist()] * window_size), observations),
-         axis=0
-     )
-
-     df = pd.concat([df, pd.DataFrame({"window_observation": m_observations.tolist()})], axis=1)
-
-     hidden_markov_model = hmm.MultinomialHMM(n_components=5, n_iter=50, tol=0.01)
-
-     hidden_markov_model.fit(observations)
-
-     start_prob = hidden_markov_model.startprob_
-     transition_prob = hidden_markov_model.transmat_
-     emission_prob = hidden_markov_model.emissionprob_
-
-     neg_log_likelihood, pred = calculate_momentum(df, hidden_markov_model, m_observations)
-
-     _, hidden2observation = hidden_markov_model.score_samples(observations)
-
-     state_impacts = np.sum(hidden2observation, axis=0)
-
-     return state_impacts, neg_log_likelihood, pred, start_prob, transition_prob, emission_prob
-
-     # NOTE: the block below is unreachable dead code (it sits after the return
-     # above, and num_states, num_obs, forward_prob and backward_prob are never defined)
-     state_impacts = np.zeros((num_states, num_obs))
-
-     for t in range(num_obs):
-         for i in range(num_states):
-             state_impacts[i, t] = (forward_prob[t, i] * backward_prob[t, i]) / np.sum(
-                 forward_prob[t, :] * backward_prob[t, :])
-
-     return neg_log_likelihood, pred, start_prob, transition_prob, emission_prob
-
-
- def calculate_momentum(df, hidden_markov_model, m_observations):
-     # pred_list = []
-     # neg_log_likelihood_list = []
-     # for i in range(len(df)):
-     #     neg_log_likelihood, pred = hidden_markov_model.decode(np.array([df.loc[i, "window_observation"]]))
-     #     pred_list.append(pred[0])
-     #     neg_log_likelihood_list.append(neg_log_likelihood)
-     #
-     # return pred_list, neg_log_likelihood_list
-
-     neg_log_likelihood, pred = hidden_markov_model.decode(m_observations)
-
-     return neg_log_likelihood, pred
-
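Note: hmmlearn's MultinomialHMM treats each observation row as a vector of event counts, and recent versions expect every row to sum to the same number of trials, so the windowed sums above are version-sensitive. A minimal decode sketch on synthetic counts (illustrative only):

    import numpy as np
    from hmmlearn import hmm

    rng = np.random.default_rng(0)
    counts = rng.multinomial(10, [1 / 6] * 6, size=200)  # each row sums to 10
    model = hmm.MultinomialHMM(n_components=5, n_iter=50, tol=0.01)
    model.fit(counts)
    log_prob, states = model.decode(counts)  # Viterbi state sequence
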
analysis/others/poly_model.py DELETED
@@ -1,12 +0,0 @@
- import numpy as np
- import matplotlib.pyplot as plt
-
-
- def poly_fit(x_values, y_values, degree=60):
-     # Polynomial fit via numpy's polyfit
-     coefficients = np.polyfit(x_values, y_values, degree)
-
-     # Build the fitted polynomial function
-     fitted_curve = np.poly1d(coefficients)
-
-     return fitted_curve(x_values)
-
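Note: the default degree=60 is far beyond what np.polyfit can handle stably and will typically trigger a poorly-conditioned warning; a sketch with a modest degree:

    import numpy as np

    x = np.linspace(0, 10, 200)
    y = np.sin(x) + np.random.default_rng(0).normal(scale=0.1, size=x.size)
    smooth = poly_fit(x, y, degree=7)
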
analysis/poly_model.py DELETED
@@ -1,12 +0,0 @@
- import numpy as np
- import matplotlib.pyplot as plt
-
-
- def poly_fit(x_values, y_values, degree=60):
-     # Polynomial fit via numpy's polyfit
-     coefficients = np.polyfit(x_values, y_values, degree)
-
-     # Build the fitted polynomial function
-     fitted_curve = np.poly1d(coefficients)
-
-     return fitted_curve(x_values)
-
analysis/shap_model.py DELETED
@@ -1,55 +0,0 @@
- import matplotlib.pyplot as plt
- import numpy as np
- import shap
-
-
- def draw_shap_beeswarm(model, x, feature_names, type, paint_object):
-     explainer = shap.KernelExplainer(model.predict, x)
-     shap_values = explainer(x)
-
-     shap.summary_plot(shap_values, x, feature_names=feature_names, plot_type=type, show=False)
-
-     plt.title(paint_object.get_name())
-     plt.tight_layout()
-
-     return plt, paint_object
-
-
- def draw_waterfall(model, x, feature_names, number, paint_object):
-     explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
-     shap_values = explainer(x)
-
-     shap.waterfall_plot(shap_values[number], show=False)
-
-     plt.title(paint_object.get_name())
-     plt.tight_layout()
-
-     return plt, paint_object
-
-
- def draw_force(model, x, feature_names, number, paint_object):
-     explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
-     shap_values = explainer(x[number])
-
-     shap.force_plot(explainer.expected_value, shap_values.values, feature_names=feature_names, show=False, matplotlib=True)
-
-     plt.title(paint_object.get_name())
-     plt.tight_layout()
-
-     return plt, paint_object
-
-
- def draw_dependence(model, x, feature_names, col, paint_object):
-     explainer = shap.KernelExplainer(model.predict, x, feature_names=feature_names)
-     shap_values = explainer(x)
-
-     shap.dependence_plot(feature_names.index(col), shap_values.values, x, feature_names=feature_names, show=False)
-
-     plt.title(paint_object.get_name())
-     plt.tight_layout()
-
-     return plt, paint_object
-
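Note: KernelExplainer is model-agnostic but its cost grows quickly with the size of the background set. A usage sketch, assuming a fitted sklearn model, a feature matrix x with matching feature_names, and a PaintObject instance po (both defined elsewhere in this repo):

    import shap

    background = shap.sample(x, 100)  # subsample to keep KernelExplainer tractable
    fig, po = draw_shap_beeswarm(model, background, feature_names, "dot", po)
    fig.show()
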
analysis/tree_model.py DELETED
@@ -1,290 +0,0 @@
- from metrics.calculate_regression_metrics import calculate_regression_metrics
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.ensemble import RandomForestRegressor
- from sklearn.model_selection import learning_curve
- from sklearn.tree import DecisionTreeClassifier
- from xgboost import XGBClassifier
- from lightgbm import LGBMClassifier
-
- from analysis.shap_model import *
- from metrics.calculate_classification_metrics import calculate_classification_metrics
- from static.config import Config
- from static.process import grid_search, bayes_search
- from static.new_class import *
-
-
- class RandomForestRegressionParams:
-     @classmethod
-     def get_params(cls):
-         return {
-             'n_estimators': [10, 50, 100, 200],
-             'max_depth': [None, 10, 20, 30],
-             'min_samples_split': [2, 5, 10],
-             'min_samples_leaf': [1, 2, 4]
-         }
-
-
- # Random forest regression
- def random_forest_regression(container: Container):
-     x_train = container.x_train
-     y_train = container.y_train
-     x_test = container.x_test
-     y_test = container.y_test
-     hyper_params_optimize = container.hyper_params_optimize
-     info = {}
-
-     random_forest_regression_model = RandomForestRegressor(n_estimators=5, random_state=Config.RANDOM_STATE)
-     params = RandomForestRegressionParams.get_params()
-
-     if hyper_params_optimize == "grid_search":
-         best_model = grid_search(params, random_forest_regression_model, x_train, y_train)
-     elif hyper_params_optimize == "bayes_search":
-         best_model = bayes_search(params, random_forest_regression_model, x_train, y_train)
-     else:
-         best_model = random_forest_regression_model
-         best_model.fit(x_train, y_train)
-
-     info["参数"] = best_model.get_params()
-
-     y_pred = best_model.predict(x_test)
-     # y_pred = best_model.predict(x_test).reshape(-1, 1)
-     container.set_y_pred(y_pred)
-
-     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
-
-     train_scores_mean = np.mean(train_scores, axis=1)
-     train_scores_std = np.std(train_scores, axis=1)
-     test_scores_mean = np.mean(test_scores, axis=1)
-     test_scores_std = np.std(test_scores, axis=1)
-     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
-                                         test_scores_std)
-
-     info["指标"] = calculate_regression_metrics(y_pred, y_test)
-
-     container.set_info(info)
-     container.set_status("trained")
-     container.set_model(best_model)
-
-     return container
-
-
- class DecisionTreeClassifierParams:
-     @classmethod
-     def get_params(cls):
-         return {
-             "criterion": ["gini", "entropy"],
-             "splitter": ["best", "random"],
-             "max_depth": [None, 5, 10, 15],
-             "min_samples_split": [2, 5, 10],
-             "min_samples_leaf": [1, 2, 4]
-         }
-
-
- # Decision tree classification
- def decision_tree_classifier(container: Container):
-     x_train = container.x_train
-     y_train = container.y_train
-     x_test = container.x_test
-     y_test = container.y_test
-     hyper_params_optimize = container.hyper_params_optimize
-     info = {}
-
-     decision_tree_classifier_model = DecisionTreeClassifier(random_state=Config.RANDOM_STATE)
-     params = DecisionTreeClassifierParams.get_params()
-
-     if hyper_params_optimize == "grid_search":
-         best_model = grid_search(params, decision_tree_classifier_model, x_train, y_train)
-     elif hyper_params_optimize == "bayes_search":
-         best_model = bayes_search(params, decision_tree_classifier_model, x_train, y_train)
-     else:
-         best_model = decision_tree_classifier_model
-         best_model.fit(x_train, y_train)
-
-     info["参数"] = best_model.get_params()
-
-     y_pred = best_model.predict(x_test)
-     container.set_y_pred(y_pred)
-
-     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
-
-     train_scores_mean = np.mean(train_scores, axis=1)
-     train_scores_std = np.std(train_scores, axis=1)
-     test_scores_mean = np.mean(test_scores, axis=1)
-     test_scores_std = np.std(test_scores, axis=1)
-     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
-                                         test_scores_std)
-
-     info["指标"] = calculate_classification_metrics(y_pred, y_test)
-
-     container.set_info(info)
-     container.set_status("trained")
-     container.set_model(best_model)
-
-     return container
-
-
- class RandomForestClassifierParams:
-     @classmethod
-     def get_params(cls):
-         return {
-             "criterion": ["gini", "entropy"],
-             "n_estimators": [50, 100, 150],
-             "max_depth": [None, 5, 10, 15],
-             "min_samples_split": [2, 5, 10],
-             "min_samples_leaf": [1, 2, 4]
-         }
-
-
- # Random forest classification
- def random_forest_classifier(container: Container):
-     x_train = container.x_train
-     y_train = container.y_train
-     x_test = container.x_test
-     y_test = container.y_test
-     hyper_params_optimize = container.hyper_params_optimize
-     info = {}
-
-     random_forest_classifier_model = RandomForestClassifier(n_estimators=5, random_state=Config.RANDOM_STATE)
-     params = RandomForestClassifierParams.get_params()
-
-     if hyper_params_optimize == "grid_search":
-         best_model = grid_search(params, random_forest_classifier_model, x_train, y_train)
-     elif hyper_params_optimize == "bayes_search":
-         best_model = bayes_search(params, random_forest_classifier_model, x_train, y_train)
-     else:
-         best_model = random_forest_classifier_model
-         best_model.fit(x_train, y_train)
-
-     info["参数"] = best_model.get_params()
-
-     y_pred = best_model.predict(x_test)
-     # y_pred = best_model.predict(x_test).reshape(-1, 1)
-     container.set_y_pred(y_pred)
-
-     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
-
-     train_scores_mean = np.mean(train_scores, axis=1)
-     train_scores_std = np.std(train_scores, axis=1)
-     test_scores_mean = np.mean(test_scores, axis=1)
-     test_scores_std = np.std(test_scores, axis=1)
-     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
-                                         test_scores_std)
-
-     info["指标"] = calculate_classification_metrics(y_pred, y_test)
-
-     container.set_info(info)
-     container.set_status("trained")
-     container.set_model(best_model)
-
-     return container
-
-
- class XgboostClassifierParams:
-     @classmethod
-     def get_params(cls):
-         return {
-             "n_estimators": [50, 100, 150],
-             "learning_rate": [0.01, 0.1, 0.2],
-             "max_depth": [3, 4, 5],
-             "min_child_weight": [1, 2, 3],
-             "gamma": [0, 0.1, 0.2],
-             "subsample": [0.5, 0.8, 0.9, 1.0],
-             "colsample_bytree": [0.8, 0.9, 1.0]
-         }
-
-
- # XGBoost classification
- def xgboost_classifier(container: Container):
-     x_train = container.x_train
-     y_train = container.y_train
-     x_test = container.x_test
-     y_test = container.y_test
-     hyper_params_optimize = container.hyper_params_optimize
-     info = {}
-
-     xgboost_classifier_model = XGBClassifier(random_state=Config.RANDOM_STATE)
-     params = XgboostClassifierParams.get_params()
-
-     if hyper_params_optimize == "grid_search":
-         best_model = grid_search(params, xgboost_classifier_model, x_train, y_train)
-     elif hyper_params_optimize == "bayes_search":
-         best_model = bayes_search(params, xgboost_classifier_model, x_train, y_train)
-     else:
-         best_model = xgboost_classifier_model
-         best_model.fit(x_train, y_train)
-
-     info["参数"] = best_model.get_params()
-
-     y_pred = best_model.predict(x_test)
-     # y_pred = best_model.predict(x_test).reshape(-1, 1)
-     container.set_y_pred(y_pred)
-
-     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
-
-     train_scores_mean = np.mean(train_scores, axis=1)
-     train_scores_std = np.std(train_scores, axis=1)
-     test_scores_mean = np.mean(test_scores, axis=1)
-     test_scores_std = np.std(test_scores, axis=1)
-     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
-                                         test_scores_std)
-
-     info["指标"] = calculate_classification_metrics(y_pred, y_test)
-
-     container.set_info(info)
-     container.set_status("trained")
-     container.set_model(best_model)
-
-     return container
-
-
- class LightGBMClassifierParams:
-     @classmethod
-     def get_params(cls):
-         # No search space defined yet; an empty dict leaves the defaults in place
-         return {}
-
-
- # LightGBM classification
- def lightGBM_classifier(container: Container):
-     x_train = container.x_train
-     y_train = container.y_train
-     x_test = container.x_test
-     y_test = container.y_test
-     hyper_params_optimize = container.hyper_params_optimize
-     info = {}
-
-     lightgbm_classifier_model = LGBMClassifier(random_state=Config.RANDOM_STATE)
-     params = LightGBMClassifierParams.get_params()
-
-     if hyper_params_optimize == "grid_search":
-         best_model = grid_search(params, lightgbm_classifier_model, x_train, y_train)
-     elif hyper_params_optimize == "bayes_search":
-         best_model = bayes_search(params, lightgbm_classifier_model, x_train, y_train)
-     else:
-         best_model = lightgbm_classifier_model
-         best_model.fit(x_train, y_train)
-
-     info["参数"] = best_model.get_params()
-
-     y_pred = best_model.predict(x_test)
-     # y_pred = best_model.predict(x_test).reshape(-1, 1)
-     container.set_y_pred(y_pred)
-
-     train_sizes, train_scores, test_scores = learning_curve(best_model, x_train, y_train, cv=5)
-
-     train_scores_mean = np.mean(train_scores, axis=1)
-     train_scores_std = np.std(train_scores, axis=1)
-     test_scores_mean = np.mean(test_scores, axis=1)
-     test_scores_std = np.std(test_scores, axis=1)
-     container.set_learning_curve_values(train_sizes, train_scores_mean, train_scores_std, test_scores_mean,
-                                         test_scores_std)
-
-     info["指标"] = calculate_classification_metrics(y_pred, y_test)
-
-     container.set_info(info)
-     container.set_status("trained")
-     container.set_model(best_model)
-
-     return container
-
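Note: every trainer in this file consumes and returns a Container (see static/new_class.py below). A wiring sketch on a toy dataset, with hyper_params_optimize left unset so the base estimator is fitted directly:

    from sklearn.datasets import load_iris
    from sklearn.model_selection import train_test_split

    data = load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        data.data, data.target, train_size=0.8, random_state=Config.RANDOM_STATE)
    container = Container(x_train, y_train, x_test, y_test, hyper_params_optimize=None)
    container = random_forest_classifier(container)
    print(container.get_info()["指标"])
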
analysis/two_exponential_smoothing_model.py DELETED
@@ -1,48 +0,0 @@
- import matplotlib.pyplot as plt
-
-
- # Double exponential smoothing
- def double_exponential_smoothing(series, alpha, beta):
-     """
-     series - dataset with timeseries
-     alpha - float [0.0, 1.0], smoothing parameter for level
-     beta - float [0.0, 1.0], smoothing parameter for trend
-     """
-     # first value is same as series
-     result = [series[0]]
-     for n in range(1, len(series) + 1):
-         if n == 1:
-             level, trend = series[0], series[1] - series[0]
-         if n >= len(series):  # forecasting
-             value = result[-1]
-         else:
-             value = series[n]
-         last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
-         trend = beta * (level - last_level) + (1 - beta) * trend
-         result.append(level + trend)
-     return result
-
-
- def plotDoubleExponentialSmoothing(series, alphas, betas):
-     """
-     Plots double exponential smoothing with different alphas and betas
-
-     series - dataset with timestamps
-     alphas - list of floats, smoothing parameters for level
-     betas - list of floats, smoothing parameters for trend
-     """
-
-     with plt.style.context('seaborn-white'):
-         plt.figure(figsize=(13, 5))
-         for alpha in alphas:
-             for beta in betas:
-                 plt.plot(double_exponential_smoothing(series, alpha, beta),
-                          label="Alpha {}, beta {}".format(alpha, beta))
-         plt.plot(series.values, label="Actual")
-         plt.legend(loc="best")
-         plt.axis('tight')
-         plt.title("Double Exponential Smoothing")
-         plt.grid(True)
-
-
- # Example invocation; expects a DataFrame `data` with a 'trend' column to be defined by the caller
- plotDoubleExponentialSmoothing(data['trend'], alphas=[0.5, 0.3], betas=[0.9, 0.3])
-
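Note: a quick worked check of double_exponential_smoothing: with alpha = beta = 0.5 and series [10, 12], the initial level is 10 and the initial trend is 2, so the function returns [10, 14.0, 16.0], where the final entry is a one-step-ahead forecast:

    print(double_exponential_smoothing([10, 12], alpha=0.5, beta=0.5))  # [10, 14.0, 16.0]
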
functions/process.py CHANGED
@@ -3,6 +3,9 @@ def get_values_from_container_class(container):
 
 
 def transform_params_list(params_class, params_list, model=None):
+    # test
+    print("params_class: {}, params_list: {}".format(str(params_class), str(params_list)))
+
     input_params_keys = []
     input_params_values = []
     inner_value_list = []
@@ -18,21 +21,24 @@ def transform_params_list(params_class, params_list, model=None):
             inner_value_list.append(param)
         else:
             input_params_values.append(inner_value_list)
-    input_params = dict(zip(input_params_keys, input_params_values))
+    params = dict(zip(input_params_keys, input_params_values))
 
-    for k, v in input_params.items():
+    for k, v in params.items():
         if k in keys:
             value_type = params_class.get_params_type(model)[k] if model else params_class.get_params_type()[k]
            try:
                 if value_type == "int":
-                    input_params[k] = [int(x) for x in input_params[k]]
+                    params[k] = [int(x) for x in params[k]]
                 elif value_type == "float":
-                    input_params[k] = [float(x) for x in input_params[k]]
+                    params[k] = [float(x) for x in params[k]]
                 elif value_type == "bool":
-                    input_params[k] = [x == "True" for x in input_params[k]]
+                    params[k] = [x == "True" for x in params[k]]
                 elif value_type == "str":
-                    input_params[k] = [str(x) for x in input_params[k]]
+                    params[k] = [str(x) for x in params[k]]
             except Exception:
-                input_params[k] = [str(x) for x in input_params[k]]
+                params[k] = [str(x) for x in params[k]]
+
+    # test
+    print("params: {}".format(str(params)))
 
-    return input_params
+    return params
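
Note: the rename above only touches the dict-building and casting steps. Illustratively (the key names are hypothetical; the wire format of params_list is not visible in this hunk):

    input_params_keys = ["max_depth", "n_estimators"]
    input_params_values = [["5", "10"], ["50", "100"]]
    params = dict(zip(input_params_keys, input_params_values))
    # {'max_depth': ['5', '10'], 'n_estimators': ['50', '100']}
    # after the typed casts: {'max_depth': [5, 10], 'n_estimators': [50, 100]}
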
static/__init__.py DELETED
File without changes
static/col.py DELETED
@@ -1,68 +0,0 @@
- def get_pca_col():
-     return [
-         "p1_momentum_value_better",
-         "elapsed_time",
-         "server",
-         "serve_no",
-         "p1_ace",
-         "p2_ace",
-         "p1_winner",
-         "p2_winner",
-         "winner_shot_type",
-         # "p1_double_fault",
-         "p2_double_fault",
-         "p1_unf_err",
-         "p2_unf_err",
-         "p1_net_pt",
-         "p2_net_pt",
-         "p1_net_pt_won",
-         "p2_net_pt_won",
-         "p1_break_pt",
-         "p2_break_pt",
-         "p1_break_pt_won",
-         "p2_break_pt_won",
-         "p1_break_pt_missed",
-         "p2_break_pt_missed",
-         "p1_distance_run",
-         "p2_distance_run",
-         "rally_count",
-         "speed_mph",
-         "serve_width",
-         "serve_depth",
-         "return_depth"
-     ]
-
-
- def get_momentum_col(p):
-     return [
-         "point_victor",
-         "elapsed_time",
-         "server",
-         "serve_no",
-         "{}_ace".format(p),
-         # "p2_ace",
-         "{}_winner".format(p),
-         # "p2_winner",
-         "winner_shot_type",
-         # "p1_double_fault",
-         # "p2_double_fault",
-         "{}_unf_err".format(p),
-         # "p2_unf_err",
-         "{}_net_pt".format(p),
-         # "p2_net_pt",
-         "{}_net_pt_won".format(p),
-         # "p2_net_pt_won",
-         "{}_break_pt".format(p),
-         # "p2_break_pt",
-         "{}_break_pt_won".format(p),
-         # "p2_break_pt_won",
-         "{}_break_pt_missed".format(p),
-         # "p2_break_pt_missed",
-         "{}_distance_run".format(p),
-         # "p2_distance_run",
-         "rally_count",
-         "speed_mph",
-         "serve_width",
-         "serve_depth",
-         "return_depth"
-     ]
-
static/config.py DELETED
@@ -1,136 +0,0 @@
- class Config:
-     # Random seed
-     RANDOM_STATE = 123
-
-     # Number of points shown in prediction plots
-     DISPLAY_RANGE = 100
-
-     # Plot color palettes
-     COLOR_ITER_NUM = 3
-
-     COLORS = [
-         "#ca5353",
-         "#c874a5",
-         "#b674c8",
-         "#8274c8",
-         "#748dc8",
-         "#74acc8",
-         "#74c8b7",
-         "#74c88d",
-         "#a6c874",
-         "#e0e27e",
-         "#df9b77",
-         "#404040",
-         "#999999",
-         "#d4d4d4"
-     ] * COLOR_ITER_NUM
-
-     COLORS_0 = [
-         "#8074C8",
-         "#7895C1",
-         "#A8CBDF",
-         "#992224",
-         "#B54764",
-         "#E3625D",
-         "#EF8B67",
-         "#F0C284"
-     ] * COLOR_ITER_NUM
-
-     COLORS_1 = [
-         "#4A5F7E",
-         "#719AAC",
-         "#72B063",
-         "#94C6CD",
-         "#B8DBB3",
-         "#E29135"
-     ] * COLOR_ITER_NUM
-
-     COLORS_2 = [
-         "#4485C7",
-         "#D4562E",
-         "#DBB428",
-         "#682487",
-         "#84BA42",
-         "#7ABBDB",
-         "#A51C36"
-     ] * COLOR_ITER_NUM
-
-     COLORS_3 = [
-         "#8074C8",
-         "#7895C1",
-         "#A8CBDF",
-         "#F5EBAE",
-         "#F0C284",
-         "#EF8B67",
-         "#E3625D",
-         "#B54764"
-     ] * COLOR_ITER_NUM
-
-     COLORS_4 = [
-         "#979998",
-         "#C69287",
-         "#E79A90",
-         "#EFBC91",
-         "#E4CD87",
-         "#FAE5BB",
-         "#DDDDDF"
-     ] * COLOR_ITER_NUM
-
-     COLORS_5 = [
-         "#91CCC0",
-         "#7FABD1",
-         "#F7AC53",
-         "#EC6E66",
-         "#B5CE4E",
-         "#BD7795",
-         "#7C7979"
-     ] * COLOR_ITER_NUM
-
-     COLORS_6 = [
-         "#E9687A",
-         "#F58F7A",
-         "#FDE2D8",
-         "#CFCFD0",
-         "#B6B3D6"
-     ] * COLOR_ITER_NUM
-
-     JS_0 = """
-     function createGradioAnimation() {
-         var container = document.createElement('div');
-         container.id = 'gradio-animation';
-         container.style.fontSize = '2em';
-         container.style.fontWeight = 'bold';
-         container.style.textAlign = 'center';
-         container.style.marginBottom = '20px';
-
-         var text = 'Welcome to EasyMachineLearning!';
-         for (var i = 0; i < text.length; i++) {
-             (function(i){
-                 setTimeout(function(){
-                     var letter = document.createElement('span');
-                     letter.style.opacity = '0';
-                     letter.style.transition = 'opacity 0.5s';
-                     letter.innerText = text[i];
-
-                     container.appendChild(letter);
-
-                     setTimeout(function() {
-                         letter.style.opacity = '1';
-                     }, 50);
-                 }, i * 250);
-             })(i);
-         }
-
-         var gradioContainer = document.querySelector('.gradio-container');
-         gradioContainer.insertBefore(container, gradioContainer.firstChild);
-
-         return 'Animation created';
-     }
-     """
-
static/new_class.py DELETED
@@ -1,195 +0,0 @@
- class Container:
-     def __init__(self, x_train=None, y_train=None, x_test=None, y_test=None, hyper_params_optimize=None):
-         self.x_train = x_train
-         self.y_train = y_train
-         self.x_test = x_test
-         self.y_test = y_test
-         self.hyper_params_optimize = hyper_params_optimize
-         self.info = {"参数": {}, "指标": {}}
-         self.y_pred = None
-         self.train_sizes = None
-         self.train_scores_mean = None
-         self.train_scores_std = None
-         self.test_scores_mean = None
-         self.test_scores_std = None
-         self.status = None
-         self.model = None
-
-     def get_info(self):
-         return self.info
-
-     def set_info(self, info: dict):
-         self.info = info
-
-     def set_y_pred(self, y_pred):
-         self.y_pred = y_pred
-
-     def get_data_fit_values(self):
-         return [
-             self.y_pred,
-             self.y_test
-         ]
-
-     def get_learning_curve_values(self):
-         return [
-             self.train_sizes,
-             self.train_scores_mean,
-             self.train_scores_std,
-             self.test_scores_mean,
-             self.test_scores_std
-         ]
-
-     def set_learning_curve_values(self, train_sizes, train_scores_mean, train_scores_std, test_scores_mean, test_scores_std):
-         self.train_sizes = train_sizes
-         self.train_scores_mean = train_scores_mean
-         self.train_scores_std = train_scores_std
-         self.test_scores_mean = test_scores_mean
-         self.test_scores_std = test_scores_std
-
-     def get_status(self):
-         return self.status
-
-     def set_status(self, status: str):
-         self.status = status
-
-     def get_model(self):
-         return self.model
-
-     def set_model(self, model):
-         self.model = model
-
-
- class PaintObject:
-     def __init__(self):
-         self.color_cur_num = 0
-         self.color_cur_list = []
-         self.label_cur_num = 0
-         self.label_cur_list = []
-         self.x_cur_label = ""
-         self.y_cur_label = ""
-         self.name = ""
-
-     def get_color_cur_num(self):
-         return self.color_cur_num
-
-     def set_color_cur_num(self, color_cur_num):
-         self.color_cur_num = color_cur_num
-
-     def get_color_cur_list(self):
-         return self.color_cur_list
-
-     def set_color_cur_list(self, color_cur_list):
-         self.color_cur_list = color_cur_list
-
-     def get_label_cur_num(self):
-         return self.label_cur_num
-
-     def set_label_cur_num(self, label_cur_num):
-         self.label_cur_num = label_cur_num
-
-     def get_label_cur_list(self):
-         return self.label_cur_list
-
-     def set_label_cur_list(self, label_cur_list):
-         self.label_cur_list = label_cur_list
-
-     def get_x_cur_label(self):
-         return self.x_cur_label
-
-     def set_x_cur_label(self, x_cur_label):
-         self.x_cur_label = x_cur_label
-
-     def get_y_cur_label(self):
-         return self.y_cur_label
-
-     def set_y_cur_label(self, y_cur_label):
-         self.y_cur_label = y_cur_label
-
-     def get_name(self):
-         return self.name
-
-     def set_name(self, name):
-         self.name = name
-
-
- class SelectModel:
-     def __init__(self):
-         self.models = None
-         self.waterfall_number = None
-         self.force_number = None
-         self.beeswarm_plot_type = None
-         self.dependence_col = None
-         self.data_distribution_col = None
-         self.data_distribution_is_rotate = None
-         self.descriptive_indicators_col = None
-         self.descriptive_indicators_is_rotate = None
-         self.heatmap_col = None
-         self.heatmap_is_rotate = None
-
-     def get_heatmap_col(self):
-         return self.heatmap_col
-
-     def set_heatmap_col(self, heatmap_col):
-         self.heatmap_col = heatmap_col
-
-     def get_heatmap_is_rotate(self):
-         return self.heatmap_is_rotate
-
-     def set_heatmap_is_rotate(self, heatmap_is_rotate):
-         self.heatmap_is_rotate = heatmap_is_rotate
-
-     def get_models(self):
-         return self.models
-
-     def set_models(self, models):
-         self.models = models
-
-     def get_waterfall_number(self):
-         return self.waterfall_number
-
-     def set_waterfall_number(self, waterfall_number):
-         self.waterfall_number = waterfall_number
-
-     def get_force_number(self):
-         return self.force_number
-
-     def set_force_number(self, force_number):
-         self.force_number = force_number
-
-     def get_beeswarm_plot_type(self):
-         return self.beeswarm_plot_type
-
-     def set_beeswarm_plot_type(self, beeswarm_plot_type):
-         self.beeswarm_plot_type = beeswarm_plot_type
-
-     def get_dependence_col(self):
-         return self.dependence_col
-
-     def set_dependence_col(self, dependence_col):
-         self.dependence_col = dependence_col
-
-     def get_data_distribution_col(self):
-         return self.data_distribution_col
-
-     def set_data_distribution_col(self, data_distribution_col):
-         self.data_distribution_col = data_distribution_col
-
-     def get_data_distribution_is_rotate(self):
-         return self.data_distribution_is_rotate
-
-     def set_data_distribution_is_rotate(self, data_distribution_is_rotate):
-         self.data_distribution_is_rotate = data_distribution_is_rotate
-
-     def get_descriptive_indicators_is_rotate(self):
-         return self.descriptive_indicators_is_rotate
-
-     def set_descriptive_indicators_is_rotate(self, descriptive_indicators_is_rotate):
-         self.descriptive_indicators_is_rotate = descriptive_indicators_is_rotate
-
-     def get_descriptive_indicators_col(self):
-         return self.descriptive_indicators_col
-
-     def set_descriptive_indicators_col(self, descriptive_indicators_col):
-         self.descriptive_indicators_col = descriptive_indicators_col
-
static/paint.py DELETED
@@ -1,51 +0,0 @@
- class PaintObject:
-     def __init__(self):
-         self.color_cur_num = 0
-         self.color_cur_list = []
-         self.label_cur_num = 0
-         self.label_cur_list = []
-         self.x_cur_label = ""
-         self.y_cur_label = ""
-         self.name = ""
-
-     def get_color_cur_num(self):
-         return self.color_cur_num
-
-     def set_color_cur_num(self, color_cur_num):
-         self.color_cur_num = color_cur_num
-
-     def get_color_cur_list(self):
-         return self.color_cur_list
-
-     def set_color_cur_list(self, color_cur_list):
-         self.color_cur_list = color_cur_list
-
-     def get_label_cur_num(self):
-         return self.label_cur_num
-
-     def set_label_cur_num(self, label_cur_num):
-         self.label_cur_num = label_cur_num
-
-     def get_label_cur_list(self):
-         return self.label_cur_list
-
-     def set_label_cur_list(self, label_cur_list):
-         self.label_cur_list = label_cur_list
-
-     def get_x_cur_label(self):
-         return self.x_cur_label
-
-     def set_x_cur_label(self, x_cur_label):
-         self.x_cur_label = x_cur_label
-
-     def get_y_cur_label(self):
-         return self.y_cur_label
-
-     def set_y_cur_label(self, y_cur_label):
-         self.y_cur_label = y_cur_label
-
-     def get_name(self):
-         return self.name
-
-     def set_name(self, name):
-         self.name = name
-
static/process.py DELETED
@@ -1,326 +0,0 @@
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.model_selection import KFold
- from sklearn import preprocessing
- from sklearn.model_selection import GridSearchCV
- from skopt import BayesSearchCV
- import copy
- import pandas as pd
- from scipy.stats import spearmanr
- from io import StringIO
- from contextlib import redirect_stdout
-
- from sklearn.datasets import load_iris, load_wine, load_breast_cancer, load_diabetes
- from scipy.linalg import eig
-
- from static.config import Config
-
-
- def match_split(df: pd.DataFrame):
-     return df.groupby("match_id")
-
-
- # Spearman rank correlation coefficient
- def calculate_spearmanr(x, y):
-     rho, p_value = spearmanr(x, y)
-
-     return rho, p_value
-
-
- def calculate_remain_positive_points(df: pd.DataFrame):
-     # When the distance is unbounded, remain_positive is set to len(df)
-
-     df["p1_remain_positive"] = 0
-     df["p2_remain_positive"] = 0
-     p1_zero_distance_list = []
-     p2_zero_distance_list = []
-
-     for i in range(1, len(df)):
-         if (df.loc[i, "p1_momentum_value_better"] > 0
-                 and i != 0):
-             p1_zero_distance_list.append(i)
-         elif (df.loc[i, "p1_momentum_value_better"] < 0
-               and i != 0):
-             p2_zero_distance_list.append(i)
-
-     for j in range(len(df)):
-         for x in p1_zero_distance_list:
-             if j <= x:
-                 df.loc[j, "p1_remain_positive"] = x - j
-                 break
-             else:
-                 continue
-
-     for j in range(len(df)):
-         for x in p2_zero_distance_list:
-             if j <= x:
-                 df.loc[j, "p2_remain_positive"] = x - j
-                 break
-             else:
-                 continue
-
-     return df
-
-
- def calculate_swing_point(df: pd.DataFrame):
-     # When the distance is unbounded, swing is set to len(df)
-
-     df["swing"] = 0
-     zero_distance_list = []
-
-     for i in range(1, len(df)):
-         if (df.loc[i, "p1_momentum_value_better"] > 0 and df.loc[i - 1, "p1_momentum_value_better"] < 0
-                 and i != 0) or (df.loc[i, "p1_momentum_value_better"] < 0 and df.loc[i - 1, "p1_momentum_value_better"] > 0
-                                 and i != 0):
-             zero_distance_list.append(i)
-
-     for j in range(len(df)):
-         for x in zero_distance_list:
-             if j <= x:
-                 df.loc[j, "swing"] = x - j
-                 break
-             else:
-                 continue
-
-     return df
-
-
- def replace_na_to_label(df: pd.DataFrame):
-     return df.fillna("Not A Number")
-
-
- def get_state_distribution(data):
-     # get the matrix of correlation coefficients
-     covX = np.around(np.corrcoef(data.T), decimals=3)
-
-     # draw_heat_map(covX, "related", False)
-
-     # Solve the eigenvalues and eigenvectors of the coefficient correlation matrix
-     eigenvalues, eigenvectors = np.linalg.eig(covX.T)
-
-     eigenvalues = np.around(eigenvalues, decimals=3)
-
-     eigenvalues_dict = dict(zip(eigenvalues.tolist(), list(range(0, len(eigenvalues)))))
-
-     # Sort feature values in descending order
-     eigenvalues = sorted(eigenvalues, reverse=True)
-
-     for i, value in enumerate(eigenvalues):
-         if i == 0:
-             sorted_eigenvectors = eigenvectors[:, eigenvalues_dict[value]].reshape(-1, 1)
-         else:
-             sorted_eigenvectors = np.concatenate((sorted_eigenvectors, eigenvectors[:, eigenvalues_dict[value]].reshape(-1, 1)), axis=1)
-
-     # draw_line_graph(range(1, len(eigenvalues) + 1), eigenvalues, "Eigenvalue")
-
-     # get the contribution of the eigenvalues
-     contribution = eigenvalues / np.sum(eigenvalues)
-
-     return contribution
-
-
- # Exponentially weighted moving average
- def exponential_moving_average(df):
-     alpha = 0.3
-
-     ema = [df[0]]
-
-     for i in range(1, len(df)):
-         ema_value = alpha * df[i] + (1 - alpha) * ema[i - 1]
-         ema.append(ema_value)
-
-     return ema
-
-
- def need_to_mark_in_plot(df, col_name):
-     return df.where(df[col_name] == 1).dropna()
-
-
- def point_victor_mapping(df):
-     mapping = {
-         1: 0.0,
-         2: 1.0
-     }
-     df["point_victor"] = df["point_victor"].map(mapping)
-
-     return df
-
-
- def pick_matches_with_name(df, name):
-     df = df.where(df["match_id"] == name).dropna()
-
-     p1_name = df["player1"].iloc[0]
-     p2_name = df["player2"].iloc[0]
-
-     return df, p1_name, p2_name
-
-
- def pick_matches_with_longest(df):
-     target_match_id = df.groupby("match_id").size().idxmax()
-
-     df = df.where(df["match_id"] == target_match_id).dropna()
-
-     p1_name = df["player1"].iloc[0]
-     p2_name = df["player2"].iloc[0]
-
-     return df, p1_name, p2_name
-
-
- def choose_y_col_in_dataframe(df: pd.DataFrame, y_col: str):
-     y_data = df[y_col]
-     df.drop(y_col, axis=1, inplace=True)
-     df.insert(0, y_col, y_data)
-
-     return df
-
-
- def load_data(sort):
-     type = ""
-     if sort == "Iris Dataset":
-         sk_data = load_iris()
-         type = "classification"
-     elif sort == "Wine Dataset":
-         sk_data = load_wine()
-         type = "classification"
-     elif sort == "Breast Cancer Dataset":
-         sk_data = load_breast_cancer()
-         type = "classification"
-     elif sort == "Diabetes Dataset":
-         sk_data = load_diabetes()
-         type = "regression"
-     elif sort == "California Housing Dataset":
-         df = pd.read_csv("./data/fetch_california_housing.csv")
-         return df
-     else:
-         sk_data = load_iris()
-         type = "classification"
-
-     if type == "classification":
-         target_data = sk_data.target.astype(str)
-         for i in range(len(sk_data.target_names)):
-             target_data = np.where(target_data == str(i), sk_data.target_names[i], target_data)
-     else:
-         target_data = sk_data.target
-
-     feature_names = sk_data.feature_names
-     sk_feature_names = ["target"] + feature_names.tolist() if isinstance(feature_names, np.ndarray) else ["target"] + feature_names
-     sk_data = np.concatenate((target_data.reshape(-1, 1), sk_data.data), axis=1)
-
-     df = pd.DataFrame(data=sk_data, columns=sk_feature_names)
-
-     return df
-
-
- def load_custom_data(file):
-     if "xlsx" in file or "xls" in file:
-         return pd.read_excel(file)
-     elif "csv" in file:
-         return pd.read_csv(file)
-
-
- def preprocess_raw_data_filtering(df):
-     info = {}
-
-     len_0 = len(df)
-     info["Total size of raw data"] = len_0
-
-     # Delete the column "CUSTOMER_ID"
-     # df.drop("CUSTOMER_ID", axis=1, inplace=True)
-
-     # Remove duplicate data
-     df = df.drop_duplicates()
-     len_1 = len_0 - len(df)
-     info["Number of duplicates in the raw data"] = len_1
-
-     # Remove "nan" data
-     # df = remove_nan_from_data(df)
-     # len_2 = len_0 - len_1 - len(df)
-     # info["Number of nan in the raw data"] = len_2
-
-     info["Total size of filtered data after data preprocessing"] = len(df)
-
-     # Save the cleaned data to a csv format file
-     # df.to_csv("../data/filtered_data.csv", index=False)
-
-     return df, info
-
-
- def remove_nan_from_data(df):
-     # Remove "nan" data
-     df.dropna(inplace=True)
-
-     return df
-
-
- # Get standardized data
- def get_standardized_data(df):
-     array = np.concatenate(((df.iloc[:, :1]).values, preprocessing.scale(df.iloc[:, 1:])), axis=1)
-
-     return array
-
-
- def split_dataset(array):
-     x_train_and_validate, x_test, y_train_and_validate, y_test = train_test_split(
-         array[:, 1:],
-         array[:, :1],
-         random_state=Config.RANDOM_STATE,
-         train_size=0.8
-     )
-
-     return x_train_and_validate, x_test, y_train_and_validate, y_test
-
-
- def k_fold_cross_validation_data_segmentation(x_train, y_train):
-     k = 5
-
-     train_data_array = np.concatenate((y_train, x_train), axis=1)
-
-     k_fold = KFold(n_splits=k, shuffle=True, random_state=Config.RANDOM_STATE)
-
-     train_data_list = []
-     validate_data_list = []
-     for train_index, validate_index in k_fold.split(train_data_array):
-         train_data_list.append(train_data_array[train_index])
-         validate_data_list.append(train_data_array[validate_index])
-
-     train_and_validate_data_list = []
-
-     for i in range(k):
-         train_and_validate_data_list.append((
-             train_data_list[i][:, 1:],
-             validate_data_list[i][:, 1:],
-             train_data_list[i][:, 0],
-             validate_data_list[i][:, 0]
-         ))
-
-     return train_and_validate_data_list
-
-
- def grid_search(params, model, x_train, y_train, scoring=None):
-     info = {}
-
-     grid_search_model = GridSearchCV(model, params, cv=3, n_jobs=-1)
-
-     grid_search_model.fit(x_train, y_train.ravel())
-
-     info["Optimal hyperparameters"] = grid_search_model.best_params_
-
-     best_model = grid_search_model.best_estimator_
-
-     return best_model
-
-
- def bayes_search(params, model, x_train, y_train, scoring=None):
-     info = {}
-
-     bayes_search_model = BayesSearchCV(model, params, cv=3, n_iter=50, n_jobs=-1)
-
-     bayes_search_model.fit(x_train, y_train)
-
-     info["Optimal hyperparameters"] = bayes_search_model.best_params_
-
-     best_model = bayes_search_model.best_estimator_
-
-     return best_model
-
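
Note: a pipeline sketch tying the helpers above together, assuming a frame whose first column is the target (the layout choose_y_col_in_dataframe produces):

    df = load_data("Diabetes Dataset")
    array = get_standardized_data(df)
    x_trainval, x_test, y_trainval, y_test = split_dataset(array)
    folds = k_fold_cross_validation_data_segmentation(x_trainval, y_trainval)
    # folds: five (x_train, x_validate, y_train, y_validate) tuples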