Spaces:

IVSD
/

EasyMachineLearningDemo

Sleeping

App Files Community

LLH committed on Feb 16, 2024

Commit

11b81b9

1 Parent(s): 0136ac6

2024/02/16/14:00

Browse files

Files changed (14) hide show

.idea/.gitignore +8 -0
.idea/EasyMachineLearningDemo.iml +12 -0
.idea/inspectionProfiles/Project_Default.xml +19 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
README.md +1 -1
analysis/shap_model.py +3 -3
app.py +346 -61
{diagram → buffer}/__init__.py +0 -0
static/config.py +3 -0
static/paint.py +51 -0
static/process.py +4 -1
visualization/draw_learning_curve_total.py +18 -23

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

.idea/EasyMachineLearningDemo.iml ADDED Viewed

	@@ -0,0 +1,12 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="PyDocumentationSettings">
+    <option name="format" value="PLAIN" />
+    <option name="myDocStringFormat" value="Plain" />
+  </component>
+</module>

.idea/inspectionProfiles/Project_Default.xml ADDED Viewed

	@@ -0,0 +1,19 @@

+<component name="InspectionProjectProfileManager">
+  <profile version="1.0">
+    <option name="myName" value="Project Default" />
+    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
+      <option name="ignoredErrors">
+        <list>
+          <option value="E501" />
+        </list>
+      </option>
+    </inspection_tool>
+    <inspection_tool class="PyUnresolvedReferencesInspection" enabled="true" level="WARNING" enabled_by_default="true">
+      <option name="ignoredIdentifiers">
+        <list>
+          <option value="object.pop" />
+        </list>
+      </option>
+    </inspection_tool>
+  </profile>
+</component>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/EasyMachineLearningDemo.iml" filepath="$PROJECT_DIR$/.idea/EasyMachineLearningDemo.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: EasyMachineLearning v0.0
 emoji: 🔥
 colorFrom: red
 colorTo: red

 ---
+title: EasyMachineLearning test
 emoji: 🔥
 colorFrom: red
 colorTo: red

analysis/shap_model.py CHANGED Viewed

@@ -3,15 +3,15 @@ import matplotlib.pyplot as plt
 import shap
-def shap_calculate(model, x, feature_names):
     explainer = shap.Explainer(model.predict, x)
     shap_values = explainer(x)
     shap.summary_plot(shap_values, x, feature_names=feature_names, show=False)
-    return plt
-    # title = "shap"

 import shap
+def shap_calculate(model, x, feature_names, paint_object):
     explainer = shap.Explainer(model.predict, x)
     shap_values = explainer(x)
     shap.summary_plot(shap_values, x, feature_names=feature_names, show=False)
+    plt.title(paint_object.get_name())
+    return plt, paint_object

app.py CHANGED Viewed

@@ -11,8 +11,10 @@ from analysis.shap_model import shap_calculate
 from static.process import *
 from analysis.linear_model import *
 from visualization.draw_learning_curve_total import draw_learning_curve_total
 import warnings
 warnings.filterwarnings("ignore")
@@ -68,18 +70,34 @@ class Container:
         self.model = model
 class FilePath:
-    base = "./diagram/{}.png"
     shap_beeswarm_plot = "shap_beeswarm_plot"
 class MN:  # ModelName
     classification = "classification"
     regression = "regression"
     linear_regression = "linear_regression"
     polynomial_regression = "polynomial_regression"
     logistic_regression = "logistic_regression"
 class LN:  # LabelName
     choose_dataset_radio = "选择所需数据源 [必选]"
@@ -104,19 +122,54 @@ class LN:  # LabelName
     linear_regression_model_radio = "选择线性回归的模型"
     model_optimize_radio = "选择超参数优化方法"
     model_train_button = "训练"
     learning_curve_checkboxgroup = "选择所需绘制学习曲线的模型"
     learning_curve_train_button = "绘制训练集学习曲线"
     learning_curve_validation_button = "绘制验证集学习曲线"
-    learning_curve_train_plot = "绘制训练集学习曲线"
-    learning_curve_validation_plot = "绘制验证集学习曲线"
     shap_beeswarm_radio = "选择所需绘制蜂群特征图的模型"
     shap_beeswarm_button = "绘制蜂群特征图"
     shap_beeswarm_plot = "蜂群特征图"
-    select_as_model_radio = "选择所需训练的模型"
-def get_outputs():
     gr_dict = {
         choose_custom_dataset_file,
         display_dataset_dataframe,
         display_total_col_num_text,
@@ -141,26 +194,35 @@ def get_outputs():
         model_optimize_radio,
         model_train_button,
         model_train_checkbox,
         learning_curve_checkboxgroup,
         learning_curve_train_button,
         learning_curve_validation_button,
-        learning_curve_train_plot,
-        learning_curve_validation_plot,
         shap_beeswarm_radio,
         shap_beeswarm_button,
-        shap_beeswarm_plot,
-        shap_beeswarm_plot_file,
-        select_as_model_radio,
-        choose_assign_radio,
     }
-    return gr_dict
 def get_return(is_visible, extra_gr_dict: dict = None):
     if is_visible:
         gr_dict = {
             display_dataset_dataframe: gr.Dataframe(add_index_into_df(Dataset.data), type="pandas", visible=True),
             display_total_col_num_text: gr.Textbox(str(Dataset.get_total_col_num()), visible=True, label=LN.display_total_col_num_text),
             display_total_row_num_text: gr.Textbox(str(Dataset.get_total_row_num()), visible=True, label=LN.display_total_row_num_text),
             display_na_list_text: gr.Textbox(Dataset.get_na_list_str(), visible=True, label=LN.display_na_list_text),
@@ -188,14 +250,25 @@ def get_return(is_visible, extra_gr_dict: dict = None):
             model_train_button: gr.Button(LN.model_train_button, visible=Dataset.check_before_train()),
             model_train_checkbox: gr.Checkbox(Dataset.get_model_container_status(), visible=Dataset.check_select_model(), label=Dataset.get_model_label()),
             learning_curve_checkboxgroup: gr.Checkboxgroup(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.learning_curve_checkboxgroup),
             learning_curve_train_button: gr.Button(LN.learning_curve_train_button, visible=Dataset.check_before_train()),
             learning_curve_validation_button: gr.Button(LN.learning_curve_validation_button, visible=Dataset.check_before_train()),
             shap_beeswarm_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.shap_beeswarm_radio),
             shap_beeswarm_button: gr.Button(LN.shap_beeswarm_button, visible=Dataset.check_before_train()),
-            shap_beeswarm_plot_file: gr.File(Dataset.after_get_shap_beeswarm_plot_file(), visible=Dataset.check_shap_beeswarm_plot_file()),
         }
         if extra_gr_dict:
             gr_dict.update(extra_gr_dict)
@@ -204,6 +277,7 @@ def get_return(is_visible, extra_gr_dict: dict = None):
     gr_dict = {
         choose_custom_dataset_file: gr.File(None, visible=True),
         display_dataset_dataframe: gr.Dataframe(visible=False),
         display_total_col_num_text: gr.Textbox(visible=False),
         display_total_row_num_text: gr.Textbox(visible=False),
         display_na_list_text: gr.Textbox(visible=False),
@@ -225,19 +299,27 @@ def get_return(is_visible, extra_gr_dict: dict = None):
         model_optimize_radio: gr.Radio(visible=False),
         model_train_button: gr.Button(visible=False),
         model_train_checkbox: gr.Checkbox(visible=False),
         learning_curve_checkboxgroup: gr.Checkboxgroup(visible=False),
         learning_curve_train_button: gr.Button(visible=False),
         learning_curve_validation_button: gr.Button(visible=False),
-        learning_curve_train_plot: gr.Plot(visible=False),
-        learning_curve_validation_plot: gr.Plot(visible=False),
         shap_beeswarm_radio: gr.Radio(visible=False),
         shap_beeswarm_button: gr.Button(visible=False),
-        shap_beeswarm_plot: gr.Plot(visible=False),
-        shap_beeswarm_plot_file: gr.File(visible=False),
-        select_as_model_radio: gr.Radio(visible=False),
-        choose_assign_radio: gr.Radio(visible=False),
     }
     return gr_dict
@@ -260,6 +342,8 @@ class Dataset:
         MN.logistic_regression: Container(),
     }
     @classmethod
     def get_dataset_list(cls):
         return ["Iris Dataset", "Wine Dataset", "Breast Cancer Dataset", "自定义"]
@@ -309,6 +393,23 @@ class Dataset:
         cls.file = ""
         cls.data = pd.DataFrame()
     @classmethod
     def del_col(cls, col_list: list):
         for col in col_list:
@@ -431,7 +532,8 @@ class Dataset:
         for col in cls.data.columns.values:
             if cls.data[col].dtype.name in ["int64", "float64"]:
-                if not np.array_equal(np.round(preprocessing.scale(cls.data[col]), decimals=2), np.round(cls.data[col].values.round(2), decimals=2)):
                     not_standardized_data_list.append(col)
         return not_standardized_data_list
@@ -443,7 +545,8 @@ class Dataset:
         for i, col in enumerate(cls.data.columns.values):
             if i == 0:
-                if not (all(isinstance(x, str) for x in cls.data.iloc[:, 0]) or all(isinstance(x, float) for x in cls.data.iloc[:, 0])):
                     return False
             else:
                 if cls.data[col].dtype.name != "float64":
@@ -541,43 +644,98 @@ class Dataset:
         return trained_model_list
     @classmethod
-    def draw_learning_curve_train_plot(cls, model_list: list) -> plt.Figure:
         learning_curve_dict = {}
         for model_name in model_list:
             model_name = cls.get_model_name_mapping_reverse()[model_name]
             learning_curve_dict[model_name] = cls.container_dict[model_name].get_learning_curve_values()
-        return draw_learning_curve_total(learning_curve_dict, "train")
     @classmethod
-    def draw_learning_curve_validation_plot(cls, model_list: list) -> plt.Figure:
         learning_curve_dict = {}
         for model_name in model_list:
             model_name = cls.get_model_name_mapping_reverse()[model_name]
             learning_curve_dict[model_name] = cls.container_dict[model_name].get_learning_curve_values()
-        return draw_learning_curve_total(learning_curve_dict, "validation")
     @classmethod
-    def draw_shap_beeswarm_plot(cls, model_name) -> plt.Figure:
         model_name = cls.get_model_name_mapping_reverse()[model_name]
         container = cls.container_dict[model_name]
-        return shap_calculate(container.get_model(), container.x_train, cls.data.columns.values)
     @classmethod
-    def get_shap_beeswarm_plot_file(cls):
-        return FilePath.base.format(FilePath.shap_beeswarm_plot)
     @classmethod
-    def check_shap_beeswarm_plot_file(cls):
-        return os.path.exists(cls.get_shap_beeswarm_plot_file())
     @classmethod
-    def after_get_shap_beeswarm_plot_file(cls):
-        return cls.get_shap_beeswarm_plot_file() if cls.check_shap_beeswarm_plot_file() else None
     @classmethod
     def get_model_list(cls):
@@ -614,13 +772,37 @@ class Dataset:
         data_copy = cls.data
         if cls.assign == MN.classification:
-            data_copy.iloc[0, :] = data_copy.iloc[0, :].astype(str)
         else:
-            data_copy.iloc[0, :] = data_copy.iloc[0, :].astype(float)
         cls.data = data_copy
         cls.change_data_type_to_float()
 def choose_assign(assign: str):
     Dataset.choose_assign(assign)
@@ -634,24 +816,85 @@ def select_as_model(model_name: str):
     return get_return(True)
-def draw_shap_beeswarm_plot(model_name):
-    cur_plt = Dataset.draw_shap_beeswarm_plot(model_name)
-    cur_plt.savefig(FilePath.base.format(FilePath.shap_beeswarm_plot), dpi=300)
-    return get_return(True, {shap_beeswarm_plot: gr.Plot(cur_plt, visible=True, label=LN.shap_beeswarm_plot)})
-def draw_learning_curve_validation_plot(model_list: list):
-    cur_plt = Dataset.draw_learning_curve_validation_plot(model_list)
-    return get_return(True, {learning_curve_validation_plot: gr.Plot(cur_plt, visible=True, label=LN.learning_curve_validation_plot)})
-def draw_learning_curve_train_plot(model_list: list):
-    cur_plt = Dataset.draw_learning_curve_train_plot(model_list)
-    return get_return(True, {learning_curve_train_plot: gr.Plot(cur_plt, visible=True, label=LN.learning_curve_train_plot)})
 def train_model(optimize, linear_regression_model_type):
@@ -681,7 +924,9 @@ def change_data_type_to_float():
 def encode_label(col_list: list):
     Dataset.encode_label(col_list)
-    return get_return(True, {display_encode_label_dataframe: gr.Dataframe(Dataset.get_str2int_mappings_df(), type="pandas", visible=True, label=LN.display_encode_label_dataframe)})
 def del_duplicate():
@@ -737,7 +982,6 @@ def choose_custom_dataset(file: str):
 with gr.Blocks() as demo:
     '''
         组件
     '''
@@ -752,6 +996,7 @@ with gr.Blocks() as demo:
         # 显示数据表信息
         with gr.Accordion("当前数据信息"):
             display_dataset_dataframe = gr.Dataframe(visible=False)
             with gr.Row():
                 display_total_col_num_text = gr.Textbox(visible=False)
                 display_total_row_num_text = gr.Textbox(visible=False)
@@ -794,17 +1039,43 @@ with gr.Blocks() as demo:
         # 可视化
         with gr.Accordion("数据可视化"):
-            learning_curve_checkboxgroup = gr.Checkboxgroup(visible=False)
-            with gr.Row():
-                learning_curve_train_button = gr.Button(visible=False)
-                learning_curve_validation_button = gr.Button(visible=False)
-            learning_curve_train_plot = gr.Plot(visible=False)
-            learning_curve_validation_plot = gr.Plot(visible=False)
-            shap_beeswarm_radio = gr.Radio(visible=False)
-            shap_beeswarm_button = gr.Button(visible=False)
-            with gr.Group():
-                shap_beeswarm_plot = gr.Plot(visible=False)
-                shap_beeswarm_plot_file = gr.File(visible=False)
     '''
         监听事件
@@ -840,9 +1111,23 @@ with gr.Blocks() as demo:
     model_train_button.click(fn=train_model, inputs=[model_optimize_radio, linear_regression_model_radio], outputs=get_outputs())
     # 可视化
-    learning_curve_train_button.click(fn=draw_learning_curve_train_plot, inputs=[learning_curve_checkboxgroup], outputs=get_outputs())
-    learning_curve_validation_button.click(fn=draw_learning_curve_validation_plot, inputs=[learning_curve_checkboxgroup], outputs=get_outputs())
-    shap_beeswarm_button.click(fn=draw_shap_beeswarm_plot, inputs=[shap_beeswarm_radio], outputs=get_outputs())
 if __name__ == "__main__":
     demo.launch()

 from static.process import *
 from analysis.linear_model import *
 from visualization.draw_learning_curve_total import draw_learning_curve_total
+from static.paint import *
 import warnings
 warnings.filterwarnings("ignore")
         self.model = model
+class StaticValue:
+    max_num = 10
 class FilePath:
+    png_base = "./buffer/{}.png"
+    excel_base = "./buffer/{}.xlsx"
+    # [绘图]
+    display_dataset = "current_excel_data"
+    learning_curve_train_plot = "learning_curve_train_plot"
+    learning_curve_validation_plot = "learning_curve_validation_plot"
     shap_beeswarm_plot = "shap_beeswarm_plot"
 class MN:  # ModelName
     classification = "classification"
     regression = "regression"
     linear_regression = "linear_regression"
     polynomial_regression = "polynomial_regression"
     logistic_regression = "logistic_regression"
+    # [绘图]
+    learning_curve_train = "learning_curve_train"
+    learning_curve_validation = "learning_curve_validation"
+    shap_beeswarm = "shap_beeswarm"
 class LN:  # LabelName
     choose_dataset_radio = "选择所需数据源 [必选]"
     linear_regression_model_radio = "选择线性回归的模型"
     model_optimize_radio = "选择超参数优化方法"
     model_train_button = "训练"
+    select_as_model_radio = "选择所需训练的模型"
+    title_name_textbox = "标题"
+    x_label_textbox = "x 轴名称"
+    y_label_textbox = "y 轴名称"
+    colors = ["颜色 {}".format(i) for i in range(StaticValue.max_num)]
+    labels = ["图例 {}".format(i) for i in range(StaticValue.max_num)]
+    # [绘图]
     learning_curve_checkboxgroup = "选择所需绘制学习曲线的模型"
     learning_curve_train_button = "绘制训练集学习曲线"
     learning_curve_validation_button = "绘制验证集学习曲线"
     shap_beeswarm_radio = "选择所需绘制蜂群特征图的模型"
     shap_beeswarm_button = "绘制蜂群特征图"
+    learning_curve_train_plot = "训练集学习曲线"
+    learning_curve_validation_plot = "验证集学习曲线"
     shap_beeswarm_plot = "蜂群特征图"
+def get_return_extra(is_visible, extra_gr_dict: dict = None):
+    if is_visible:
+        gr_dict = {
+            draw_file: gr.File(Dataset.after_get_file(), visible=Dataset.check_file()),
+        }
+        if extra_gr_dict:
+            gr_dict.update(extra_gr_dict)
+        return gr_dict
     gr_dict = {
+        draw_plot: gr.Plot(visible=False),
+        draw_file: gr.File(visible=False),
+    }
+    gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
+    gr_dict.update(dict(zip(color_textboxs, [gr.Textbox(visible=False)] * StaticValue.max_num)))
+    gr_dict.update(dict(zip(legend_labels_textboxs, [gr.Textbox(visible=False)] * StaticValue.max_num)))
+    gr_dict.update({title_name_textbox: gr.Textbox(visible=False)})
+    gr_dict.update({x_label_textbox: gr.Textbox(visible=False)})
+    gr_dict.update({y_label_textbox: gr.Textbox(visible=False)})
+    return gr_dict
+def get_outputs():
+    gr_set = {
         choose_custom_dataset_file,
         display_dataset_dataframe,
         display_total_col_num_text,
         model_optimize_radio,
         model_train_button,
         model_train_checkbox,
+        select_as_model_radio,
+        choose_assign_radio,
+        display_dataset,
+        draw_plot,
+        draw_file,
+        title_name_textbox,
+        x_label_textbox,
+        y_label_textbox,
+        # [绘图]
         learning_curve_checkboxgroup,
         learning_curve_train_button,
         learning_curve_validation_button,
         shap_beeswarm_radio,
         shap_beeswarm_button,
     }
+    gr_set.update(set(colorpickers))
+    gr_set.update(set(color_textboxs))
+    gr_set.update(set(legend_labels_textboxs))
+    return gr_set
 def get_return(is_visible, extra_gr_dict: dict = None):
     if is_visible:
         gr_dict = {
             display_dataset_dataframe: gr.Dataframe(add_index_into_df(Dataset.data), type="pandas", visible=True),
+            display_dataset: gr.File(Dataset.after_get_display_dataset_file(), visible=Dataset.check_display_dataset_file()),
             display_total_col_num_text: gr.Textbox(str(Dataset.get_total_col_num()), visible=True, label=LN.display_total_col_num_text),
             display_total_row_num_text: gr.Textbox(str(Dataset.get_total_row_num()), visible=True, label=LN.display_total_row_num_text),
             display_na_list_text: gr.Textbox(Dataset.get_na_list_str(), visible=True, label=LN.display_na_list_text),
             model_train_button: gr.Button(LN.model_train_button, visible=Dataset.check_before_train()),
             model_train_checkbox: gr.Checkbox(Dataset.get_model_container_status(), visible=Dataset.check_select_model(), label=Dataset.get_model_label()),
+            draw_plot: gr.Plot(visible=False),
+            draw_file: gr.File(visible=False),
+            title_name_textbox: gr.Textbox(visible=False),
+            x_label_textbox: gr.Textbox(visible=False),
+            y_label_textbox: gr.Textbox(visible=False),
+            # [绘图]
             learning_curve_checkboxgroup: gr.Checkboxgroup(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.learning_curve_checkboxgroup),
             learning_curve_train_button: gr.Button(LN.learning_curve_train_button, visible=Dataset.check_before_train()),
             learning_curve_validation_button: gr.Button(LN.learning_curve_validation_button, visible=Dataset.check_before_train()),
             shap_beeswarm_radio: gr.Radio(Dataset.get_trained_model_list(), visible=Dataset.check_before_train(), label=LN.shap_beeswarm_radio),
             shap_beeswarm_button: gr.Button(LN.shap_beeswarm_button, visible=Dataset.check_before_train()),
         }
+        gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
+        gr_dict.update(dict(zip(color_textboxs, [gr.Textbox(visible=False)] * StaticValue.max_num)))
+        gr_dict.update(dict(zip(legend_labels_textboxs, [gr.Textbox(visible=False)] * StaticValue.max_num)))
         if extra_gr_dict:
             gr_dict.update(extra_gr_dict)
     gr_dict = {
         choose_custom_dataset_file: gr.File(None, visible=True),
         display_dataset_dataframe: gr.Dataframe(visible=False),
+        display_dataset: gr.File(visible=False),
         display_total_col_num_text: gr.Textbox(visible=False),
         display_total_row_num_text: gr.Textbox(visible=False),
         display_na_list_text: gr.Textbox(visible=False),
         model_optimize_radio: gr.Radio(visible=False),
         model_train_button: gr.Button(visible=False),
         model_train_checkbox: gr.Checkbox(visible=False),
+        select_as_model_radio: gr.Radio(visible=False),
+        choose_assign_radio: gr.Radio(visible=False),
+        draw_plot: gr.Plot(visible=False),
+        draw_file: gr.File(visible=False),
+        title_name_textbox: gr.Textbox(visible=False),
+        x_label_textbox: gr.Textbox(visible=False),
+        y_label_textbox: gr.Textbox(visible=False),
+        # [绘图]
         learning_curve_checkboxgroup: gr.Checkboxgroup(visible=False),
         learning_curve_train_button: gr.Button(visible=False),
         learning_curve_validation_button: gr.Button(visible=False),
         shap_beeswarm_radio: gr.Radio(visible=False),
         shap_beeswarm_button: gr.Button(visible=False),
     }
+    gr_dict.update(dict(zip(colorpickers, [gr.ColorPicker(visible=False)] * StaticValue.max_num)))
+    gr_dict.update(dict(zip(color_textboxs, [gr.Textbox(visible=False)] * StaticValue.max_num)))
+    gr_dict.update(dict(zip(legend_labels_textboxs, [gr.Textbox(visible=False)] * StaticValue.max_num)))
     return gr_dict
         MN.logistic_regression: Container(),
     }
+    visualize = ""
     @classmethod
     def get_dataset_list(cls):
         return ["Iris Dataset", "Wine Dataset", "Breast Cancer Dataset", "自定义"]
         cls.file = ""
         cls.data = pd.DataFrame()
+    @classmethod
+    def get_display_dataset_file(cls):
+        file_path = FilePath.excel_base.format(FilePath.display_dataset)
+        return file_path
+    @classmethod
+    def check_display_dataset_file(cls):
+        return os.path.exists(cls.get_display_dataset_file())
+    @classmethod
+    def after_get_display_dataset_file(cls):
+        if not cls.data.empty:
+            cls.data.to_excel(cls.get_display_dataset_file(), index=False)
+        return cls.get_display_dataset_file() if cls.check_display_dataset_file() else None
     @classmethod
     def del_col(cls, col_list: list):
         for col in col_list:
         for col in cls.data.columns.values:
             if cls.data[col].dtype.name in ["int64", "float64"]:
+                if not np.array_equal(np.round(preprocessing.scale(cls.data[col]), decimals=2),
+                                      np.round(cls.data[col].values.round(2), decimals=2)):
                     not_standardized_data_list.append(col)
         return not_standardized_data_list
         for i, col in enumerate(cls.data.columns.values):
             if i == 0:
+                if not (all(isinstance(x, str) for x in cls.data.iloc[:, 0]) or all(
+                        isinstance(x, float) for x in cls.data.iloc[:, 0])):
                     return False
             else:
                 if cls.data[col].dtype.name != "float64":
         return trained_model_list
     @classmethod
+    def draw_plot(cls, select_model, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
+        # [绘图]
+        if cls.visualize == MN.learning_curve_train:
+            return cls.draw_learning_curve_train_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+        elif cls.visualize == MN.learning_curve_validation:
+            return cls.draw_learning_curve_validation_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+        elif cls.visualize == MN.shap_beeswarm:
+            return cls.draw_shap_beeswarm_plot(select_model, color_list, label_list, name, x_label, y_label, is_default)
+    @classmethod
+    def draw_learning_curve_train_plot(cls, model_list, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
         learning_curve_dict = {}
         for model_name in model_list:
             model_name = cls.get_model_name_mapping_reverse()[model_name]
             learning_curve_dict[model_name] = cls.container_dict[model_name].get_learning_curve_values()
+        color_cur_list = Config.COLORS if is_default else color_list
+        label_cur_list = [x for x in learning_curve_dict.keys()] if is_default else label_list
+        x_cur_label = "Train Sizes" if is_default else x_label
+        y_cur_label = "Accuracy" if is_default else y_label
+        cur_name = "" if is_default else name
+        paint_object = PaintObject()
+        paint_object.set_color_cur_list(color_cur_list)
+        paint_object.set_label_cur_list(label_cur_list)
+        paint_object.set_x_cur_label(x_cur_label)
+        paint_object.set_y_cur_label(y_cur_label)
+        paint_object.set_name(cur_name)
+        return draw_learning_curve_total(learning_curve_dict, "train", paint_object)
     @classmethod
+    def draw_learning_curve_validation_plot(cls, model_list, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
         learning_curve_dict = {}
         for model_name in model_list:
             model_name = cls.get_model_name_mapping_reverse()[model_name]
             learning_curve_dict[model_name] = cls.container_dict[model_name].get_learning_curve_values()
+        color_cur_list = Config.COLORS if is_default else color_list
+        label_cur_list = [x for x in learning_curve_dict.keys()] if is_default else label_list
+        x_cur_label = "Train Sizes" if is_default else x_label
+        y_cur_label = "Accuracy" if is_default else y_label
+        cur_name = "" if is_default else name
+        paint_object = PaintObject()
+        paint_object.set_color_cur_list(color_cur_list)
+        paint_object.set_label_cur_list(label_cur_list)
+        paint_object.set_x_cur_label(x_cur_label)
+        paint_object.set_y_cur_label(y_cur_label)
+        paint_object.set_name(cur_name)
+        return draw_learning_curve_total(learning_curve_dict, "validation", paint_object)
     @classmethod
+    def draw_shap_beeswarm_plot(cls, model_name, color_list: list, label_list: list, name: str, x_label: str, y_label: str, is_default: bool):
         model_name = cls.get_model_name_mapping_reverse()[model_name]
         container = cls.container_dict[model_name]
+        # color_cur_list = Config.COLORS if is_default else color_list
+        # label_cur_list = [x for x in learning_curve_dict.keys()] if is_default else label_list
+        # x_cur_label = "Train Sizes" if is_default else x_label
+        # y_cur_label = "Accuracy" if is_default else y_label
+        cur_name = "" if is_default else name
+        paint_object = PaintObject()
+        # paint_object.set_color_cur_list(color_cur_list)
+        # paint_object.set_label_cur_list(label_cur_list)
+        # paint_object.set_x_cur_label(x_cur_label)
+        # paint_object.set_y_cur_label(y_cur_label)
+        paint_object.set_name(cur_name)
+        return shap_calculate(container.get_model(), container.x_train, cls.data.columns.values, paint_object)
     @classmethod
+    def get_file(cls):
+        # [绘图]
+        if cls.visualize == MN.learning_curve_train:
+            return FilePath.png_base.format(FilePath.learning_curve_train_plot)
+        elif cls.visualize == MN.learning_curve_validation:
+            return FilePath.png_base.format(FilePath.learning_curve_validation_plot)
+        elif cls.visualize == MN.shap_beeswarm:
+            return FilePath.png_base.format(FilePath.shap_beeswarm_plot)
     @classmethod
+    def check_file(cls):
+        return os.path.exists(cls.get_file())
     @classmethod
+    def after_get_file(cls):
+        return cls.get_file() if cls.check_file() else None
     @classmethod
     def get_model_list(cls):
         data_copy = cls.data
         if cls.assign == MN.classification:
+            data_copy.iloc[:, 0] = data_copy.iloc[:, 0].astype(str)
         else:
+            data_copy.iloc[:, 0] = data_copy.iloc[:, 0].astype(float)
         cls.data = data_copy
         cls.change_data_type_to_float()
+    @classmethod
+    def colorpickers_change(cls, paint_object):
+        cur_num = paint_object.get_color_cur_num()
+        true_list = [gr.ColorPicker(paint_object.get_color_cur_list()[i], visible=True, label=LN.colors[i]) for i in range(cur_num)]
+        return true_list + [gr.ColorPicker(visible=False)] * (StaticValue.max_num - cur_num)
+    @classmethod
+    def color_textboxs_change(cls, paint_object):
+        cur_num = paint_object.get_color_cur_num()
+        true_list = [gr.Textbox(paint_object.get_color_cur_list()[i], visible=True, show_label=False) for i in range(cur_num)]
+        return true_list + [gr.Textbox(visible=False)] * (StaticValue.max_num - cur_num)
+    @classmethod
+    def labels_change(cls, paint_object):
+        cur_num = paint_object.get_label_cur_num()
+        true_list = [gr.Textbox(paint_object.get_label_cur_list()[i], visible=True, label=LN.labels[i]) for i in range(cur_num)]
+        return true_list + [gr.Textbox(visible=False)] * (StaticValue.max_num - cur_num)
 def choose_assign(assign: str):
     Dataset.choose_assign(assign)
     return get_return(True)
+# [绘图]
+def shap_beeswarm_first_draw_plot(*inputs):
+    Dataset.visualize = MN.shap_beeswarm
+    return first_draw_plot(inputs)
+def learning_curve_validation_first_draw_plot(*inputs):
+    Dataset.visualize = MN.learning_curve_validation
+    return first_draw_plot(inputs)
+def learning_curve_train_first_draw_plot(*inputs):
+    Dataset.visualize = MN.learning_curve_train
+    return first_draw_plot(inputs)
+def first_draw_plot(inputs):
+    select_model = inputs[0]
+    x_label = ""
+    y_label = ""
+    name = ""
+    color_list = []
+    label_list = []
+    cur_plt, paint_object = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, True)
+    return first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object)
+def out_non_first_draw_plot(*inputs):
+    return non_first_draw_plot(inputs)
def non_first_draw_plot(inputs):
    """Redraw the active plot using the values currently held by the styling inputs.

    ``inputs`` is a flat sequence laid out as:

    * ``[0]`` title, ``[1]`` x-axis label, ``[2]`` y-axis label;
    * the next ``StaticValue.max_num`` entries: colour values;
    * the next ``StaticValue.max_num`` entries: legend labels;
    * then the model selections, one slot per selector.

    Which selector slot is used depends on ``Dataset.visualize``.

    :param inputs: flat sequence of values in the layout above.
    :return: the result of ``first_draw_plot_with_non_first_draw_plot``.
    """
    name, x_label, y_label = inputs[0], inputs[1], inputs[2]

    colors_end = StaticValue.max_num + 3
    color_list = list(inputs[3:colors_end])
    labels_end = 2 * StaticValue.max_num + 3
    label_list = list(inputs[colors_end:labels_end])

    # Plotting: pick the selector slot that belongs to the active view.
    mode = Dataset.visualize
    if mode == MN.learning_curve_train or mode == MN.learning_curve_validation:
        # Both learning-curve views read the first selector slot.
        select_model = inputs[labels_end]
    elif mode == MN.shap_beeswarm:
        select_model = inputs[labels_end + 1]
    else:
        # Any other mode gets a one-element slice, not a single item.
        select_model = inputs[labels_end:labels_end + 1]

    figure, painter = Dataset.draw_plot(select_model, color_list, label_list, name, x_label, y_label, False)
    return first_draw_plot_with_non_first_draw_plot(figure, painter)
def first_draw_plot_with_non_first_draw_plot(cur_plt, paint_object):
    """Save the drawn figure and assemble the component updates for the plot panel.

    For the three known views (training learning curve, validation learning
    curve, SHAP beeswarm) the figure is written to
    ``FilePath.png_base.format(...)`` at 300 dpi and shown in ``draw_plot``
    with the matching ``LN`` caption.  For any other value of
    ``Dataset.visualize`` nothing is saved and ``draw_plot`` is not updated.
    In every case the colour pickers, colour text boxes, legend-label boxes,
    title box and axis-label boxes are refreshed from ``paint_object``.

    :param cur_plt: object providing ``savefig``; also passed to ``gr.Plot``.
    :param paint_object: source of the current colours, labels and titles.
    :return: the result of ``get_return_extra(True, updates)``.
    """
    # [plotting] FilePath and LN use the same attribute name for each view, so
    # one key selects both the file stem and the caption.
    mode = Dataset.visualize
    if mode == MN.learning_curve_train:
        plot_key = "learning_curve_train_plot"
    elif mode == MN.learning_curve_validation:
        plot_key = "learning_curve_validation_plot"
    elif mode == MN.shap_beeswarm:
        plot_key = "shap_beeswarm_plot"
    else:
        plot_key = None

    updates = {}
    if plot_key is not None:
        cur_plt.savefig(FilePath.png_base.format(getattr(FilePath, plot_key)), dpi=300)
        updates[draw_plot] = gr.Plot(cur_plt, visible=True, label=getattr(LN, plot_key))

    # Pair each pre-built widget with its refreshed counterpart.
    updates.update(zip(colorpickers, Dataset.colorpickers_change(paint_object)))
    updates.update(zip(color_textboxs, Dataset.color_textboxs_change(paint_object)))
    updates.update(zip(legend_labels_textboxs, Dataset.labels_change(paint_object)))

    updates[title_name_textbox] = gr.Textbox(paint_object.get_name(), visible=True, label=LN.title_name_textbox)
    updates[x_label_textbox] = gr.Textbox(paint_object.get_x_cur_label(), visible=True, label=LN.x_label_textbox)
    updates[y_label_textbox] = gr.Textbox(paint_object.get_y_cur_label(), visible=True, label=LN.y_label_textbox)

    return get_return_extra(True, updates)
 def train_model(optimize, linear_regression_model_type):
 def encode_label(col_list: list):
     Dataset.encode_label(col_list)
+    return get_return(True, {
+        display_encode_label_dataframe: gr.Dataframe(Dataset.get_str2int_mappings_df(), type="pandas", visible=True,
+                                                     label=LN.display_encode_label_dataframe)})
 def del_duplicate():
 with gr.Blocks() as demo:
     '''
         组件
     '''
         # 显示数据表信息
         with gr.Accordion("当前数据信息"):
             display_dataset_dataframe = gr.Dataframe(visible=False)
+            display_dataset = gr.File(visible=False)
             with gr.Row():
                 display_total_col_num_text = gr.Textbox(visible=False)
                 display_total_row_num_text = gr.Textbox(visible=False)
         # 可视化
         with gr.Accordion("数据可视化"):
+            with gr.Tab("学习曲线图"):
+                learning_curve_checkboxgroup = gr.Checkboxgroup(visible=False)
+                with gr.Row():
+                    learning_curve_train_button = gr.Button(visible=False)
+                    learning_curve_validation_button = gr.Button(visible=False)
+            with gr.Tab("蜂群特征图"):
+                shap_beeswarm_radio = gr.Radio(visible=False)
+                shap_beeswarm_button = gr.Button(visible=False)
+            legend_labels_textboxs = []
+            with gr.Accordion("图例"):
+                with gr.Row():
+                    for i in range(StaticValue.max_num):
+                        with gr.Row():
+                            label = gr.Textbox(visible=False)
+                            legend_labels_textboxs.append(label)
+            with gr.Accordion("坐标轴"):
+                with gr.Row():
+                    title_name_textbox = gr.Textbox(visible=False)
+                    x_label_textbox = gr.Textbox(visible=False)
+                    y_label_textbox = gr.Textbox(visible=False)
+            colorpickers = []
+            color_textboxs = []
+            with gr.Accordion("颜色"):
+                with gr.Row():
+                    for i in range(StaticValue.max_num):
+                        with gr.Row():
+                            colorpicker = gr.ColorPicker(visible=False)
+                            colorpickers.append(colorpicker)
+                            color_textbox = gr.Textbox(visible=False)
+                            color_textboxs.append(color_textbox)
+            draw_plot = gr.Plot(visible=False)
+            draw_file = gr.File(visible=False)
     '''
         监听事件
     model_train_button.click(fn=train_model, inputs=[model_optimize_radio, linear_regression_model_radio], outputs=get_outputs())
     # 可视化
+    learning_curve_train_button.click(fn=learning_curve_train_first_draw_plot, inputs=[learning_curve_checkboxgroup], outputs=get_outputs())
+    learning_curve_validation_button.click(fn=learning_curve_validation_first_draw_plot, inputs=[learning_curve_checkboxgroup], outputs=get_outputs())
+    shap_beeswarm_button.click(fn=shap_beeswarm_first_draw_plot, inputs=[shap_beeswarm_radio], outputs=get_outputs())
+    title_name_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                            + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
+    x_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                         + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
+    y_label_textbox.blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                         + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
+    for i in range(StaticValue.max_num):
+        colorpickers[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                             + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
+        color_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + color_textboxs + legend_labels_textboxs
+                               + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
+        legend_labels_textboxs[i].blur(fn=out_non_first_draw_plot, inputs=[title_name_textbox] + [x_label_textbox] + [y_label_textbox] + colorpickers + legend_labels_textboxs
+                                       + [learning_curve_checkboxgroup] + [shap_beeswarm_radio], outputs=get_outputs())
 if __name__ == "__main__":
     demo.launch()

{diagram → buffer}/__init__.py RENAMED Viewed

File without changes

static/config.py CHANGED Viewed

@@ -12,6 +12,9 @@ class Config:
         "#EF8B67",
         "#F0C284"
     ]
     COLORS_1 = [
         "#91CCC0",
         "#7FABD1",

         "#EF8B67",
         "#F0C284"
     ]
     COLORS_1 = [
         "#91CCC0",
         "#7FABD1",

static/paint.py ADDED Viewed

	@@ -0,0 +1,51 @@

class PaintObject:
    """Mutable holder for the styling state of one plot.

    It stores the colours and legend labels in use (each as a list plus a
    separately maintained count), the two axis labels and the plot title.
    The counts are NOT derived from the lists: they are independent fields
    changed only through their own setters or constructor arguments.

    All constructor arguments are keyword-only and optional, so
    ``PaintObject()`` still yields the empty state.
    """

    def __init__(self, *, color_cur_num=0, color_cur_list=None,
                 label_cur_num=0, label_cur_list=None,
                 x_cur_label="", y_cur_label="", name=""):
        """Create a paint object, optionally pre-populated.

        :param color_cur_num: number of colours in use.
        :param color_cur_list: colour values; a fresh empty list when omitted.
        :param label_cur_num: number of legend labels in use.
        :param label_cur_list: legend labels; a fresh empty list when omitted.
        :param x_cur_label: x-axis label.
        :param y_cur_label: y-axis label.
        :param name: plot title.
        """
        self.color_cur_num = color_cur_num
        # ``None`` sentinels keep instances from sharing one default list.
        self.color_cur_list = [] if color_cur_list is None else color_cur_list
        self.label_cur_num = label_cur_num
        self.label_cur_list = [] if label_cur_list is None else label_cur_list
        self.x_cur_label = x_cur_label
        self.y_cur_label = y_cur_label
        self.name = name

    def __repr__(self):
        """Return a debugging representation listing every field."""
        return (
            f"{type(self).__name__}("
            f"color_cur_num={self.color_cur_num!r}, "
            f"color_cur_list={self.color_cur_list!r}, "
            f"label_cur_num={self.label_cur_num!r}, "
            f"label_cur_list={self.label_cur_list!r}, "
            f"x_cur_label={self.x_cur_label!r}, "
            f"y_cur_label={self.y_cur_label!r}, "
            f"name={self.name!r})"
        )

    def get_color_cur_num(self):
        """Return the number of colours in use."""
        return self.color_cur_num

    def set_color_cur_num(self, color_cur_num):
        """Set the number of colours in use."""
        self.color_cur_num = color_cur_num

    def get_color_cur_list(self):
        """Return the stored colour list (the live object, not a copy)."""
        return self.color_cur_list

    def set_color_cur_list(self, color_cur_list):
        """Store ``color_cur_list`` as the colour list (kept by reference)."""
        self.color_cur_list = color_cur_list

    def get_label_cur_num(self):
        """Return the number of legend labels in use."""
        return self.label_cur_num

    def set_label_cur_num(self, label_cur_num):
        """Set the number of legend labels in use."""
        self.label_cur_num = label_cur_num

    def get_label_cur_list(self):
        """Return the stored legend-label list (the live object, not a copy)."""
        return self.label_cur_list

    def set_label_cur_list(self, label_cur_list):
        """Store ``label_cur_list`` as the legend-label list (kept by reference)."""
        self.label_cur_list = label_cur_list

    def get_x_cur_label(self):
        """Return the x-axis label."""
        return self.x_cur_label

    def set_x_cur_label(self, x_cur_label):
        """Set the x-axis label."""
        self.x_cur_label = x_cur_label

    def get_y_cur_label(self):
        """Return the y-axis label."""
        return self.y_cur_label

    def set_y_cur_label(self, y_cur_label):
        """Set the y-axis label."""
        self.y_cur_label = y_cur_label

    def get_name(self):
        """Return the plot title."""
        return self.name

    def set_name(self, name):
        """Set the plot title."""
        self.name = name

static/process.py CHANGED Viewed

@@ -196,7 +196,10 @@ def load_data(sort):
 def load_custom_data(file):
-    return pd.read_csv(file)
 def preprocess_raw_data_filtering(df):

 def load_custom_data(file):
+    if "xlsx" in file or "xls" in file:
+        return pd.read_excel(file)
+    elif "csv" in file:
+        return pd.read_csv(file)
 def preprocess_raw_data_filtering(df):

visualization/draw_learning_curve_total.py CHANGED Viewed

@@ -1,15 +1,15 @@
 import numpy as np
 from matplotlib import pyplot as plt
 from static.config import Config
-def draw_learning_curve_total(input_dict, type):
     plt.figure(figsize=(10, 6), dpi=300)
     if type == "train":
-        i = 0
-        for label_name, values in input_dict.items():
             train_sizes = values[0]
             train_scores_mean = values[1]
             train_scores_std = values[2]
@@ -21,25 +21,19 @@ def draw_learning_curve_total(input_dict, type):
                 train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std,
                 alpha=0.1,
-                color=Config.COLORS[i]
             )
             plt.plot(
                 train_sizes,
                 train_scores_mean,
                 "o-",
-                color=Config.COLORS[i],
-                label=label_name
             )
-            i += 1
-        title = "Training Learning curve"
-        # plt.title(title)
     else:
-        i = 0
-        for label_name, values in input_dict.items():
             train_sizes = values[0]
             train_scores_mean = values[1]
             train_scores_std = values[2]
@@ -51,26 +45,27 @@ def draw_learning_curve_total(input_dict, type):
                 test_scores_mean - test_scores_std,
                 test_scores_mean + test_scores_std,
                 alpha=0.1,
-                color=Config.COLORS[i]
             )
             plt.plot(
                 train_sizes,
                 test_scores_mean,
                 "o-",
-                color=Config.COLORS[i],
-                label=label_name
             )
-            i += 1
-        title = "Cross-validation Learning curve"
-        # plt.title(title)
-    plt.xlabel("Sizes")
-    plt.ylabel("Adjusted R-square")
     plt.legend()
     # plt.savefig("./diagram/{}.png".format(title), dpi=300)
     # plt.show()
-    return plt

 import numpy as np
 from matplotlib import pyplot as plt
+from static.paint import PaintObject
 from static.config import Config
+def draw_learning_curve_total(input_dict, type, paint_object: PaintObject):
     plt.figure(figsize=(10, 6), dpi=300)
     if type == "train":
+        for i, values in enumerate(input_dict.values()):
             train_sizes = values[0]
             train_scores_mean = values[1]
             train_scores_std = values[2]
                 train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std,
                 alpha=0.1,
+                color=paint_object.get_color_cur_list()[i]
             )
             plt.plot(
                 train_sizes,
                 train_scores_mean,
                 "o-",
+                color=paint_object.get_color_cur_list()[i],
+                label=paint_object.get_label_cur_list()[i]
             )
     else:
+        for i, values in enumerate(input_dict.values()):
             train_sizes = values[0]
             train_scores_mean = values[1]
             train_scores_std = values[2]
                 test_scores_mean - test_scores_std,
                 test_scores_mean + test_scores_std,
                 alpha=0.1,
+                color=paint_object.get_color_cur_list()[i]
             )
             plt.plot(
                 train_sizes,
                 test_scores_mean,
                 "o-",
+                color=paint_object.get_color_cur_list()[i],
+                label=paint_object.get_label_cur_list()[i]
             )
+    plt.title(paint_object.get_name())
+    plt.xlabel(paint_object.get_x_cur_label())
+    plt.ylabel(paint_object.get_y_cur_label())
     plt.legend()
     # plt.savefig("./diagram/{}.png".format(title), dpi=300)
     # plt.show()
+    paint_object.set_color_cur_num(len(input_dict.keys()))
+    paint_object.set_label_cur_num(len(input_dict.keys()))
+    return plt, paint_object