Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import vlai_template | |
# Import the LightGBM core module. When it cannot be imported the app keeps
# running: LIGHTGBM_AVAILABLE is False, lightgbm_core is None, and the
# callbacks switch to their fallback code paths.
try:
    from src import lightgbm_core
except ImportError as e:
    print(f"❌ LightGBM module failed to load: {e}")
    # The requirement is quoted because an unquoted ">=" is an output
    # redirection in POSIX shells (it would create a file named "=4.0.0").
    print('The demo requires LightGBM to be installed. Please run: pip install "lightgbm>=4.0.0"')
    LIGHTGBM_AVAILABLE = False
    lightgbm_core = None
else:
    LIGHTGBM_AVAILABLE = True
# Colour palette handed to the shared VLAI page template.
_THEME_COLORS = {
    "primary": "#2C3E50",  # Dark slate blue - professional, mathematical
    "accent": "#34495E",  # Darker slate - structured, academic
    "bg1": "#F8F9FA",  # Light gray - clean, paper-like background
    "bg2": "#E5EBC9",  # Pastel green highlight - warmer shade
    "bg3": "#E9EDD8",  # Pastel green - cooler shade for subtle contrast
}

vlai_template.configure(
    project_name="LightGBM Demo",
    year="2025",
    module="03",
    description=(
        "Interactive demonstration of LightGBM (Light Gradient Boosting Machine) "
        "algorithms for classification and regression tasks. "
        "Explore efficient gradient boosting with leaf-wise tree growth through "
        "dynamic parameter adjustment and comprehensive visualizations."
    ),
    colors=_THEME_COLORS,
    font_family="'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif",
)

# Module-level session state shared by the Gradio callbacks: the data loaders
# store the active DataFrame, and execute_prediction records the target column
# and problem type that update_tree_visualization later reads.
current_dataframe = None
current_target_column = None
current_problem_type = None
def load_sample_data_fallback(dataset_choice="Iris"):
    """Load a bundled sample dataset without going through lightgbm_core.

    Args:
        dataset_choice: One of "Iris", "Wine", "Breast Cancer", "Diabetes"
            or "Titanic".

    Returns:
        A pandas DataFrame. The scikit-learn datasets carry their label in a
        "target" column; the synthetic Titanic table carries it in "survived".

    Raises:
        ValueError: If ``dataset_choice`` is not one of the known names.
    """

    def load_sklearn(loader_name):
        # Imported lazily so the synthetic Titanic table and the unknown-name
        # check keep working where scikit-learn is not installed.
        from sklearn import datasets as sk_datasets

        data = getattr(sk_datasets, loader_name)()
        names = getattr(data, "feature_names", None)
        df = pd.DataFrame(data.data, columns=names)
        # Fall back to f0, f1, ... when names are absent or contain nulls
        # (absent names would otherwise leave integer column labels).
        if names is None or df.columns.isnull().any():
            df.columns = [f"f{i}" for i in range(df.shape[1])]
        df["target"] = data.target
        return df

    def load_titanic_fallback():
        # Small synthetic stand-in for the Titanic data (random, seeded).
        import numpy as np

        # A private RandomState(42) yields the same draws as
        # np.random.seed(42) followed by np.random.* calls, but does not
        # reset the process-wide random state as a side effect.
        rng = np.random.RandomState(42)
        n_samples = 150
        return pd.DataFrame(
            {
                "age": rng.normal(30, 10, n_samples),
                "sex": rng.choice([0, 1], n_samples),
                "pclass": rng.choice([1, 2, 3], n_samples),
                "fare": rng.exponential(20, n_samples),
                "embarked": rng.choice([0, 1, 2], n_samples),
                "survived": rng.choice([0, 1], n_samples),
            }
        )

    datasets = {
        "Iris": lambda: load_sklearn("load_iris"),
        "Wine": lambda: load_sklearn("load_wine"),
        "Breast Cancer": lambda: load_sklearn("load_breast_cancer"),
        "Diabetes": lambda: load_sklearn("load_diabetes"),
        "Titanic": load_titanic_fallback,
    }
    if dataset_choice not in datasets:
        raise ValueError(f"Unknown dataset: {dataset_choice}")
    return datasets[dataset_choice]()
def create_input_components_fallback(df, target_col):
    """Describe one input widget per feature column (fallback without lightgbm_core).

    Every column except ``target_col`` yields a dict. Object-dtype columns
    become a "dropdown" whose choices are the sorted string forms of the
    non-null unique values (or ["N/A"] when there are none). All other
    columns become a "number" pre-filled with the column mean rounded to
    three decimals (0.0 when no mean can be computed), with no bounds.
    """

    def describe(column_name):
        series = df[column_name]
        if series.dtype == "object":
            options = sorted(map(str, series.dropna().unique())) or ["N/A"]
            return {
                "name": column_name,
                "type": "dropdown",
                "choices": options,
                "value": options[0],
            }

        mean_value = pd.to_numeric(series, errors="coerce").dropna().mean()
        default = 0.0 if pd.isna(mean_value) else float(mean_value)
        return {
            "name": column_name,
            "type": "number",
            "value": round(default, 3),
            "minimum": None,
            "maximum": None,
        }

    return [describe(name) for name in df.columns if name != target_col]
# Default target column and task type for each bundled sample dataset.
# Insertion order is significant: the keys are used, in this order, as the
# choices of the sample-dataset dropdown.
SAMPLE_DATA_CONFIG = {
    dataset_name: {"target_column": target, "problem_type": task}
    for dataset_name, target, task in (
        ("Iris", "target", "classification"),
        ("Wine", "target", "classification"),
        ("Breast Cancer", "target", "classification"),
        ("Diabetes", "target", "regression"),
        ("Titanic", "survived", "classification"),
    )
}
| # JavaScript snippet passed to gr.Blocks(js=...). If the page URL has no | |
| # "__theme" query parameter it sets "__theme=light" and assigns the new query | |
| # string to window.location.search. | |
| force_light_theme_js = """ | |
| () => { | |
| const params = new URLSearchParams(window.location.search); | |
| if (!params.has('__theme')) { | |
| params.set('__theme', 'light'); | |
| window.location.search = params.toString(); | |
| } | |
| } | |
| """ | |
def validate_config(df, target_col):
    """Check that ``target_col`` is a usable target and infer the task type.

    A target is treated as classification when its dtype holds labels
    (object, dedicated string dtype, or categorical) or when it has few
    distinct values (at most min(20, 10% of the row count)); otherwise it is
    treated as regression.

    Args:
        df: DataFrame holding the data.
        target_col: Name of the candidate target column.

    Returns:
        Tuple ``(is_valid, message, problem_type)``. ``problem_type`` is
        "classification" or "regression" when valid, and None otherwise.
    """
    if not target_col or target_col not in df.columns:
        return False, "❌ Please select a valid target column from the dropdown.", None

    target_series = df[target_col]
    unique_vals = target_series.nunique()

    # `dtype == "object"` alone misses categorical and dedicated string dtypes,
    # which would then be judged by the numeric heuristic below and could be
    # rejected as a regression target with too few values.
    dtype = target_series.dtype
    holds_labels = (
        dtype == "object"
        or isinstance(dtype, pd.CategoricalDtype)
        or pd.api.types.is_string_dtype(dtype)
    )

    if holds_labels or unique_vals <= min(20, len(target_series) * 0.1):
        problem_type = "classification"
        if unique_vals > 50:
            return False, f"⚠️ Too many classes ({unique_vals}). Consider another target.", None
        if target_series.isnull().any():
            return False, "⚠️ Target column has missing values. Please clean your data.", None
    else:
        problem_type = "regression"
        if unique_vals < 5:
            return False, f"⚠️ Too few unique values ({unique_vals}). Consider another target.", None

    noun = "classes" if problem_type == "classification" else "values"
    return True, f"\n✅ Configuration is valid! Ready for {unique_vals} {noun}.", problem_type
def get_status_message(is_sample, dataset_choice, target_col, problem_type, is_valid, validation_msg):
    """Build the Markdown status line shown next to the target selector.

    Args:
        is_sample: True when a bundled sample dataset is loaded.
        dataset_choice: Name of the sample dataset (used only for samples).
        target_col: Selected target column, or None.
        problem_type: "classification" / "regression", or None when unknown.
        is_valid: Whether the configuration passed validation (custom data).
        validation_msg: Validation message appended for custom data.

    Returns:
        A Markdown string.
    """
    if is_sample:
        # problem_type can be None (e.g. validation failed and no default is
        # configured); calling .title() on it would raise AttributeError.
        type_label = problem_type.title() if problem_type else "Unknown"
        return f"✅ **Selected Dataset**: {dataset_choice} | **Target**: {target_col} | **Type**: {type_label}"
    if target_col and problem_type:
        status_icon = "✅" if is_valid else "⚠️"
        return f"{status_icon} **Custom Data** | **Target**: {target_col} | **Type**: {problem_type.title()} | {validation_msg}"
    return "📁 **Custom data uploaded!** 👆 Please select target column above to continue."
def load_and_configure_data_simple(dataset_choice="Iris"):
    """Load a sample dataset and return the preview, target dropdown and status.

    Stores the loaded frame in the module-level ``current_dataframe``
    (reset to None on failure).

    Returns:
        A 3-item list: first five rows rounded to two decimals, a
        ``gr.Dropdown`` listing every column with the default target
        selected, and a Markdown status string.
    """
    global current_dataframe
    try:
        # Use the sklearn/pandas fallback loader when lightgbm_core is missing.
        if LIGHTGBM_AVAILABLE:
            df = lightgbm_core.load_data(None, dataset_choice)
        else:
            df = load_sample_data_fallback(dataset_choice)
        current_dataframe = df

        columns = df.columns.tolist()
        defaults = SAMPLE_DATA_CONFIG.get(dataset_choice, {})
        target_col = defaults.get("target_column")
        problem_type = defaults.get("problem_type")

        if target_col and target_col in columns:
            is_valid, validation_msg, detected = validate_config(df, target_col)
            # A detected type takes precedence over the configured default.
            problem_type = detected or problem_type
        else:
            # Configured target is absent: fall back to the first column.
            target_col = columns[0] if columns else None
            is_valid, validation_msg = False, ""

        status_msg = get_status_message(True, dataset_choice, target_col, problem_type, is_valid, validation_msg)
        preview = df.head(5).round(2)
        return [preview, gr.Dropdown(choices=columns, value=target_col), status_msg]
    except Exception as e:
        current_dataframe = None
        failure_msg = f"❌ **Error loading data**: {str(e)} | Please try a different dataset."
        return [pd.DataFrame(), gr.Dropdown(choices=[], value=None), failure_msg]
def _read_uploaded_table(file_obj):
    """Read an uploaded CSV/Excel file into a DataFrame (path used without lightgbm_core)."""
    # Accept both objects exposing ``.name`` and plain path strings.
    path = getattr(file_obj, "name", file_obj)
    # Compare the extension case-insensitively so "DATA.CSV" is accepted.
    lowered = str(path).lower()
    if lowered.endswith(".csv"):
        return pd.read_csv(path)
    if lowered.endswith((".xlsx", ".xls")):
        return pd.read_excel(path)
    raise ValueError("Unsupported format. Upload CSV or Excel files.")


def _feature_slot_updates(components_info, max_features=20):
    """Map component descriptors onto the fixed (Number, Dropdown) widget pairs."""
    # Every slot starts hidden; only the widget matching the descriptor type
    # of each feature is made visible. Features beyond max_features are dropped.
    updates = [gr.update(visible=False)] * (2 * max_features)
    for slot, comp in zip(range(max_features), components_info):
        number_idx, dropdown_idx = slot * 2, slot * 2 + 1
        if comp["type"] == "number":
            upd = {"visible": True, "label": comp["name"], "value": comp["value"]}
            if comp["minimum"] is not None:
                upd["minimum"] = comp["minimum"]
            if comp["maximum"] is not None:
                upd["maximum"] = comp["maximum"]
            updates[number_idx] = gr.update(**upd)
        else:
            updates[dropdown_idx] = gr.update(
                visible=True, label=comp["name"], choices=comp["choices"], value=comp["value"]
            )
    return updates


def load_and_configure_data(file_obj=None, dataset_choice="Iris"):
    """Load uploaded or sample data and prepare every dependent UI output.

    Stores the loaded frame in the module-level ``current_dataframe``
    (reset to None on failure).

    Args:
        file_obj: Uploaded file, or None to load ``dataset_choice``.
        dataset_choice: Sample dataset name used when no file is given.

    Returns:
        A 45-item list: data preview, target dropdown, status Markdown,
        40 updates for the feature widgets (20 Number/Dropdown pairs),
        an update for the inputs group, and the input status Markdown.
    """
    global current_dataframe
    try:
        if LIGHTGBM_AVAILABLE:
            df = lightgbm_core.load_data(file_obj, dataset_choice)
        elif file_obj is not None:
            df = _read_uploaded_table(file_obj)
        else:
            df = load_sample_data_fallback(dataset_choice)
        current_dataframe = df
        target_options = df.columns.tolist()

        is_sample = file_obj is None
        target_col, problem_type = None, None
        is_valid, validation_msg = False, ""
        if is_sample:
            cfg = SAMPLE_DATA_CONFIG.get(dataset_choice, {})
            target_col = cfg.get("target_column")
            problem_type = cfg.get("problem_type")
            if target_col not in target_options:
                # Same fallback as load_and_configure_data_simple: never hand
                # the dropdown a value that is not among its choices.
                target_col = target_options[0] if target_options else None

        if target_col:
            is_valid, validation_msg, detected = validate_config(df, target_col)
            if detected:
                problem_type = detected
        status_msg = get_status_message(is_sample, dataset_choice, target_col, problem_type, is_valid, validation_msg)

        input_updates = [gr.update(visible=False)] * 40
        inputs_visible = gr.update(visible=False)
        input_status = "⚙️ Configure target column above to enable feature inputs."
        if target_col and problem_type and (not is_sample or is_valid):
            try:
                if LIGHTGBM_AVAILABLE:
                    components_info = lightgbm_core.create_input_components(df, target_col)
                else:
                    components_info = create_input_components_fallback(df, target_col)
                input_updates = _feature_slot_updates(components_info)
                inputs_visible = gr.update(visible=True)
                input_status = f"📝 **Ready!** Enter values for {len(components_info)} features below, then click Run prediction. | {validation_msg}"
            except Exception as e:
                # Keep the loaded data usable; only the feature inputs stay hidden.
                input_status = f"❌ Error generating inputs: {str(e)}"

        return [df.head(5).round(2), gr.Dropdown(choices=target_options, value=target_col), status_msg] + input_updates + [inputs_visible, input_status]
    except Exception as e:
        current_dataframe = None
        empty = [pd.DataFrame(), gr.Dropdown(choices=[], value=None), f"❌ **Error loading data**: {str(e)} | Please try a different file or dataset."]
        return empty + [gr.update(visible=False)] * 40 + [gr.update(visible=False), "No data loaded."]
def update_configuration(df_preview, target_col):
    """Rebuild the feature-input widgets after the target column changes.

    Args:
        df_preview: Preview table value from the UI; unused (the full frame
            is read from the module-level ``current_dataframe``).
        target_col: Currently selected target column.

    Returns:
        A 43-item list: 40 updates for the feature widgets (20
        Number/Dropdown pairs), an update for the inputs group, the input
        status Markdown and the main status Markdown.
    """
    df = current_dataframe

    def hidden(message):
        # Hide every feature widget and show the same message in both status areas.
        return [gr.update(visible=False)] * 40 + [gr.update(visible=False), message, message]

    if df is None or df.empty:
        return hidden("No data available.")
    if not target_col:
        return hidden("Select target column.")

    try:
        is_valid, validation_msg, problem_type = validate_config(df, target_col)
        if not is_valid:
            # validate_config messages already start with their own icon;
            # prepending another produced "⚠️ ⚠️ ..." / "⚠️ ❌ ...".
            return hidden(validation_msg)

        if LIGHTGBM_AVAILABLE:
            components_info = lightgbm_core.create_input_components(df, target_col)
        else:
            components_info = create_input_components_fallback(df, target_col)

        # Only the first 20 features get a widget pair; the rest stay hidden.
        input_updates = [gr.update(visible=False)] * 40
        for slot, comp in zip(range(20), components_info):
            number_idx, dropdown_idx = slot * 2, slot * 2 + 1
            if comp["type"] == "number":
                upd = {"visible": True, "label": comp["name"], "value": comp["value"]}
                if comp["minimum"] is not None:
                    upd["minimum"] = comp["minimum"]
                if comp["maximum"] is not None:
                    upd["maximum"] = comp["maximum"]
                input_updates[number_idx] = gr.update(**upd)
            else:
                input_updates[dropdown_idx] = gr.update(
                    visible=True, label=comp["name"], choices=comp["choices"], value=comp["value"]
                )

        input_status = f"📝 Enter values for {len(components_info)} features | {validation_msg}"
        # NOTE(review): this label says "Custom Data" even when the callback
        # runs after a sample dataset was chosen — confirm that is intended.
        status_msg = f"✅ **Selected Dataset**: Custom Data | **Target**: {target_col} | **Type**: {problem_type.title()}"
        return input_updates + [gr.update(visible=True), input_status, status_msg]
    except Exception as e:
        return hidden(f"❌ Error: {str(e)}")
| # LightGBM-specific functions | |
def execute_prediction(df_preview, target_col, n_estimators, num_leaves, min_data_in_leaf, learning_rate, train_test_split_ratio, show_split_info, use_early_stopping, early_stopping_rounds, *input_values):
    """Train LightGBM on the loaded data and build the results-panel outputs.

    Args:
        df_preview: Preview table value from the UI; unused (the full frame
            is read from the module-level ``current_dataframe``).
        target_col: Selected target column.
        n_estimators, num_leaves, min_data_in_leaf, learning_rate,
        train_test_split_ratio, use_early_stopping, early_stopping_rounds:
            Forwarded unchanged to ``lightgbm_core.run_lightgbm_and_visualize``.
        show_split_info: Received from the UI but not used here.
        *input_values: Values of the feature widgets, laid out as alternating
            (number, dropdown) pairs, one pair per feature.

    Returns:
        A 5-tuple: loss chart, first tree figure, feature-importance figure,
        process HTML, and the tree-selector dropdown. On any failure the
        three figures are None, the HTML is an error panel and the dropdown
        offers only "Tree 1".
    """
    global current_target_column, current_problem_type
    import html  # local import: escapes dynamic text placed into the HTML panel

    df = current_dataframe
    error_style = "<div style='background:#FFF4F4;border-left:6px solid #C4314B;padding:14px 16px;border-radius:10px;'><strong>⚡ LightGBM Process</strong><br><br>{}</div>"
    default_dropdown = gr.Dropdown(choices=["Tree 1"], value="Tree 1")

    def failure(message_html):
        return (None, None, None, error_style.format(message_html), default_dropdown)

    if not LIGHTGBM_AVAILABLE:
        # The requirement is quoted: an unquoted ">=" is a shell redirection.
        return failure('❌ LightGBM module is not available!<br><br>Please ensure LightGBM is installed:<br><code>pip install "lightgbm&gt;=4.0.0"</code><br><br>Then restart the application.')
    if df is None or df.empty:
        return failure("No data available.")
    if not target_col:
        return failure("Configuration incomplete.")

    is_valid, validation_msg, problem_type = validate_config(df, target_col)
    if not is_valid:
        # Surface the actual reason instead of a generic message.
        return failure(f"Configuration issue: {html.escape(str(validation_msg))}")

    # Remember the target and task type for update_tree_visualization.
    current_target_column = target_col
    current_problem_type = problem_type

    try:
        # LightGBM is known to be available here (early return above), so the
        # fallback component builder is never needed.
        components_info = lightgbm_core.create_input_components(df, target_col)

        new_point_dict = {}
        for i, comp in enumerate(components_info):
            # Numbers live at even positions, dropdowns at odd positions.
            value_idx = i * 2 if comp["type"] == "number" else i * 2 + 1
            if value_idx < len(input_values) and input_values[value_idx] is not None:
                new_point_dict[comp["name"]] = input_values[value_idx]
            else:
                # No widget (feature beyond the available slots) or empty
                # field: use the component's default value.
                new_point_dict[comp["name"]] = comp["value"]

        (
            _boosting_progress_fig,
            loss_chart_fig,
            importance_fig,
            _prediction,
            _pred_details,
            _summary,
            aggregation_display,
        ) = lightgbm_core.run_lightgbm_and_visualize(
            df, target_col, new_point_dict, n_estimators, num_leaves, min_data_in_leaf, learning_rate, train_test_split_ratio, problem_type, use_early_stopping, early_stopping_rounds
        )

        feature_cols = [c for c in df.columns if c != target_col]
        first_tree_fig = lightgbm_core.get_individual_tree_visualization(
            lightgbm_core._get_current_model(), 0, feature_cols, problem_type, num_leaves
        )
        updated_tree_selector = update_tree_selector_choices(n_estimators)
        return (loss_chart_fig, first_tree_fig, importance_fig, aggregation_display, updated_tree_selector)
    except Exception as e:
        print(f"Execution error: {str(e)}")  # For debugging
        return failure(f"Execution error: {html.escape(str(e))}")
def update_tree_selector_choices(n_estimators):
    """Return a tree-selector dropdown listing the trees that can be shown.

    The count is taken, in order of preference, from the length of the
    model's "train" evaluation history, from ``best_iteration``, and from
    ``num_trees()``. It is clamped to at least 1 and at most 100. If the
    model cannot be inspected, the requested ``n_estimators`` is used.

    Args:
        n_estimators: Requested number of trees (fallback only).

    Returns:
        A ``gr.Dropdown`` with choices "Tree 1" .. "Tree N", "Tree 1" selected.
    """

    def requested_count():
        # n_estimators may be None or non-numeric (e.g. an emptied field).
        try:
            return max(1, int(n_estimators))
        except (TypeError, ValueError):
            return 1

    try:
        model = lightgbm_core._get_current_model()
        actual_trees = 0
        if model is not None:
            # Prefer the evaluation history length if available.
            eval_results = getattr(model, "evals_result_", None)
            if eval_results and "train" in eval_results and eval_results["train"]:
                metric_name = list(eval_results["train"].keys())[0]
                actual_trees = len(eval_results["train"][metric_name])
                print(f"Tree selector: eval history reports {actual_trees} trees trained")

            # Fallback to best_iteration. NOTE(review): assumes the model
            # follows lightgbm.Booster, where best_iteration is already a
            # 1-based count and is 0 or negative when early stopping did not
            # trigger — confirm against lightgbm_core. Adding 1 to that
            # sentinel reported a single tree and skipped num_trees().
            best_iteration = getattr(model, "best_iteration", None)
            if actual_trees == 0 and best_iteration is not None and int(best_iteration) > 0:
                actual_trees = int(best_iteration)
                print(f"Tree selector: using best_iteration -> {actual_trees} trees")

            # Final fallback to model.num_trees().
            if actual_trees == 0 and hasattr(model, "num_trees"):
                actual_trees = int(model.num_trees())
                print(f"Tree selector: using num_trees() -> {actual_trees} trees")

        # At least one option (avoids an empty dropdown), at most 100 for UI performance.
        actual_trees = max(1, actual_trees)
        trees_to_show = min(actual_trees, 100)
        print(f"Tree selector: requested={n_estimators}, available={actual_trees}, showing={trees_to_show}")
    except Exception as e:
        trees_to_show = min(requested_count(), 100)
        print(f"Tree selector error: {e}, falling back to requested count {trees_to_show}")

    choices = [f"Tree {i + 1}" for i in range(trees_to_show)]
    return gr.Dropdown(choices=choices, value="Tree 1")
def update_tree_visualization(tree_selector, num_leaves=31):
    """Render the tree picked in the selector, or return None if that is not possible.

    Args:
        tree_selector: Dropdown value such as "Tree 3" (1-based).
        num_leaves: Number-of-leaves setting from the UI, forwarded to the
            visualization helper.

    Returns:
        The figure produced by
        ``lightgbm_core.get_individual_tree_visualization``, or None when no
        data, target, problem type or model is available, or on any error.
    """
    df = current_dataframe
    if df is None or df.empty:
        return None
    # Target and problem type are recorded by execute_prediction.
    if current_target_column is None or current_problem_type is None:
        return None

    try:
        model = lightgbm_core._get_current_model()
        if model is None:
            return None

        # "Tree N" -> zero-based index N - 1.
        label_number = tree_selector.split()[-1]
        tree_index = int(label_number) - 1
        feature_cols = [name for name in df.columns if name != current_target_column]
        return lightgbm_core.get_individual_tree_visualization(
            model, tree_index, feature_cols, current_problem_type, num_leaves
        )
    except Exception as e:
        print(f"Tree visualization error: {str(e)}")  # For debugging
        return None
| # Build the Gradio UI: header and info card, a left column with the data and | |
| # parameter panels, a right column with the result plots, then the event wiring. | |
| with gr.Blocks(theme="gstaff/sketch", css=vlai_template.custom_css, fill_width=True, js=force_light_theme_js) as demo: | |
| vlai_template.create_header() | |
| gr.HTML(vlai_template.render_info_card( | |
| icon="⚡", | |
| title="About this LightGBM Demo", | |
| description="This interactive demo showcases LightGBM (Light Gradient Boosting Machine) algorithms for both classification and regression tasks. Explore efficient gradient boosting with leaf-wise tree growth through dynamic parameter adjustment and comprehensive visualizations." | |
| )) | |
| gr.Markdown("### ⚡ **How to Use**: Select data → Configure target → Set LightGBM parameters → Enter new point → Run prediction!") | |
| with gr.Row(equal_height=False, variant="panel"): | |
| with gr.Column(scale=45): | |
| with gr.Accordion("📊 Data & Configuration", open=True): | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| gr.Markdown("Start with sample datasets or upload your own CSV/Excel files.") | |
| file_upload = gr.File(label="📁 Upload Your Data", file_types=[".csv", ".xlsx", ".xls"]) | |
| with gr.Column(scale=3): | |
| # The dropdown choices are the keys of SAMPLE_DATA_CONFIG, in insertion order. | |
| sample_dataset = gr.Dropdown(choices=list(SAMPLE_DATA_CONFIG.keys()), value="Titanic", label="🗂️ Sample Datasets") | |
| with gr.Row(): | |
| target_column = gr.Dropdown(choices=[], label="🎯 Target Column", interactive=True) | |
| status_message = gr.Markdown("🔄 Loading sample data...") | |
| data_preview = gr.DataFrame(label="📋 Data Preview (First 5 Rows)", row_count=5, interactive=False, max_height=250) | |
| with gr.Accordion("⚡ LightGBM Parameters & Input", open=True): | |
| gr.Markdown("**⚡ LightGBM Parameters**") | |
| with gr.Row(): | |
| n_estimators = gr.Number( | |
| label="Number of Trees", | |
| value=100, minimum=1, maximum=1000, precision=0, | |
| info="Requested number of trees (up to 1000). Actual trained trees may be fewer due to early stopping." | |
| ) | |
| learning_rate = gr.Slider( | |
| label="Learning Rate", | |
| value=0.1, minimum=0.001, maximum=1.0, step=0.001, | |
| info="Step size shrinkage for each tree" | |
| ) | |
| with gr.Row(): | |
| num_leaves = gr.Number( | |
| label="Number of Leaves", | |
| value=31, minimum=2, maximum=127, precision=0, | |
| info="Maximum number of leaves in one tree (controls complexity, typically 31-70)" | |
| ) | |
| min_data_in_leaf = gr.Number( | |
| label="Min Data in Leaf", | |
| value=20, minimum=1, maximum=1000, precision=0, | |
| info="Minimum number of data points in one leaf (prevents overfitting)" | |
| ) | |
| gr.Markdown("**📊 Data Split Configuration**") | |
| with gr.Row(): | |
| train_test_split_ratio = gr.Slider( | |
| label="Train/Validation Split Ratio", | |
| value=0.8, minimum=0.6, maximum=0.9, step=0.05, | |
| info="Proportion of data used for training (e.g., 0.8 = 80% train, 20% validation)" | |
| ) | |
| # NOTE(review): this checkbox is passed to execute_prediction, which does not read it. | |
| show_split_info = gr.Checkbox( | |
| label="Show Split Details", | |
| value=True, | |
| info="Display train/validation set information" | |
| ) | |
| with gr.Row(): | |
| use_early_stopping = gr.Checkbox( | |
| label="Use Early Stopping", | |
| value=False, | |
| info="Disabled by default to train all requested trees for visualization" | |
| ) | |
| early_stopping_rounds = gr.Number( | |
| label="Early Stopping Rounds", | |
| value=20, minimum=5, maximum=100, precision=0, | |
| info="Used only if early stopping is enabled" | |
| ) | |
| # Created hidden; the data-loading and configuration callbacks return a | |
| # visibility update for this group. | |
| inputs_group = gr.Group(visible=False) | |
| with inputs_group: | |
| input_status = gr.Markdown("Configure inputs above.") | |
| gr.Markdown("**📝 New Data Point** - Enter feature values for prediction:") | |
| # 5 rows x 4 columns = 20 feature slots. Each slot is a (Number, Dropdown) | |
| # pair, so input_components holds 40 widgets in alternating order. The | |
| # `idx < 20` check is always true for these loop ranges. | |
| input_components = [] | |
| for row in range(5): | |
| with gr.Row(): | |
| for col in range(4): | |
| idx = row * 4 + col | |
| if idx < 20: | |
| number_comp = gr.Number(label=f"Feature {idx+1}", visible=False) | |
| dropdown_comp = gr.Dropdown(label=f"Feature {idx+1}", visible=False) | |
| input_components.extend([number_comp, dropdown_comp]) | |
| run_prediction_btn = gr.Button("⚡ Run Prediction", variant="primary", size="lg") | |
| with gr.Column(scale=55): | |
| gr.Markdown("### ⚡ **LightGBM Results & Visualization**") | |
| loss_chart = gr.Plot(label="Training/Validation Loss Evolution", visible=True) | |
| with gr.Row(): | |
| tree_selector = gr.Dropdown( | |
| choices=["Tree 1"], | |
| value="Tree 1", | |
| label="🌳 Select Tree to Visualize", | |
| interactive=True | |
| ) | |
| individual_tree_plot = gr.Plot(label="Individual Tree Structure", visible=True) | |
| feature_importance_plot = gr.Plot(label="Feature Importance", visible=True) | |
| # NOTE(review): this placeholder uses Markdown-style ** markers inside a gr.HTML | |
| # component, while the error panel in execute_prediction uses <strong> — confirm | |
| # the asterisks render as intended. | |
| aggregation_display = gr.HTML("**⚡ LightGBM Process**<br><br>LightGBM details will appear here showing how the prediction builds up.", label="⚡ LightGBM Process") | |
| # NOTE(review): the "Early Stopping" tip below says the selector shows requested | |
| # trees, but update_tree_selector_choices first tries to list only the trees that | |
| # were actually trained — confirm which statement is current. | |
| gr.Markdown("""⚡ **LightGBM Leaf-wise Tree Tips**: | |
| - **📉 Loss Evolution Chart**: Monitor training and validation loss to understand model convergence with early stopping. | |
| - **🌳 Individual Tree Visualization**: Select any tree to see its leaf-wise structure and contribution. | |
| - **📊 Feature Importance**: Displays which features are most influential using gradient-based importance. | |
| - **🎯 Parameter Tuning**: Try different **number of trees** (up to 1000) and **learning rate** (0.001-1.0). | |
| - **⚡ Learning Rate**: Default 0.1 works well; lower values (0.01-0.05) for more conservative models, higher values (0.2-0.3) for faster convergence. | |
| - **🍃 Number of Leaves**: Controls tree complexity (default 31). For depth-7 equivalent, use ~70-80 leaves instead of 127 to prevent overfitting. | |
| - **📊 Min Data in Leaf**: Prevents overfitting by requiring minimum samples per leaf (default 20). Increase for larger datasets. | |
| - **🎯 Leaf-wise Growth**: LightGBM grows trees leaf-by-leaf for faster convergence compared to depth-wise growth. | |
| - **🔍 Tree Analysis**: Use the tree selector to understand how each tree contributes to gradient boosting ensemble. | |
| - **⏹️ Early Stopping**: Tree selector shows requested trees, but only actually trained trees can be visualized. Check console for actual vs requested tree counts. | |
| """) | |
| vlai_template.create_footer() | |
| # Event wiring. On page load the Titanic sample is loaded and every dependent | |
| # output (preview, target dropdown, status, 40 feature widgets, inputs group, | |
| # input status) is filled in one call. | |
| load_evt = demo.load( | |
| fn=lambda: load_and_configure_data(None, "Titanic"), | |
| outputs=[data_preview, target_column, status_message] + input_components + [inputs_group, input_status], | |
| ) | |
| # Uploading a file goes through the same loader; the "Iris" argument is only the | |
| # dataset_choice passed alongside the uploaded file. | |
| upload_evt = file_upload.upload( | |
| fn=lambda file: load_and_configure_data(file, "Iris"), | |
| inputs=[file_upload], | |
| outputs=[data_preview, target_column, status_message] + input_components + [inputs_group, input_status], | |
| ) | |
| # Choosing a sample dataset first refreshes preview/target/status, then rebuilds | |
| # the feature inputs through update_configuration. | |
| sample_dataset.change( | |
| fn=lambda choice: load_and_configure_data_simple(choice), | |
| inputs=[sample_dataset], | |
| outputs=[data_preview, target_column, status_message], | |
| ).then( | |
| fn=update_configuration, inputs=[data_preview, target_column], | |
| outputs=input_components + [inputs_group, input_status, status_message], | |
| ) | |
| # Changing the target column rebuilds the feature inputs and both status texts. | |
| target_column.change( | |
| fn=update_configuration, inputs=[data_preview, target_column], | |
| outputs=input_components + [inputs_group, input_status, status_message], | |
| ) | |
| # The input order must match execute_prediction's signature: ten fixed | |
| # arguments followed by the 40 feature widgets collected as *input_values. | |
| run_prediction_btn.click( | |
| fn=execute_prediction, | |
| inputs=[data_preview, target_column, n_estimators, num_leaves, min_data_in_leaf, learning_rate, train_test_split_ratio, show_split_info, use_early_stopping, early_stopping_rounds] + input_components, | |
| outputs=[loss_chart, individual_tree_plot, feature_importance_plot, aggregation_display, tree_selector], | |
| ) | |
| # Selecting another tree redraws only the individual-tree plot. | |
| tree_selector.change( | |
| fn=update_tree_visualization, | |
| inputs=[tree_selector, num_leaves], | |
| outputs=[individual_tree_plot], | |
| ) | |
| # Launch the app when the file is run as a script. | |
| if __name__ == "__main__": | |
| demo.launch(allowed_paths=["static/aivn_logo.png", "static/vlai_logo.png", "static"]) | |