Spaces:

VLAI-AIVN
/

AIO2025M03_HEART_DISEASE_PREDICTION

Running

App Files Files Community

wjnwjn59 committed on Aug 18

Commit

53227fd

1 Parent(s): 6fab39f

modify logic app

Browse files

Files changed (3) hide show

app.py +190 -104
src/heart_disease_core.py +92 -41
vlai_template.py +147 -39

app.py CHANGED Viewed

@@ -2,14 +2,15 @@ import os
 import gradio as gr
 import plotly.graph_objects as go
 import pandas as pd
 from src.heart_disease_core import (
-    CLEVELAND_FEATURES_ORDER, TARGET_COL,
-    load_cleveland_dataframe, fit_all_models, predict_all, example_patient
 )
-APP_PRIMARY = "#0F6CBD"   # medical calm blue
-APP_ACCENT = "#C4314B"    # medical alert red
 APP_BG = "#F7FAFC"
 STATE = {
@@ -20,21 +21,35 @@ STATE = {
 DATA_PATH = "data/cleveland.csv"
-# -----------------------------
-# Startup / init
-# -----------------------------
 def init_page():
-    """
-    Load dataset from disk, fit models once, and return preview + metrics.
-    Returns plain values (no .update), to maximize Gradio compatibility.
-    """
     if not os.path.exists(DATA_PATH):
         msg = f"❌ Dataset not found at '{DATA_PATH}'. Please place Cleveland CSV there."
         return msg, pd.DataFrame(), pd.DataFrame()
-    raw = pd.read_csv(DATA_PATH)
-    df = load_cleveland_dataframe(uploaded_df=raw)  # cleans, binarizes target
     models, metrics = fit_all_models(df)
     STATE["df"] = df
@@ -46,42 +61,60 @@ def init_page():
     return msg, head, metrics
-# -----------------------------
-# Helpers
-# -----------------------------
 def fill_example(idx_text: str):
-    idx = {
-        "Example 1 (likely negative)": 0,
-        "Example 2 (borderline)": 1,
-        "Example 3 (likely positive)": 2
-    }[idx_text]
     ex = example_patient(idx)
-    # Return in the strict feature order so Gradio can assign to outputs 1:1
     return [ex[c] for c in CLEVELAND_FEATURES_ORDER]
 def _bar_for_models(results: dict):
     names = list(results.keys())
-    probs = [results[n]["prob_1"] for n in names]
     fig = go.Figure()
-    fig.add_bar(x=names, y=probs, text=[f"{p:.2f}" for p in probs], textposition="auto")
     fig.update_layout(
-        title="Model Confidence (P[Heart Disease = 1])",
-        yaxis_title="Probability",
         xaxis_title="Model",
         yaxis=dict(range=[0, 1]),
         plot_bgcolor="white",
         height=420,
         margin=dict(l=30, r=20, t=60, b=40)
     )
-    # Emphasize ensemble bar
-    colors = ["#9BB8D3"] * len(names)
-    if "Ensemble (Soft Voting)" in names:
-        colors[names.index("Ensemble (Soft Voting)")] = APP_ACCENT
-    elif len(colors) > 0:
-        colors[-1] = APP_ACCENT
-    fig.data[0].marker.color = colors
     return fig
@@ -93,17 +126,43 @@ def run_predict(*vals):
     results = predict_all(STATE["models"], input_dict)
     final = results["Ensemble (Soft Voting)"]
-    title_md = (
-        f"### 🫀 Cleveland Heart Disease Diagnosis\n"
-        f"**Ensemble Prediction**: **{'Positive' if final['label'] == 1 else 'Negative'}**  \n"
-        f"**Confidence (P=1)**: `{final['prob_1']:.3f}`"
-    )
     rows = []
     for name, r in results.items():
         rows.append({
             "Model": name,
-            "Predicted label": "Positive" if r["label"] == 1 else "Negative",
             "P(No disease)": round(r["prob_0"], 3),
             "P(Heart disease)": round(r["prob_1"], 3),
         })
@@ -111,80 +170,107 @@ def run_predict(*vals):
     fig = _bar_for_models(results)
-    # Return plain values for Markdown, Plot, Markdown, DataFrame
-    return title_md, fig, "**Per-Model Predictions**", table_df
-# -----------------------------
-# UI (no gr.Box to avoid older-Gradio issues)
-# -----------------------------
-with gr.Blocks(theme="soft", css=f"""
-:root {{
-  --primary-600: {APP_PRIMARY};
-}}
-.gradio-container {{ background: {APP_BG}; }}
-h1, h2, h3, h4 {{ color: {APP_PRIMARY}; }}
-""") as demo:
-    gr.Markdown("# 🫀 Cleveland Heart Disease Diagnosis (Ensemble Demo)")
-    with gr.Row(equal_height=False):
         # LEFT: data preview + inputs
         with gr.Column(scale=45):
-            gr.Markdown("### 📁 Dataset & Model Status")
-            status_md = gr.Markdown("Loading dataset and training models...")
-            preview = gr.DataFrame(label="Cleveland Preview (first rows)", interactive=False)
-            metrics_df = gr.DataFrame(label="Validation ROC-AUC (80/20 split)", interactive=False)
-            gr.Markdown("### ✍️ Enter Patient Features")
-            with gr.Row():
-                age = gr.Number(label="age (years)", value=58)
-                sex = gr.Dropdown(label="sex (0=female, 1=male)", choices=[0, 1], value=1)
-                cp = gr.Dropdown(label="cp (chest pain type 0..3)", choices=[0, 1, 2, 3], value=2)
-                trestbps = gr.Number(label="trestbps (resting BP mmHg)", value=130)
-            with gr.Row():
-                chol = gr.Number(label="chol (serum cholesterol mg/dl)", value=250)
-                fbs = gr.Dropdown(label="fbs (>120 mg/dl? 1/0)", choices=[0, 1], value=0)
-                restecg = gr.Dropdown(label="restecg (0..2)", choices=[0, 1, 2], value=1)
-                thalach = gr.Number(label="thalach (max heart rate)", value=150)
-            with gr.Row():
-                exang = gr.Dropdown(label="exang (exercise angina 1/0)", choices=[0, 1], value=0)
-                oldpeak = gr.Number(label="oldpeak (ST depression)", value=1.0)
-                slope = gr.Dropdown(label="slope (0..2)", choices=[0, 1, 2], value=1)
-                ca = gr.Dropdown(label="ca (major vessels 0..3)", choices=[0, 1, 2, 3], value=0)
-            thal = gr.Dropdown(label="thal (1=normal, 2=fixed, 3=reversible)", choices=[1, 2, 3], value=2)
-            with gr.Row():
-                ex_selector = gr.Dropdown(
-                    label="Fill Example",
-                    choices=["Example 1 (likely negative)", "Example 2 (borderline)", "Example 3 (likely positive)"],
-                    value="Example 2 (borderline)"
-                )
-                fill_btn = gr.Button("🧪 Use Example")
-                predict_btn = gr.Button("🔍 Predict", variant="primary")
         # RIGHT: outputs
         with gr.Column(scale=55):
-            gr.Markdown("### 📈 Predictions")
-            title_out = gr.Markdown("Ensemble Prediction will appear here.")
-            bar_out = gr.Plot(label="Model Confidence")
-            sub_md = gr.Markdown(visible=False)
-            table_out = gr.DataFrame(visible=False)
-            with gr.Accordion("ℹ️ Notes", open=False):
-                gr.Markdown(
-                    "- Models are trained once at launch on `data/cleveland.csv` (80/20 split).\n"
-                    "- `target` is binarized automatically (0 = no disease, >0 = disease).\n"
-                    "- Ensemble uses **soft voting** over Decision Tree, k-NN, and Naive Bayes.\n"
-                    "- Educational demo only; **not medical advice**."
-                )
     # Bind events
     demo.load(fn=init_page, inputs=None, outputs=[status_md, preview, metrics_df])
-    fill_btn.click(
         fn=fill_example,
         inputs=[ex_selector],
         outputs=[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]
@@ -193,8 +279,8 @@ h1, h2, h3, h4 {{ color: {APP_PRIMARY}; }}
     predict_btn.click(
         fn=run_predict,
         inputs=[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal],
-        outputs=[title_out, bar_out, sub_md, table_out]
     )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import plotly.graph_objects as go
 import pandas as pd
+import vlai_template
 from src.heart_disease_core import (
+    CLEVELAND_FEATURES_ORDER,
+    load_cleveland_dataframe, fit_all_models, predict_all, example_patient, get_example_labels
 )
+APP_PRIMARY = vlai_template.PRIMARY_COLOR
+APP_ACCENT = vlai_template.ACCENT_COLOR
 APP_BG = "#F7FAFC"
 STATE = {
 DATA_PATH = "data/cleveland.csv"
+vlai_template.set_meta(
+    project_name="Heart Disease Diagnosis Project",
+    year="2025",
+    module="03",
+    description="Predict heart disease risk from patient data with ML models trained on the Cleveland dataset.",
+    meta_items=[
+        ("Dataset", "Cleveland Heart Disease"),
+        ("Models", "Decision Tree, k-NN, Naive Bayes"),
+        ("Ensemble", "Soft Voting"),
+    ],
+)
+force_light_theme_js = """
+() => {
+  const params = new URLSearchParams(window.location.search);
+  if (!params.has('__theme')) {
+    params.set('__theme', 'light');
+    window.location.search = params.toString();
+  }
+}
+"""
 def init_page():
+    """Load dataset, train models, and return status, preview, metrics."""
     if not os.path.exists(DATA_PATH):
         msg = f"❌ Dataset not found at '{DATA_PATH}'. Please place Cleveland CSV there."
         return msg, pd.DataFrame(), pd.DataFrame()
+    df = load_cleveland_dataframe(file_path=DATA_PATH)
     models, metrics = fit_all_models(df)
     STATE["df"] = df
     return msg, head, metrics
 def fill_example(idx_text: str):
     """Return the feature values of the example patient named by a dropdown label.

     Args:
         idx_text: Dropdown label containing ``"Example <N>"`` where ``N`` is the
             1-based example number, e.g. ``"Example 2 (Heart Disease)"``.

     Returns:
         The example's feature values as a list ordered like
         ``CLEVELAND_FEATURES_ORDER`` so they map 1:1 onto the input widgets.
     """
     import re

     # A cleared dropdown (None) or any other non-string value must not reach
     # re.search, which raises TypeError on non-string input; treat it as
     # "no match" and use the same fallback as an unrecognised label.
     match = re.search(r'Example (\d+)', idx_text) if isinstance(idx_text, str) else None
     # Labels are 1-based, example_patient() is 0-based; fall back to index 1
     # when the label carries no example number.
     idx = int(match.group(1)) - 1 if match else 1
     ex = example_patient(idx)
     return [ex[c] for c in CLEVELAND_FEATURES_ORDER]
 def _bar_for_models(results: dict):
     """Build a bar chart with one bar per model showing its predicted class.

     Each bar's height is the probability of the class the model predicted
     (``prob_1`` when ``label`` is 1, otherwise ``prob_0``). Bars are red for a
     heart-disease prediction and green otherwise, and the soft-voting ensemble
     bar gets a thicker black outline.

     Args:
         results: Mapping of model name to a dict with ``label``, ``prob_0``
             and ``prob_1`` entries.

     Returns:
         The configured plotly figure.
     """
     ensemble_key = "Ensemble (Soft Voting)"
     names = list(results)

     heights, captions, fills = [], [], []
     for model_name in names:
         outcome = results[model_name]
         if outcome["label"] == 1:
             height, caption, fill = outcome["prob_1"], "🫀 Heart Disease", "#C4314B"
         else:
             height, caption, fill = outcome["prob_0"], "✅ No Heart Disease", "#2E7D32"
         heights.append(height)
         captions.append(caption)
         fills.append(fill)

     # Faint outline for every bar except the ensemble, which is emphasised.
     outline_colors = [
         "#000000" if model_name == ensemble_key else "rgba(0,0,0,0.15)"
         for model_name in names
     ]
     outline_widths = [2.5 if model_name == ensemble_key else 1.0 for model_name in names]

     fig = go.Figure()
     fig.add_bar(x=names, y=heights, text=captions, textposition="auto")
     fig.update_layout(
         title="Model Predictions",
         yaxis_title="Prediction Confidence",
         xaxis_title="Model",
         yaxis=dict(range=[0, 1]),
         plot_bgcolor="white",
         paper_bgcolor="white",
         font=dict(color="black", size=12),
         height=420,
         margin=dict(l=30, r=20, t=60, b=40),
     )
     fig.data[0].marker.color = fills
     fig.data[0].marker.line.color = outline_colors
     fig.data[0].marker.line.width = outline_widths
     return fig
     results = predict_all(STATE["models"], input_dict)
     final = results["Ensemble (Soft Voting)"]
+    ensemble_color = "#C4314B" if final["label"] == 1 else "#2E7D32"
+    ensemble_prediction = "🫀 **Heart Disease Detected**" if final["label"] == 1 else "✅ **No Heart Disease**"
+    ensemble_md = f"""
+<div style=\"border: 3px solid {ensemble_color}; border-radius: 10px; padding: 20px; margin: 15px 0; background: white;\">
+    <h3 style=\"margin: 0 0 15px 0; color: {ensemble_color};\">🎯 Ensemble Prediction (Final Result)</h3>
+    <p style=\"margin: 10px 0; font-size: 18px; color: black;\"><strong>{ensemble_prediction}</strong></p>
+    <p style=\"margin: 5px 0; font-size: 16px; color: black;\"><strong>Confidence:</strong> {final['prob_1']:.1%}</p>
+</div>
+"""
+    model_predictions = []
+    for name, r in results.items():
+        prediction_text = "🫀 **Heart Disease Detected**" if r["label"] == 1 else "✅ **No Heart Disease**"
+        confidence = r["prob_1"] if r["label"] == 1 else r["prob_0"]
+        color = "#C4314B" if r["label"] == 1 else "#2E7D32"
+        model_predictions.append(f"""
+<div style=\"border: 2px solid {color}; border-radius: 8px; padding: 15px; margin: 10px 0; background: white;\">
+    <h4 style=\"margin: 0 0 10px 0; color: {color};\">{name}</h4>
+    <p style=\"margin: 5px 0; font-size: 16px; color: black;\"><strong>Prediction:</strong> {prediction_text}</p>
+    <p style=\"margin: 5px 0; font-size: 14px; color: black;\"><strong>Confidence:</strong> {confidence:.1%}</p>
+    <p style=\"margin: 5px 0; font-size: 12px; color: #666;\">
+        P(No disease): {r['prob_0']:.3f} | P(Heart disease): {r['prob_1']:.3f}
+    </p>
+</div>
+""")
+    all_predictions = "\n".join(model_predictions)
     rows = []
     for name, r in results.items():
+        confidence = r["prob_1"] if r["label"] == 1 else r["prob_0"]
         rows.append({
             "Model": name,
+            "Prediction": "Heart Disease" if r["label"] == 1 else "No Heart Disease",
+            "Confidence": f"{confidence:.1%}",
             "P(No disease)": round(r["prob_0"], 3),
             "P(Heart disease)": round(r["prob_1"], 3),
         })
     fig = _bar_for_models(results)
+    return fig, "\n".join(model_predictions), table_df
+with gr.Blocks(theme="gstaff/sketch", css=vlai_template.custom_css, fill_width=True, js=force_light_theme_js) as demo:
+    vlai_template.create_header()
+    gr.HTML(vlai_template.render_info_card(icon="🫀", title="About this demo"))
+    gr.HTML(vlai_template.render_disclaimer(
+        text=(
+            "This interactive heart disease prediction demo is provided strictly for educational purposes. "
+            "It is not intended for clinical use and must not be relied upon for medical advice, diagnosis, "
+            "treatment, or decision-making. Always consult a qualified healthcare professional."
+        )
+    ))
+    gr.Markdown("### 🫀 **How to Use**: Enter patient features → Run prediction → View ensemble results!")
+    with gr.Row(equal_height=False, variant="panel"):
         # LEFT: data preview + inputs
         with gr.Column(scale=45):
+            with gr.Accordion("📁 Dataset & Model Status", open=True):
+                status_md = gr.Markdown("Loading dataset and training models...")
+                preview = gr.DataFrame(label="Cleveland Preview (first rows)", interactive=False)
+                metrics_df = gr.DataFrame(label="Validation Metrics (80/20 split)", interactive=False)
+            with gr.Accordion("✍️ Enter Patient Features", open=True):
+                with gr.Row():
+                    age = gr.Number(label="age (years)", value=58)
+                    sex = gr.Dropdown(label="sex (0=female, 1=male)", choices=[0, 1], value=1)
+                    cp = gr.Dropdown(label="cp (chest pain type 1..4)", choices=[1, 2, 3, 4], value=2)
+                    trestbps = gr.Number(label="trestbps (resting BP mmHg)", value=130)
+                with gr.Row():
+                    chol = gr.Number(label="chol (serum cholesterol mg/dl)", value=250)
+                    fbs = gr.Dropdown(label="fbs (>120 mg/dl? 1/0)", choices=[0, 1], value=0)
+                    restecg = gr.Dropdown(label="restecg (0..2)", choices=[0, 1, 2], value=1)
+                    thalach = gr.Number(label="thalach (max heart rate)", value=150)
+                with gr.Row():
+                    exang = gr.Dropdown(label="exang (exercise angina 1/0)", choices=[0, 1], value=0)
+                    oldpeak = gr.Number(label="oldpeak (ST depression)", value=1.0)
+                    slope = gr.Dropdown(label="slope (1..3)", choices=[1, 2, 3], value=1)
+                    ca = gr.Dropdown(label="ca (major vessels 0..3)", choices=[0, 1, 2, 3], value=0)
+                thal = gr.Dropdown(label="thal (3=normal, 6=fixed, 7=reversible)", choices=[3, 6, 7], value=3)
+                with gr.Row():
+                    # Get actual labels from the dataset - only 2 examples
+                    try:
+                        labels = get_example_labels()
+                        choices = []
+                        # Only use first two examples: one no disease, one disease
+                        for i in range(min(2, len(labels))):
+                            label_text = "No Heart Disease" if labels[i] == 0 else "Heart Disease"
+                            choices.append(f"Example {i+1} ({label_text})")
+                        default_choice = choices[0] if choices else "Example 1"
+                    except:
+                        choices = ["Example 1 (No Heart Disease)", "Example 2 (Heart Disease)"]
+                        default_choice = "Example 1 (No Heart Disease)"
+                    ex_selector = gr.Dropdown(
+                        label="Select Example Patient",
+                        choices=choices,
+                        value=default_choice
+                    )
+                    predict_btn = gr.Button("🔍 Predict", variant="primary")
         # RIGHT: outputs
         with gr.Column(scale=55):
+            gr.Markdown("### 📈 Model Predictions")
+            bar_out = gr.Plot(label="Model Predictions Overview")
+            sub_md = gr.Markdown("**Individual Model Results**")
+            table_out = gr.DataFrame(label="All Model Predictions", interactive=False)
+    gr.Markdown("""
+    ## 📋 **Notes**
+    - **Models are trained once at launch** on `data/cleveland.csv` (80/20 split).
+    - **Target is binarized automatically** (0 = no disease, >0 = disease).
+    - **Ensemble uses soft voting** over Decision Tree, k-NN, and Naive Bayes.
+    - **Feature descriptions**:
+      - `age`: Patient age in years
+      - `sex`: Gender (0=female, 1=male)
+      - `cp`: Chest pain type (1-4)
+      - `trestbps`: Resting blood pressure (mmHg)
+      - `chol`: Serum cholesterol (mg/dl)
+      - `fbs`: Fasting blood sugar >120 mg/dl (1=true, 0=false)
+      - `restecg`: Resting ECG results (0-2)
+      - `thalach`: Maximum heart rate achieved
+      - `exang`: Exercise induced angina (1=yes, 0=no)
+      - `oldpeak`: ST depression induced by exercise
+      - `slope`: Slope of peak exercise ST segment (1-3)
+      - `ca`: Number of major vessels colored by fluoroscopy (0-3)
+      - `thal`: Thalassemia (3=normal, 6=fixed defect, 7=reversible defect)
+    """)
+    vlai_template.create_footer()
     # Bind events
     demo.load(fn=init_page, inputs=None, outputs=[status_md, preview, metrics_df])
+    # Auto-fill when example is selected
+    ex_selector.change(
         fn=fill_example,
         inputs=[ex_selector],
         outputs=[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]
     predict_btn.click(
         fn=run_predict,
         inputs=[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal],
+        outputs=[bar_out, sub_md, table_out]
     )
 if __name__ == "__main__":
+    demo.launch(allowed_paths=["static/aivn_logo.png", "static/vlai_logo.png", "static"])

src/heart_disease_core.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# src/heart_disease_core.py
 import os
 import numpy as np
 import pandas as pd
@@ -9,7 +8,7 @@ from sklearn.preprocessing import OneHotEncoder
 from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline
 from sklearn.impute import SimpleImputer
-from sklearn.metrics import roc_auc_score
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.naive_bayes import GaussianNB
@@ -20,48 +19,41 @@ CLEVELAND_FEATURES_ORDER: List[str] = [
     "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
     "thalach", "exang", "oldpeak", "slope", "ca", "thal"
 ]
-TARGET_COL = "target"  # 0: no disease, 1: disease (we binarize if needed)
 CATEGORICAL_CHOICES = {
-    "sex": [0, 1],              # 0: female, 1: male
-    "cp": [0, 1, 2, 3],         # chest pain type
-    "fbs": [0, 1],              # fasting blood sugar > 120 mg/dl (1 true, 0 false)
-    "restecg": [0, 1, 2],       # resting ECG results
-    "exang": [0, 1],            # exercise-induced angina
-    "slope": [0, 1, 2],         # slope of ST
-    "ca": [0, 1, 2, 3],         # number of major vessels (0-3) colored by fluoroscopy
-    "thal": [1, 2, 3],          # 1: normal, 2: fixed defect, 3: reversible defect (commonly 3/6/7 variants exist; we standardize)
 }
 NUMERIC_COLS = ["age", "trestbps", "chol", "thalach", "oldpeak"]
 CATEGORICAL_COLS = ["sex", "cp", "fbs", "restecg", "exang", "slope", "ca", "thal"]
 def _coerce_and_clean(df: pd.DataFrame) -> pd.DataFrame:
-    """Clean '?' and cast numeric; keep only known columns if present."""
     df = df.copy()
-    # Standardize columns if they are present with any case
     colmap = {c.lower(): c for c in df.columns}
     for col in CLEVELAND_FEATURES_ORDER + [TARGET_COL]:
         if col not in df.columns and col in colmap:
-            df[col] = df.pop(colmap[col])  # normalize name
-    # Replace '?' with NaN and cast
     for col in CLEVELAND_FEATURES_ORDER + [TARGET_COL]:
         if col in df.columns:
             df[col] = pd.to_numeric(df[col].replace("?", np.nan), errors="coerce")
-    # Binarize target if it appears as 0..4 (UCI often uses 0 vs 1..4 disease)
     if TARGET_COL in df.columns:
         df[TARGET_COL] = (df[TARGET_COL] > 0).astype(int)
     return df
 def load_cleveland_dataframe(file_path: Optional[str] = None, uploaded_df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
-    """
-    Load the Cleveland Heart Disease dataset.
-    Priority: uploaded_df > file_path > raise.
-    Expect columns CLEVELAND_FEATURES_ORDER + TARGET_COL.
-    """
     if uploaded_df is not None:
         df = _coerce_and_clean(uploaded_df)
         missing = [c for c in CLEVELAND_FEATURES_ORDER + [TARGET_COL] if c not in df.columns]
@@ -71,7 +63,19 @@ def load_cleveland_dataframe(file_path: Optional[str] = None, uploaded_df: Optio
     if file_path is not None and os.path.exists(file_path):
         if file_path.endswith(".csv"):
-            df = pd.read_csv(file_path)
         else:
             df = pd.read_excel(file_path)
         df = _coerce_and_clean(df)
@@ -171,14 +175,29 @@ def fit_all_models(df: pd.DataFrame, test_size: float = 0.2, random_state: int =
     for name, pipe in models.items():
         pipe.fit(X_tr, y_tr)
         if hasattr(pipe, "predict_proba"):
             proba = pipe.predict_proba(X_te)[:, 1]
             auc = roc_auc_score(y_te, proba)
         else:
-            # Fallback if any (unlikely here)
-            pred = pipe.predict(X_te)
-            auc = roc_auc_score(y_te, pred)
-        metrics.append({"model": name, "ROC-AUC": round(float(auc), 4)})
     metrics_df = pd.DataFrame(metrics).sort_values("ROC-AUC", ascending=False, ignore_index=True)
     return models, metrics_df
@@ -210,19 +229,51 @@ def predict_all(models: Dict[str, Pipeline], input_dict: Dict[str, float]) -> Di
 def example_patient(index: int = 0) -> Dict[str, float]:
     """
-    A few realistic examples pulled from common Cleveland-like ranges.
-    You can add more patterns for quick testing.
     """
-    examples = [
-        # Likely negative (no disease)
-        dict(age=45, sex=0, cp=0, trestbps=120, chol=230, fbs=0, restecg=1,
-             thalach=168, exang=0, oldpeak=0.0, slope=2, ca=0, thal=2),
-        # Borderline
-        dict(age=58, sex=1, cp=2, trestbps=138, chol=250, fbs=0, restecg=0,
-             thalach=150, exang=0, oldpeak=1.0, slope=1, ca=1, thal=2),
-        # Likely positive (disease)
-        dict(age=63, sex=1, cp=3, trestbps=145, chol=320, fbs=1, restecg=2,
-             thalach=130, exang=1, oldpeak=2.8, slope=0, ca=2, thal=3),
-    ]
-    index = max(0, min(index, len(examples) - 1))
-    return examples[index]

 import os
 import numpy as np
 import pandas as pd
 from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline
 from sklearn.impute import SimpleImputer
+from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.naive_bayes import GaussianNB
     "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
     "thalach", "exang", "oldpeak", "slope", "ca", "thal"
 ]
+TARGET_COL = "target"
 # Valid codes for each categorical feature, using the raw Cleveland encoding
 # that the input dropdowns and the built-in example patients use
 # (cp 1..4, slope 1..3, thal 3/6/7).
 CATEGORICAL_CHOICES = {
     "sex": [0, 1],           # 0 = female, 1 = male
     "cp": [1, 2, 3, 4],      # chest pain type
     "fbs": [0, 1],           # fasting blood sugar > 120 mg/dl (1 = true, 0 = false)
     "restecg": [0, 1, 2],    # resting ECG results
     "exang": [0, 1],         # exercise-induced angina (1 = yes, 0 = no)
     "slope": [1, 2, 3],      # slope of the peak exercise ST segment
     "ca": [0, 1, 2, 3],      # number of major vessels coloured by fluoroscopy
     "thal": [3, 6, 7],       # 3 = normal, 6 = fixed defect, 7 = reversible defect
 }
 NUMERIC_COLS = ["age", "trestbps", "chol", "thalach", "oldpeak"]
 CATEGORICAL_COLS = ["sex", "cp", "fbs", "restecg", "exang", "slope", "ca", "thal"]
 def _coerce_and_clean(df: pd.DataFrame) -> pd.DataFrame:
     """Return a cleaned copy of ``df`` with numeric features and a 0/1 target.

     Steps, applied to the columns in ``CLEVELAND_FEATURES_ORDER`` plus
     ``TARGET_COL`` when they are present:

     1. Rename columns that match an expected name case-insensitively.
     2. Replace ``"?"`` with NaN and coerce values to numeric (unparseable
        values become NaN).
     3. Drop rows whose target is missing, then binarize the target
        (``> 0`` becomes 1, otherwise 0).

     Args:
         df: Raw input frame; it is not modified.

     Returns:
         The cleaned copy.
     """
     df = df.copy()
     expected = CLEVELAND_FEATURES_ORDER + [TARGET_COL]

     # str() keeps non-string column labels (e.g. the integer labels of a
     # header-less frame) from raising AttributeError on .lower().
     colmap = {str(c).lower(): c for c in df.columns}
     for col in expected:
         if col not in df.columns and col in colmap:
             df[col] = df.pop(colmap[col])

     for col in expected:
         if col in df.columns:
             df[col] = pd.to_numeric(df[col].replace("?", np.nan), errors="coerce")

     if TARGET_COL in df.columns:
         # NaN > 0 evaluates to False, so a missing target would silently be
         # labelled "no disease"; remove those rows before binarizing.
         df = df.loc[df[TARGET_COL].notna()].copy()
         df[TARGET_COL] = (df[TARGET_COL] > 0).astype(int)
     return df
 def load_cleveland_dataframe(file_path: Optional[str] = None, uploaded_df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
+    """Load Cleveland dataset from upload or file path and ensure schema."""
     if uploaded_df is not None:
         df = _coerce_and_clean(uploaded_df)
         missing = [c for c in CLEVELAND_FEATURES_ORDER + [TARGET_COL] if c not in df.columns]
     if file_path is not None and os.path.exists(file_path):
         if file_path.endswith(".csv"):
+            # Try reading with headers first; fall back to no header
+            try:
+                df = pd.read_csv(file_path)
+                if len(df.columns) == len(CLEVELAND_FEATURES_ORDER) + 1:  # +1 for target
+                    first_row_numeric = all(pd.to_numeric(df.iloc[0], errors='coerce').notna())
+                    if first_row_numeric:
+                        # Re-read without headers and assign names
+                        df = pd.read_csv(file_path, header=None)
+                        df.columns = CLEVELAND_FEATURES_ORDER + [TARGET_COL]
+            except:
+                # Fallback: read without headers
+                df = pd.read_csv(file_path, header=None)
+                df.columns = CLEVELAND_FEATURES_ORDER + [TARGET_COL]
         else:
             df = pd.read_excel(file_path)
         df = _coerce_and_clean(df)
     for name, pipe in models.items():
         pipe.fit(X_tr, y_tr)
+        # Predictions and probabilities
+        y_pred = pipe.predict(X_te)
         if hasattr(pipe, "predict_proba"):
             proba = pipe.predict_proba(X_te)[:, 1]
             auc = roc_auc_score(y_te, proba)
         else:
+            # Fallback if probabilities are not available
+            proba = None
+            auc = roc_auc_score(y_te, y_pred)
+        acc = accuracy_score(y_te, y_pred)
+        prec = precision_score(y_te, y_pred, zero_division=0)
+        rec = recall_score(y_te, y_pred, zero_division=0)
+        f1 = f1_score(y_te, y_pred, zero_division=0)
+        metrics.append({
+            "model": name,
+            "ROC-AUC": round(float(auc), 4),
+            "Accuracy": round(float(acc), 4),
+            "Precision": round(float(prec), 4),
+            "Recall": round(float(rec), 4),
+            "F1": round(float(f1), 4),
+        })
     metrics_df = pd.DataFrame(metrics).sort_values("ROC-AUC", ascending=False, ignore_index=True)
     return models, metrics_df
 def example_patient(index: int = 0) -> Dict[str, float]:
     """Return one of the built-in example patients as a feature dict.

     Index 0 is the "no heart disease" example and index 1 the "heart disease"
     example. Indices outside that range are clamped to the nearest valid one,
     so negative values give the first example and larger values the last.

     Args:
         index: 0-based example number.

     Returns:
         A new dict mapping each feature name to its value; callers may mutate
         it without affecting later calls.
     """
     examples = (
         # No heart disease (raw row: 37,1,3,130,250,0,0,187,0,3.5,3,0,3; target 0)
         {
             "age": 37.0,
             "sex": 1.0,
             "cp": 3.0,
             "trestbps": 130.0,
             "chol": 250.0,
             "fbs": 0.0,
             "restecg": 0.0,
             "thalach": 187.0,
             "exang": 0.0,
             "oldpeak": 3.5,
             "slope": 3.0,
             "ca": 0.0,
             "thal": 3.0,
         },
         # Heart disease (raw row: 56,1,3,130,256,1,2,142,1,0.6,2,1,6; target 2)
         {
             "age": 56.0,
             "sex": 1.0,
             "cp": 3.0,
             "trestbps": 130.0,
             "chol": 256.0,
             "fbs": 1.0,
             "restecg": 2.0,
             "thalach": 142.0,
             "exang": 1.0,
             "oldpeak": 0.6,
             "slope": 2.0,
             "ca": 1.0,
             "thal": 6.0,
         },
     )
     # Clamp instead of letting every non-zero index (including negative ones)
     # fall through to the disease example.
     index = max(0, min(index, len(examples) - 1))
     return dict(examples[index])
def get_example_labels() -> List[int]:
    """Return the binarized target label of each built-in example patient.

    Position 0 belongs to the no-disease example (target 0) and position 1 to
    the heart-disease example (raw target 2, binarized to 1). The UI uses
    these labels to caption the example choices.
    """
    no_disease, heart_disease = 0, 1
    return [no_disease, heart_disease]

vlai_template.py CHANGED Viewed

@@ -1,11 +1,73 @@
 import os, base64
 import gradio as gr
-PROJECT_NAME = "Decision Tree Demo"
 AIO_YEAR = "2025"
-AIO_MODULE = "03"
-# END
 def image_to_base64(image_path: str):
@@ -28,7 +90,7 @@ def create_header():
             gr.HTML(f"""
 <div style="display:flex;justify-content:flex-start;align-items:center;gap:30px;">
     <div>
-        <h1 style="margin-bottom:0; color: #2E7D32; font-size: 2.5em; font-weight: bold;"> {PROJECT_NAME} </h1>
         <h3 style="color: #888; font-style: italic"> AIO{AIO_YEAR}: Module {AIO_MODULE}. </h3>
     </div>
 </div>
@@ -53,90 +115,136 @@ def create_footer():
 """
     return gr.HTML(footer_html)
-custom_css = """
-.gradio-container {
     min-height: 100vh !important;
     width: 100vw !important;
     margin: 0 !important;
     padding: 0px !important;
-    background: linear-gradient(135deg, #E8F5E8 0%, #D4E6D4 50%, #A8D8A8 100%);
     background-size: 600% 600%;
     animation: gradientBG 7s ease infinite;
-}
-@keyframes gradientBG {
-    0% {background-position: 0% 50%;}
-    50% {background-position: 100% 50%;}
-    100% {background-position: 0% 50%;}
-}
 /* Minimize spacing and padding */
-.content-wrap {
     padding: 2px !important;
     margin: 0 !important;
-}
 /* Reduce component spacing */
-.gr-row {
     gap: 5px !important;
     margin: 2px 0 !important;
-}
-.gr-column {
     gap: 4px !important;
     padding: 4px !important;
-}
 /* Accordion optimization */
-.gr-accordion {
     margin: 4px 0 !important;
-}
-.gr-accordion .gr-accordion-content {
     padding: 2px !important;
-}
 /* Form elements spacing */
-.gr-form {
     gap: 2px !important;
-}
 /* Button styling */
-.gr-button {
     margin: 2px 0 !important;
-}
 /* DataFrame optimization */
-.gr-dataframe {
     margin: 4px 0 !important;
-}
 /* Remove horizontal scroll from data preview */
-.gr-dataframe .wrap {
     overflow-x: auto !important;
     max-width: 100% !important;
-}
 /* Plot optimization */
-.gr-plot {
     margin: 4px 0 !important;
-}
 /* Reduce markdown margins */
-.gr-markdown {
     margin: 2px 0 !important;
-}
 /* Footer positioning */
-.sticky-footer {
     position: fixed;
     bottom: 0px;
     left: 0;
     width: 100%;
-    background: #E8F5E8;
     padding: 6px !important;
     box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
     z-index: 1000;
-}
 """

 import os, base64
 import gradio as gr
+# Theming (can be overridden by the host app via set_colors() / set_font())
+PRIMARY_COLOR = "#0F6CBD"   # medical calm blue; header title and info-card accent
+ACCENT_COLOR = "#C4314B"    # medical alert red; disclaimer card border
+SUCCESS_COLOR = "#2E7D32"   # positive/ok
+# BG1/BG2/BG3 are the 0% / 50% / 100% stops of the page background gradient
+# built in _build_custom_css(); BG1 is also the sticky footer background.
+BG1 = "#F0F7FF"
+BG2 = "#E8F0FA"
+BG3 = "#DDE7F8"
+# Font stack that _build_custom_css() applies to the page.
+FONT_FAMILY = "'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, 'Noto Sans', 'Liberation Sans', sans-serif"
+# App metadata (overridable via set_meta() / configure())
+PROJECT_NAME = "Demo Project"
 AIO_YEAR = "2025"
+AIO_MODULE = "00"
+PROJECT_DESCRIPTION = ""  # default body text for render_info_card()
+META_INFO = []  # list of (label, value) pairs; default meta line for render_info_card()
+def set_colors(primary: str = None, accent: str = None, bg1: str = None, bg2: str = None, bg3: str = None):
+    """Override theme colors and regenerate the stylesheet.
+    Each argument replaces its module-level color only when it is truthy;
+    None or an empty string leaves the current value alone. `custom_css`
+    is rebuilt on every call, even when nothing was overridden.
+    """
+    global custom_css
+    requested = {
+        "PRIMARY_COLOR": primary,
+        "ACCENT_COLOR": accent,
+        "BG1": bg1,
+        "BG2": bg2,
+        "BG3": bg3,
+    }
+    # Keep only the colors the caller actually supplied.
+    globals().update({name: value for name, value in requested.items() if value})
+    custom_css = _build_custom_css()
+def set_font(font_family: str):
+    """Replace the font stack and regenerate the stylesheet.
+    Empty or non-string values are ignored and leave `custom_css` untouched.
+    """
+    global FONT_FAMILY, custom_css
+    if not font_family or not isinstance(font_family, str):
+        return
+    FONT_FAMILY = font_family
+    custom_css = _build_custom_css()
+def set_meta(project_name: str = None, year: str = None, module: str = None, description: str = None, meta_items: list = None):
+    """Update the project metadata globals; arguments left as None are skipped.
+    `meta_items` is a list of (label, value) pairs and is stored as passed (not copied).
+    """
+    supplied = {
+        "PROJECT_NAME": project_name,
+        "AIO_YEAR": year,
+        "AIO_MODULE": module,
+        "PROJECT_DESCRIPTION": description,
+        "META_INFO": meta_items,
+    }
+    module_globals = globals()
+    for name, value in supplied.items():
+        if value is not None:
+            module_globals[name] = value
+def configure(project_name: str = None, year: str = None, module: str = None, description: str = None,
+              colors: dict = None, font_family: str = None, meta_items: list = None):
+    """Configure theme colors, font and metadata in a single call.
+    `colors` is a dict that may hold "primary", "accent", "bg1", "bg2" and
+    "bg3"; missing keys are passed on as None. Colors and font are applied
+    only when given; metadata is always forwarded to set_meta().
+    """
+    if colors:
+        color_keys = ("primary", "accent", "bg1", "bg2", "bg3")
+        set_colors(**{key: colors.get(key) for key in color_keys})
+    if font_family:
+        set_font(font_family)
+    set_meta(
+        project_name=project_name,
+        year=year,
+        module=module,
+        description=description,
+        meta_items=meta_items,
+    )
 def image_to_base64(image_path: str):
             gr.HTML(f"""
 <div style="display:flex;justify-content:flex-start;align-items:center;gap:30px;">
     <div>
+        <h1 style="margin-bottom:0; color: {PRIMARY_COLOR}; font-size: 2.5em; font-weight: bold;"> {PROJECT_NAME} </h1>
         <h3 style="color: #888; font-style: italic"> AIO{AIO_YEAR}: Module {AIO_MODULE}. </h3>
     </div>
 </div>
 """
     return gr.HTML(footer_html)
+def _build_custom_css() -> str:
+    """Return the app stylesheet rendered from the current theme globals.
+    BG1/BG2/BG3 and FONT_FAMILY are read at call time, so the function must
+    be called again after any of them changes. Doubled braces in the
+    template are literal CSS braces escaped for the f-string.
+    """
+    return f"""
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
+.gradio-container {{
     min-height: 100vh !important;
     width: 100vw !important;
     margin: 0 !important;
     padding: 0px !important;
+    background: linear-gradient(135deg, {BG1} 0%, {BG2} 50%, {BG3} 100%);
     background-size: 600% 600%;
     animation: gradientBG 7s ease infinite;
+}}
+/* Global font setup */
+body, .gradio-container, .gr-block, .gr-markdown, .gr-button, .gr-input,
+.gr-dropdown, .gr-number, .gr-plot, .gr-dataframe, .gr-accordion, .gr-form,
+.gr-textbox, .gr-html, table, th, td, label, h1, h2, h3, h4, h5, h6, p, span, div {{
+    font-family: {FONT_FAMILY} !important;
+}}
+@keyframes gradientBG {{
+    0% {{background-position: 0% 50%;}}
+    50% {{background-position: 100% 50%;}}
+    100% {{background-position: 0% 50%;}}
+}}
 /* Minimize spacing and padding */
+.content-wrap {{
     padding: 2px !important;
     margin: 0 !important;
+}}
 /* Reduce component spacing */
+.gr-row {{
     gap: 5px !important;
     margin: 2px 0 !important;
+}}
+.gr-column {{
     gap: 4px !important;
     padding: 4px !important;
+}}
 /* Accordion optimization */
+.gr-accordion {{
     margin: 4px 0 !important;
+}}
+.gr-accordion .gr-accordion-content {{
     padding: 2px !important;
+}}
 /* Form elements spacing */
+.gr-form {{
     gap: 2px !important;
+}}
 /* Button styling */
+.gr-button {{
     margin: 2px 0 !important;
+}}
 /* DataFrame optimization */
+.gr-dataframe {{
     margin: 4px 0 !important;
+}}
 /* Remove horizontal scroll from data preview */
+.gr-dataframe .wrap {{
     overflow-x: auto !important;
     max-width: 100% !important;
+}}
 /* Plot optimization */
+.gr-plot {{
     margin: 4px 0 !important;
+}}
 /* Reduce markdown margins */
+.gr-markdown {{
     margin: 2px 0 !important;
+}}
 /* Footer positioning */
+.sticky-footer {{
     position: fixed;
     bottom: 0px;
     left: 0;
     width: 100%;
+    background: {BG1};
     padding: 6px !important;
     box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
     z-index: 1000;
+}}
 """
+# Initialize CSS using defaults. set_colors() and set_font() rebind this name
+# to a freshly built string, so code that copied the value earlier (e.g. via
+# `from vlai_template import custom_css`) keeps the old stylesheet.
+custom_css = _build_custom_css()
+def render_info_card(description: str = None, meta_items: list = None, icon: str = "🧠", title: str = "About this demo") -> str:
+    """Return the HTML for the "about" card shown with the demo.
+    `description` and `meta_items` fall back to PROJECT_DESCRIPTION and
+    META_INFO when None. `meta_items` is an iterable of (label, value)
+    pairs rendered on one line separated by " · ". All values are
+    interpolated into the markup as-is, without HTML escaping.
+    """
+    body_text = PROJECT_DESCRIPTION if description is None else description
+    pairs = META_INFO if meta_items is None else meta_items
+    meta_line = ""
+    if pairs:
+        spans = []
+        for label, value in pairs:
+            spans.append(f"<span><strong>{label}</strong>: {value}</span>")
+        meta_line = " · ".join(spans)
+    return f"""
+    <div style="margin: 8px 0 8px 0;">
+      <div style="background:#F5F9FF;border-left:6px solid {PRIMARY_COLOR};padding:14px 16px;border-radius:10px;box-shadow:0 1px 3px rgba(0,0,0,0.06);">
+        <div style="display:flex;gap:14px;align-items:flex-start;">
+          <div style="font-size:22px;">{icon}</div>
+          <div>
+            <div style="font-weight:700;color:{PRIMARY_COLOR};margin-bottom:4px;">{title}</div>
+            <div style="color:#000;font-size:14px;line-height:1.5;">{body_text}</div>
+            <div style="margin-top:8px;color:#000;font-size:13px;">{meta_line}</div>
+          </div>
+        </div>
+      </div>
+    </div>
+    """
+def render_disclaimer(text: str, icon: str = "⚠️", title: str = "Educational Use Only") -> str:
+    """Return the HTML for a disclaimer card with an ACCENT_COLOR left border.
+    `text`, `icon` and `title` are interpolated into the markup as-is,
+    without HTML escaping.
+    """
+    # Double quotes need no escaping inside a triple-quoted f-string.
+    card_html = f"""
+    <div style="margin: 8px 0 6px 0;">
+      <div style="background:#FFF4F4;border-left:6px solid {ACCENT_COLOR};padding:12px 16px;border-radius:8px;box-shadow:0 1px 3px rgba(0,0,0,0.06);">
+        <div style="display:flex;gap:10px;align-items:flex-start;color:#000;">
+          <span style="font-size:20px">{icon}</span>
+          <div>
+            <div style="font-weight:700; margin-bottom:4px;">{title}</div>
+            <div style="font-size:14px; line-height:1.4;">{text}</div>
+          </div>
+        </div>
+      </div>
+    </div>
+    """
+    return card_html