# KNN/app.py
import numpy as np
import matplotlib.pyplot as plt
import streamlit as st
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from mlxtend.evaluate import bias_variance_decomp

# Use a dark background style for plots
plt.style.use('dark_background')
# Function to generate custom 2-D data for a given pattern
def generate_data(n_classes, n_samples, pattern='Linear'):
    X = np.zeros((n_classes * n_samples, 2))
    y = np.zeros(n_classes * n_samples, dtype='uint8')
    for j in range(n_classes):
        ix = range(n_samples * j, n_samples * (j + 1))
        if pattern == 'Spiral':
            r = np.linspace(0.0, 1, n_samples)  # radius
            t = np.linspace(j * 4, (j + 1) * 4, n_samples) + np.random.randn(n_samples) * 0.2  # theta
            X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
        elif pattern == 'Linear':
            X[ix] = np.random.rand(n_samples, 2) * [j * 2, 1] + np.random.randn(n_samples, 2) * 0.2
        elif pattern == 'Concentric Circle':
            t = np.linspace(0, 2 * np.pi, n_samples)
            r = j / float(n_classes) + np.random.randn(n_samples) * 0.1
            X[ix] = np.c_[r * np.cos(t), r * np.sin(t)]
        elif pattern == 'Blob':
            t = np.linspace(0, 2 * np.pi, n_samples)
            r = 0.8 + np.random.randn(n_samples) * 0.1
            X[ix] = np.c_[r * np.cos(t), r * np.sin(t)] + np.random.randn(n_samples, 2) * 0.2
        elif pattern == 'Crescent':
            half_samples = n_samples // 2
            theta = np.linspace(j * np.pi, (j + 2) * np.pi, n_samples)
            # Rising then falling radius ramp; the second half takes the
            # remainder so odd n_samples also produces a length-n_samples array
            r = np.concatenate((np.linspace(1.0, 2.5, half_samples),
                                np.linspace(2.5, 1.0, n_samples - half_samples)))
            X[ix] = np.c_[r * np.sin(theta), r * np.cos(theta)]
        elif pattern in ('Normal', 'Random'):
            # Both patterns draw a Gaussian cluster per class around a
            # randomly shifted centre (the original 'Normal' branch re-looped
            # over all classes and returned early, producing the same data)
            X[ix] = np.random.randn(n_samples, 2) * 0.5 + np.random.randn(2) * j * 2
        else:
            raise ValueError('Invalid pattern: {}'.format(pattern))
        y[ix] = j
    return X, y
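
# Example (illustrative): generate_data(3, 100, pattern='Spiral') returns
# X with shape (300, 2) and y with shape (300,), where y[i] is the integer
# class label (0, 1, or 2) of point X[i].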
# Function to plot the decision boundary and compute model evaluation metrics
def keffect(k):
    X, y = generate_data(num_classes, num_data_points, pattern=pattern)
    knn = KNeighborsClassifier(n_neighbors=k)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    mse, bias, var = bias_variance_decomp(knn, X_train, y_train, X_test, y_test,
                                          loss='mse', num_rounds=200, random_seed=1)
    # Create a meshgrid covering the training data for decision boundary plotting
    a = np.arange(start=X_train[:, 0].min() - 1, stop=X_train[:, 0].max() + 1, step=0.01)
    b = np.arange(start=X_train[:, 1].min() - 1, stop=X_train[:, 1].max() + 1, step=0.01)
    XX, YY = np.meshgrid(a, b)
    input_array = np.array([XX.ravel(), YY.ravel()]).T
    labels = knn.predict(input_array)
    # Plot the decision boundary and overlay the data points
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    ax.set_facecolor('#FFF')
    ax.contourf(XX, YY, labels.reshape(XX.shape), alpha=selected_alpha, cmap='Set1')
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap='Set1', edgecolors='black')
    ax.set_title('K-Nearest Neighbors (K = {})'.format(k), color='white')
    ax.set_xlabel('Feature 1', color='white')
    ax.set_ylabel('Feature 2', color='white')
    ax.tick_params(axis='x', colors='white')
    ax.tick_params(axis='y', colors='white')
    # Remove all four spines
    for spine in ax.spines.values():
        spine.set_visible(False)
    result = [accuracy, mse, bias, var]
    return fig, result
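
# Note on the metrics: for squared loss, the expected error decomposes as
# MSE = bias^2 + variance (+ irreducible noise). mlxtend's
# bias_variance_decomp estimates these terms by refitting the model on
# num_rounds bootstrap resamples of the training set; the value it reports
# as "bias" is already the squared-bias term, so mse ~= bias + var here.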
# Function to plot the bias-variance tradeoff over a range of K values
def plot_bias_variance_tradeoff(start_value, end_value):
    X, y = generate_data(num_classes, num_data_points, pattern=pattern)
    # Hold out a test set (as in keffect) so the decomposition is not
    # estimated on the same points the model was trained on
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
    ks = range(start_value, end_value + 1)  # include the selected end of the range
    mse, bias, var = [], [], []
    for k in ks:
        knn = KNeighborsClassifier(n_neighbors=k)
        mse_k, bias_k, var_k = bias_variance_decomp(knn, X_train, y_train, X_test, y_test,
                                                    loss='mse', num_rounds=200, random_seed=1)
        mse.append(mse_k)
        bias.append(bias_k)
        var.append(var_k)
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    ax.plot(ks, mse, label='MSE', color='crimson')
    ax.plot(ks, bias, label='Bias', color='magenta')
    ax.plot(ks, var, label='Variance', color='cyan')
    ax.legend()
    ax.set_title('Bias-Variance Tradeoff', color='white')
    ax.set_xlabel('Number of Neighbors (K)', color='white')
    ax.set_ylabel('Error', color='white')
    ax.tick_params(axis='x', colors='white')
    ax.tick_params(axis='y', colors='white')
    ax.set_xticks(list(range(start_value, end_value, 5)) + [end_value])
    ax.set_facecolor('#000')
    return fig
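
# Interpretation: small K fits the training data very closely (low bias,
# high variance), while large K averages over many neighbors and smooths
# the boundary (high bias, low variance); the MSE curve typically bottoms
# out at an intermediate K.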
# Create a Streamlit app to interact with the functions
st.set_page_config(page_title='K-Nearest Neighbors', layout='wide')
st.title('K-Nearest Neighbors')

with st.sidebar:
    # Plot settings
    st.header("Plot Settings")
    fig_width, fig_height = [st.slider(label, 1, 20, default)
                             for label, default in [("Figure Width", 10), ("Figure Height", 6)]]
    selected_alpha = st.slider('Select the transparency of the decision boundary',
                               min_value=0.0, max_value=1.0, value=0.5, step=0.1)
    st.write("---")
    # Data settings
    st.header("Data Settings")
    pattern = st.selectbox('Select a pattern', ['Linear', 'Concentric Circle', 'Spiral', 'Blob', 'Crescent', 'Normal', 'Random'])
    num_classes = st.slider('Select the number of classes', min_value=2, max_value=10, value=2, step=1)
    num_data_points = st.slider('Select the number of data points', min_value=20, max_value=200, value=40, step=20)
    st.write("---")
    st.header("Select the number of neighbors (K)")
    selected_k = st.slider(label="K", min_value=1, max_value=50, value=3, step=1)
    st.write("---")
    st.header("Select a range for bias-variance tradeoff")
    start_value, end_value = st.slider(
        label="K range",
        min_value=1,
        max_value=50,
        value=(1, 20),
        step=1
    )
    st.write("---")

if st.button('Get Decision Boundary'):
    # Cap K so it never exceeds the number of available data points
    fig, result = keffect(min(selected_k, num_data_points))
    st.pyplot(fig)
    st.write('Model evaluation metrics')
    st.write('Accuracy:', round(result[0], 3))
    st.write('MSE:', round(result[1], 3))
    st.write('Bias:', round(result[2], 3))
    st.write('Variance:', round(result[3], 3))

if st.button('Get Bias-Variance Tradeoff'):
    fig2 = plot_bias_variance_tradeoff(min(start_value, num_data_points), min(end_value, num_data_points))
    st.pyplot(fig2)
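
# To run the app locally (assuming streamlit, scikit-learn, mlxtend,
# matplotlib, and numpy are installed):
#   streamlit run app.py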