# Streamlit app: interactive K-Nearest Neighbors decision-boundary explorer
# with bias-variance tradeoff analysis on synthetic 2-D datasets.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
from mlxtend.evaluate import bias_variance_decomp
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Use a dark background style for plots
plt.style.use('dark_background')
# Function to generate custom data
def generate_data(n_classes, n_samples, pattern='Linear'):
    """Generate a 2-D synthetic classification dataset.

    Parameters
    ----------
    n_classes : int
        Number of distinct class labels.
    n_samples : int
        Number of points generated per class.
    pattern : str
        One of 'Linear', 'Spiral', 'Concentric Circle', 'Blob',
        'Crescent', 'Normal', 'Random'.

    Returns
    -------
    X : ndarray of shape (n_classes * n_samples, 2)
        Point coordinates.
    y : ndarray of shape (n_classes * n_samples,), dtype uint8
        Class label (0 .. n_classes-1) for each row of X.

    Raises
    ------
    ValueError
        If `pattern` is not one of the supported names.
    """
    X = np.zeros((n_classes * n_samples, 2))
    y = np.zeros(n_classes * n_samples, dtype='uint8')
    for j in range(n_classes):
        ix = range(n_samples * j, n_samples * (j + 1))
        if pattern == 'Spiral':
            r = np.linspace(0.0, 1, n_samples)  # radius
            t = np.linspace(j * 4, (j + 1) * 4, n_samples) + np.random.randn(n_samples) * 0.2  # theta
            X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
        elif pattern == 'Linear':
            X[ix] = np.random.rand(n_samples, 2) * [j * 2, 1] + np.random.randn(n_samples, 2) * 0.2
        elif pattern == 'Concentric Circle':
            t = np.linspace(0, 2 * np.pi, n_samples)
            r = j / float(n_classes) + np.random.randn(n_samples) * 0.1
            X[ix] = np.c_[r * np.cos(t), r * np.sin(t)]
        elif pattern == 'Blob':
            t = np.linspace(0, 2 * np.pi, n_samples)
            r = 0.8 + np.random.randn(n_samples) * 0.1
            X[ix] = np.c_[r * np.cos(t), r * np.sin(t)] + np.random.randn(n_samples, 2) * 0.2
        elif pattern == 'Crescent':
            half_samples = n_samples // 2
            theta = np.linspace(j * np.pi, (j + 2) * np.pi, n_samples)
            # BUGFIX: the second ramp must contain n_samples - half_samples
            # points so that r always matches theta's length; the old code
            # used half_samples twice and crashed for odd n_samples.
            r = np.concatenate((np.linspace(1.0, 2.5, half_samples),
                                np.linspace(2.5, 1.0, n_samples - half_samples)))
            X[ix] = np.c_[r * np.sin(theta), r * np.cos(theta)]
        elif pattern in ('Normal', 'Random'):
            # Both patterns draw one Gaussian cluster per class at a random
            # offset. The old 'Normal' branch re-ran the whole generation
            # loop internally and returned early — same effect, collapsed here.
            X[ix] = np.random.randn(n_samples, 2) * 0.5 + np.random.randn(2) * j * 2
        else:
            raise ValueError('Invalid pattern: {}'.format(pattern))
        y[ix] = j
    return X, y
# Function to plot decision boundary and calculate model evaluation metrics
def keffect(k):
    """Fit a KNN classifier with `k` neighbors on freshly generated data and
    plot its decision boundary.

    Reads the module-level widget values `num_classes`, `num_data_points`,
    `pattern`, `fig_width`, `fig_height`, and `selected_alpha`.

    Returns a tuple (fig, [accuracy, mse, bias, variance]).
    """
    X, y = generate_data(num_classes, num_data_points, pattern=pattern)
    knn = KNeighborsClassifier(n_neighbors=k)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=42)
    knn.fit(X_train, y_train)
    accuracy = accuracy_score(y_test, knn.predict(X_test))
    mse, bias, var = bias_variance_decomp(
        knn, X_train, y_train, X_test, y_test,
        loss='mse', num_rounds=200, random_seed=1)
    # Dense grid over the training range (1-unit margin) for the boundary plot.
    xs = np.arange(start=X_train[:, 0].min() - 1, stop=X_train[:, 0].max() + 1, step=0.01)
    ys = np.arange(start=X_train[:, 1].min() - 1, stop=X_train[:, 1].max() + 1, step=0.01)
    XX, YY = np.meshgrid(xs, ys)
    grid_labels = knn.predict(np.array([XX.ravel(), YY.ravel()]).T)
    # Plot decision boundary and the raw points on top of it.
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    ax.set_facecolor('#FFF')
    ax.contourf(XX, YY, grid_labels.reshape(XX.shape), alpha=selected_alpha, cmap='Set1', edgecolors='black')
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap='Set1', edgecolors='black')
    ax.set_title('K-Nearest Neighbors (K = {})'.format(k), color='white')
    ax.set_xlabel('Feature 1', color='white')
    ax.set_ylabel('Feature 2', color='white')
    for axis_name in ('x', 'y'):
        ax.tick_params(axis=axis_name, colors='white')
    # Hide all four spines for a frameless look.
    for side in ('right', 'left', 'top', 'bottom'):
        ax.spines[side].set_visible(False)
    return fig, [accuracy, mse, bias, var]
# Function to plot bias-variance tradeoff
def plot_bias_variance_tradeoff(start_value, end_value):
    """Plot MSE, bias, and variance of KNN for K in [start_value, end_value].

    Reads the module-level widget values `num_classes`, `num_data_points`,
    `pattern`, `fig_width`, and `fig_height`.

    Parameters
    ----------
    start_value, end_value : int
        Inclusive range of neighbor counts to evaluate.

    Returns
    -------
    matplotlib.figure.Figure
    """
    X, y = generate_data(num_classes, num_data_points, pattern=pattern)
    # BUGFIX: include end_value itself — the x-ticks below explicitly label
    # it and the UI slider presents the range as inclusive; the old
    # range(start_value, end_value) stopped one short.
    ks = range(start_value, end_value + 1)
    mse, bias, var = [], [], []
    for k in ks:
        knn = KNeighborsClassifier(n_neighbors=k)
        # NOTE(review): the same (X, y) is used for both fitting and
        # evaluation, so these are in-sample estimates — confirm intended.
        mse_k, bias_k, var_k = bias_variance_decomp(knn, X, y, X, y, loss='mse', num_rounds=200, random_seed=1)
        mse.append(mse_k)
        bias.append(bias_k)
        var.append(var_k)
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    ax.plot(ks, mse, label='MSE', color='crimson')
    ax.plot(ks, bias, label='Bias', color='magenta')
    ax.plot(ks, var, label='Variance', color='cyan')
    ax.legend()
    ax.set_title('Bias-Variance Tradeoff', color='white')
    ax.set_xlabel('Number of Neighbors (K)', color='white')
    ax.set_ylabel('Error', color='white')
    ax.tick_params(axis='x', colors='white')
    ax.tick_params(axis='y', colors='white')
    ax.set_xticks(list(range(start_value, end_value, 5)) + [end_value])
    ax.set_facecolor('#000')
    return fig
# Create a streamlit app to interact with the functions
# NOTE: Streamlit renders widgets in statement order, so the ordering of
# every st.* call below is part of the app's behavior — do not reorder.
st.set_page_config(page_title='K-Nearest Neighbors', layout='wide')
st.title('K-Nearest Neighbors')
with st.sidebar:
    # Set up Streamlit sidebar
    st.sidebar.header("Plot Settings")
    # Two sliders (figure width / height) built from one comprehension.
    [fig_width, fig_height] = [st.sidebar.slider(label, 1, 20, default) for label, default in [("Figure Width", 10), ("Figure Height", 6)]]
    selected_alpha = st.sidebar.slider('Select the transparency of the decision boundary', min_value=0.0, max_value=1.0, value=0.5, step=0.1)
    st.write("---")
    st.sidebar.header("Data Settings")
    # These three values are read as module-level globals by keffect() and
    # plot_bias_variance_tradeoff().
    pattern = st.selectbox('Select a pattern', ['Linear', 'Concentric Circle', 'Spiral', 'Blob', 'Crescent', 'Normal', 'Random'])
    num_classes = st.slider('Select the number of classes', min_value=2, max_value=10, value=2, step=1)
    num_data_points = st.slider('Select the number of data points', min_value=20, max_value=200, value=40, step=20)
    st.write("---")
    st.sidebar.header("Select the number of neighbors (K)")
    selected_k = st.slider(label="", min_value=1, max_value=50, value=3, step=1)
    st.write("---")
    st.sidebar.header("Select a range for bias-variance tradeoff")
    range_slider = st.slider(
        label="",
        min_value=1,
        max_value=50,
        value=(1, 20),
        step=1
    )
    start_value, end_value = range_slider
    st.write("---")
if st.button('Get Decision Boundary'):
    # st.write('Decision Boundary')
    # K is clamped to the number of data points so KNN cannot be asked for
    # more neighbors than samples exist.
    fig, result = keffect(min(selected_k, num_data_points))
    st.write(fig)
    st.write('Model evaluation metrics')
    st.write('Accuracy:', round(result[0], 3))
    st.write('MSE:', round(result[1], 3))
    st.write('Bias:', round(result[2], 3))
    st.write('Variance:', round(result[3], 3))
if st.button('Get Bias-Variance Tradeoff'):
    # st.write('Bias-Variance Tradeoff')
    # Both range endpoints are likewise clamped to the sample count.
    fig2 = plot_bias_variance_tradeoff(min(start_value, num_data_points), min(end_value, num_data_points))
    st.write(fig2)