# ASL_MobileNetV3 / app.py
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import load_model
IMG_HEIGHT = 96
IMG_WIDTH = 96
# Load the saved Keras model
model = load_model("model_01.keras")
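
# Sanity check (a minimal sketch, assuming the saved file loads as a standard
# Keras model): print the input signature so the 96x96x3 assumption baked into
# IMG_HEIGHT/IMG_WIDTH is easy to verify in the logs.
print("Model input shape:", model.input_shape)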

# Define the labels for the ASL classes (static letters A through Y)
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
          'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
          'U', 'V', 'W', 'X', 'Y']

def preprocess_frame(frame):
    """Preprocess a video frame for the ASL model."""
    # Convert the frame to a TensorFlow tensor
    if isinstance(frame, np.ndarray):
        frame = tf.convert_to_tensor(frame, dtype=tf.float32)
    # Add a channel dimension and convert to RGB if the input is grayscale
    if frame.ndim == 2:
        frame = tf.expand_dims(frame, axis=-1)
        frame = tf.image.grayscale_to_rgb(frame)
    # Handle single-channel images that already carry a channel dimension
    if frame.shape[-1] == 1:
        frame = tf.image.grayscale_to_rgb(frame)
    # Downscale to the dataset resolution (28x28) first, so the webcam frame
    # matches the level of detail the model saw during training
    frame = tf.image.resize(frame, [28, 28])
    # Upscale to the model's expected input dimensions
    frame = tf.image.resize(frame, [IMG_HEIGHT, IMG_WIDTH])
    # Normalize pixel values to [0, 1]
    frame = tf.cast(frame, tf.float32) / 255.0
    # Add a batch dimension for model input
    frame = tf.expand_dims(frame, axis=0)
    return frame
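
# Quick shape check for the preprocessing pipeline (illustrative; the dummy
# frame is a stand-in for a real 640x480 webcam capture, not app data):
_dummy = np.zeros((480, 640, 3), dtype=np.uint8)
assert preprocess_frame(_dummy).shape == (1, IMG_HEIGHT, IMG_WIDTH, 3)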

def preprocess_frame_cnn(frame):
    """Alternative preprocessor for a grayscale 64x64 CNN (currently unused)."""
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    img = np.expand_dims(cv2.resize(img, (64, 64)), axis=0)
    return img

def predict_asl(frame):
    """Predict the ASL sign and return the label and a probability chart."""
    processed_frame = preprocess_frame(frame)
    predictions = model.predict(processed_frame)  # Class probabilities, shape (1, num_classes)
    predicted_label = labels[np.argmax(predictions)]  # Class with the highest probability
    # Generate a bar chart of the class probabilities
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(labels, predictions[0])
    ax.set_title("Class Probabilities")
    ax.set_ylabel("Probability")
    ax.set_xlabel("ASL Classes")
    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels, rotation=45)
    fig.tight_layout()
    return predicted_label, fig
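
# Illustrative note: `predictions[0]` lines up index-for-index with `labels`,
# so a top-3 readout (hypothetical snippet, not wired into the app) would be:
#   top3 = np.argsort(predictions[0])[::-1][:3]
#   [(labels[i], float(predictions[0][i])) for i in top3]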
css = """.my-group {max-width: 500px !important; max-height: 500px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
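
# Note: these classes only take effect on components created with a matching
# `elem_classes` argument (e.g., gr.Column(elem_classes="my-column")); Gradio
# does not attach them automatically.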

with gr.Blocks(css=css) as demo:
    with gr.Row():
        gr.Markdown("# ASL Recognition App")
    with gr.Row():
        with gr.Column(scale=1):
            input_img = gr.Image(sources=["webcam"], type="numpy", streaming=True, label="Webcam Input")
        with gr.Column(scale=1):
            output_label = gr.Label(label="Predicted ASL Sign")
            output_plot = gr.Plot(label="Class Probabilities")

    def gradio_pipeline(frame):
        # The webcam stream can deliver an empty frame (e.g., before the
        # camera starts), so skip prediction until a frame arrives
        if frame is None:
            return None, None
        predicted_label, fig = predict_asl(frame)
        return predicted_label, fig

    input_img.stream(gradio_pipeline, [input_img], [output_label, output_plot], time_limit=300, stream_every=0.5)

demo.launch()