import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import load_model

# Model input dimensions expected by the loaded network.
IMG_HEIGHT = 96
IMG_WIDTH = 96

# Load the saved Keras model once at startup (not per frame).
model = load_model("model_01.keras")

# Class labels for the model's output units.
# NOTE(review): 25 letters (A-Y, no Z). Sign-MNIST-style datasets usually drop
# both J and Z (motion-based signs) — confirm this list matches the model's
# actual output dimension.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
          'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y']


def preprocess_frame(frame):
    """Preprocess a webcam frame into a batched model input tensor.

    Args:
        frame: Image as a NumPy array or TF tensor; grayscale (H, W),
            single-channel (H, W, 1), or RGB (H, W, 3).
            Pixel values are assumed to be in [0, 255] — TODO confirm,
            since Gradio delivers uint8 RGB by default.

    Returns:
        A float32 tensor of shape (1, IMG_HEIGHT, IMG_WIDTH, 3) with
        values scaled to [0, 1].
    """
    if isinstance(frame, np.ndarray):
        frame = tf.convert_to_tensor(frame, dtype=tf.float32)

    # Normalize rank: a bare (H, W) grayscale image gets a channel axis,
    # then any single-channel image is broadcast to 3 RGB channels.
    if frame.ndim == 2:
        frame = tf.expand_dims(frame, axis=-1)
    if frame.shape[-1] == 1:
        frame = tf.image.grayscale_to_rgb(frame)

    # Downscale to the original dataset resolution first, then upscale to
    # the model's input size. The intermediate 28x28 step intentionally
    # discards detail so live frames resemble the (low-res) training data.
    frame = tf.image.resize(frame, [28, 28])
    frame = tf.image.resize(frame, [IMG_HEIGHT, IMG_WIDTH])

    # Scale pixel values to [0, 1].
    frame = tf.cast(frame, tf.float32) / 255.0

    # Add the batch dimension.
    return tf.expand_dims(frame, axis=0)


def preprocess_frame_cnn(frame):
    """Alternative grayscale 64x64 preprocessing.

    NOTE(review): currently unused by the app — kept for a model variant?
    Assumes `frame` is a BGR image (OpenCV convention) — confirm, since
    Gradio supplies RGB.
    """
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    img = np.expand_dims(cv2.resize(img, (64, 64)), axis=0)
    return img


def predict_asl(frame):
    """Classify one frame and build a probability bar chart.

    Args:
        frame: Webcam frame as a NumPy array.

    Returns:
        Tuple of (predicted label string, matplotlib Figure of class
        probabilities).
    """
    processed_frame = preprocess_frame(frame)
    predictions = model.predict(processed_frame)  # shape (1, num_classes)
    # Index into the single batch element explicitly.
    predicted_label = labels[np.argmax(predictions[0])]

    # Bar chart of the per-class probabilities.
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(labels, predictions[0])
    ax.set_title("Class Probabilities")
    ax.set_ylabel("Probability")
    ax.set_xlabel("ASL Classes")
    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels, rotation=45)
    plt.tight_layout()
    # Close the figure so pyplot's registry doesn't accumulate one figure
    # per streamed frame (memory leak at 2 fps). The Figure object itself
    # stays alive and renderable by gr.Plot.
    plt.close(fig)
    return predicted_label, fig


css = """.my-group {max-width: 500px !important; max-height: 500px !important;} .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""

with gr.Blocks(css=css) as demo:
    with gr.Row():
        gr.Markdown("# ASL Recognition App")
    with gr.Row():
        with gr.Column(scale=1):
            input_img = gr.Image(sources=["webcam"], type="numpy",
                                 streaming=True, label="Webcam Input")
        with gr.Column(scale=1):
            output_label = gr.Label(label="Predicted ASL Sign")
            output_plot = gr.Plot(label="Class Probabilities")

    def gradio_pipeline(frame):
        """Stream callback: route one webcam frame to the classifier."""
        # Webcam streams can emit None at start/stop; skip those frames
        # instead of crashing the event handler.
        if frame is None:
            return None, None
        predicted_label, fig = predict_asl(frame)
        return predicted_label, fig

    input_img.stream(gradio_pipeline, [input_img],
                     [output_label, output_plot],
                     time_limit=300, stream_every=0.5)

if __name__ == "__main__":
    demo.launch()