import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import load_model

IMG_HEIGHT = 96
IMG_WIDTH = 96

# Load the saved Keras model
model = load_model("model_01.keras")

# Define the labels for ASL classes
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
          'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
          'U', 'V', 'W', 'X', 'Y']
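# Note: this label set contains 25 classes (A through Y); 'Z' is not included.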
def preprocess_frame(frame):
    """Preprocess the video frame for the ASL model."""
    # Convert the frame to a TensorFlow tensor
    if isinstance(frame, np.ndarray):
        frame = tf.convert_to_tensor(frame, dtype=tf.float32)

    # Reshape to add a channel dimension if the input is grayscale
    if frame.ndim == 2:
        frame = tf.expand_dims(frame, axis=-1)
        frame = tf.image.grayscale_to_rgb(frame)

    # Ensure the frame has 3 channels (RGB)
    if frame.shape[-1] == 1:  # Grayscale image
        frame = tf.image.grayscale_to_rgb(frame)

    # First scale down to dataset dimensions for consistency
    frame = tf.image.resize(frame, [28, 28])

    # Resize to the target model input dimensions
    frame = tf.image.resize(frame, [IMG_HEIGHT, IMG_WIDTH])

    # Normalize pixel values to [0, 1]
    frame = tf.cast(frame, tf.float32) / 255.0

    # Add batch dimension for model input
    frame = tf.expand_dims(frame, axis=0)
    return frame
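
# Quick shape sanity check (hypothetical snippet, not executed by the app):
# a raw webcam frame of any size should come out as a (1, 96, 96, 3) float tensor.
#   dummy = np.zeros((480, 640, 3), dtype=np.uint8)
#   assert preprocess_frame(dummy).shape == (1, IMG_HEIGHT, IMG_WIDTH, 3)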
def preprocess_frame_cnn(frame):
    """Alternative preprocessing: grayscale 64x64 input with a batch dimension.

    Currently not used by the Gradio pipeline below, which calls preprocess_frame.
    """
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    img = np.expand_dims(cv2.resize(img, (64, 64)), axis=0)
    return img
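
# Hypothetical usage of the alternative path: an OpenCV BGR frame becomes a
# single-channel (1, 64, 64) batch, matching a model trained on 64x64 grayscale input.
#   bgr_frame = cv2.imread("sample_frame.jpg")    # placeholder path
#   preprocess_frame_cnn(bgr_frame).shape         # -> (1, 64, 64)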
def predict_asl(frame):
    """Predict the ASL sign and return the label and probabilities."""
    processed_frame = preprocess_frame(frame)
    predictions = model.predict(processed_frame)  # Predict probabilities
    predicted_label = labels[np.argmax(predictions)]  # Get the class with the highest probability

    # Generate a bar chart for probabilities
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar(labels, predictions[0])
    ax.set_title("Class Probabilities")
    ax.set_ylabel("Probability")
    ax.set_xlabel("ASL Classes")
    ax.set_xticks(range(len(labels)))
    ax.set_xticklabels(labels, rotation=45)
    plt.tight_layout()

    return predicted_label, fig
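
# Offline sketch (assumes a local test image; the live app feeds webcam frames instead):
#   test_img = cv2.cvtColor(cv2.imread("asl_sample.jpg"), cv2.COLOR_BGR2RGB)
#   label, chart = predict_asl(test_img)
#   print("Predicted sign:", label)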
css = """.my-group {max-width: 500px !important; max-height: 500px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""
with gr.Blocks(css=css) as demo:
    with gr.Row():
        gr.Markdown("# ASL Recognition App")
    with gr.Row():
        with gr.Column(scale=1):
            input_img = gr.Image(sources=["webcam"], type="numpy", streaming=True, label="Webcam Input")
        with gr.Column(scale=1):
            output_label = gr.Label(label="Predicted ASL Sign")
            output_plot = gr.Plot(label="Class Probabilities")
    def gradio_pipeline(frame):
        predicted_label, fig = predict_asl(frame)
        return predicted_label, fig
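    # Stream webcam frames through the pipeline roughly every 0.5 s, ending each session after 300 s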
    input_img.stream(gradio_pipeline, [input_img], [output_label, output_plot], time_limit=300, stream_every=0.5)
demo.launch()
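# Note: on Hugging Face Spaces the bare launch() above is sufficient; when running
# locally, demo.launch(share=True) would additionally expose a temporary public link.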