from tensorflow import keras
from tensorflow.keras.models import model_from_json
import pandas as pd
import numpy as np
import cv2
import os
import gradio as gr
from googletrans import Translator

translator = Translator()

# Build the label vocabulary from the training metadata.
train_df = pd.read_csv("train.csv")
label_processor = keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(train_df["tag"])
)
print(label_processor.get_vocabulary())

labels = train_df["tag"].values
labels = label_processor(labels[..., None]).numpy()

IMG_SIZE = 224
BATCH_SIZE = 64
EPOCHS = 100
MAX_SEQ_LENGTH = 20
NUM_FEATURES = 2048

# Load the trained sequence model: architecture from model.json, weights from model.h5.
with open("model.json", "r") as json_file:
    sequence_model_json = json_file.read()
sequence_model = model_from_json(sequence_model_json)
sequence_model.load_weights("model.h5")


def crop_center_square(frame):
    # Crop the largest centred square from the frame.
    y, x = frame.shape[0:2]
    min_dim = min(y, x)
    start_x = (x // 2) - (min_dim // 2)
    start_y = (y // 2) - (min_dim // 2)
    return frame[start_y : start_y + min_dim, start_x : start_x + min_dim]


def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_center_square(frame)
            frame = cv2.resize(frame, resize)
            frame = frame[:, :, [2, 1, 0]]  # BGR -> RGB
            frames.append(frame)
            if len(frames) == max_frames:
                break
    finally:
        cap.release()
    return np.array(frames)


def create_clips(video_path, interval):
    # Split the input video into consecutive clips of `interval` seconds,
    # saved as clip_1.mp4 ... clip_N.mp4 in the working directory.
    interval = int(interval)
    NoOfClips = 0
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    duration = frame_count / fps
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    for i in range(0, int(duration), interval):
        NoOfClips += 1
        start_time = i
        end_time = min(i + interval, duration)
        start_frame = int(start_time * fps)
        end_frame = int(end_time * fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        output_path = f"clip_{NoOfClips}.mp4"
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        for j in range(start_frame, end_frame):
            ret, frame = cap.read()
            if ret:
                out.write(frame)
            else:
                break
        out.release()
    cap.release()
    return NoOfClips


def build_feature_extractor():
    # InceptionV3 (ImageNet weights, global average pooling) as a per-frame feature extractor.
    feature_extractor = keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_SIZE, IMG_SIZE, 3),
    )
    preprocess_input = keras.applications.inception_v3.preprocess_input

    inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
    preprocessed = preprocess_input(inputs)
    outputs = feature_extractor(preprocessed)
    return keras.Model(inputs, outputs, name="feature_extractor")


feature_extractor = build_feature_extractor()


def prepare_single_video(frames):
    frames = frames[None, ...]
    frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")

    for i, batch in enumerate(frames):
        video_length = batch.shape[0]
        length = min(MAX_SEQ_LENGTH, video_length)
        for j in range(length):
            frame_features[i, j, :] = feature_extractor.predict(batch[None, j, :])
        frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

    return frame_features, frame_mask


def sequence_prediction(path):
    class_vocab = label_processor.get_vocabulary()

    # Clips are written to the working directory by create_clips, so the path is used as-is.
    frames = load_video(path)
    frame_features, frame_mask = prepare_single_video(frames)
    probabilities = sequence_model.predict([frame_features, frame_mask])[0]
    # Return the label with the highest probability for this clip.
    return class_vocab[int(np.argmax(probabilities))]


def SignTotext(video, interval):
    NoOfClips = create_clips(video, interval)
    Text = []
    # Clips are numbered starting at 1 (clip_1.mp4 ... clip_N.mp4).
    for i in range(1, NoOfClips + 1):
        Text.append(sequence_prediction(f"clip_{i}.mp4"))
    EnglishText = " ".join(Text)
    translated_text = translator.translate(EnglishText, dest="ur")
    return EnglishText, translated_text.text


# Gradio 3+ component API (gr.inputs / gr.outputs are deprecated).
demo = gr.Interface(
    fn=SignTotext,
    inputs=[
        gr.Video(),
        gr.Number(label="Enter the duration (in seconds) in which one sign is completed"),
    ],
    outputs=[
        gr.Textbox(label="English Text"),
        gr.Textbox(label="Urdu Text"),
    ],
    title="Urdu Sign to Text",
)
demo.launch(debug=True)
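
# ---------------------------------------------------------------------------
# For reference only: the actual classifier is defined entirely by
# model.json / model.h5. Assuming this project follows the standard Keras
# CNN-RNN video-classification recipe (GRU layers over the per-frame
# InceptionV3 features, with the boolean frame mask as a second input),
# the loaded sequence model would look roughly like the sketch below.
# Layer sizes are illustrative guesses; this function is never called.
def build_sequence_model_sketch(num_classes):
    frame_features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_input = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    x = keras.layers.GRU(16, return_sequences=True)(frame_features_input, mask=mask_input)
    x = keras.layers.GRU(8)(x)
    x = keras.layers.Dropout(0.4)(x)
    x = keras.layers.Dense(8, activation="relu")(x)
    output = keras.layers.Dense(num_classes, activation="softmax")(x)
    return keras.Model([frame_features_input, mask_input], output)

# Example: build_sequence_model_sketch(len(label_processor.get_vocabulary())).summary()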