import gradio as gr import cv2 import numpy as np import torch from torchvision import transforms import torch.nn as nn from ultralytics import YOLO import os import sys # --- 1. OCR Model (CRNN) --- class CRNN(nn.Module): def __init__(self, vocab_size, hidden_size=256): super(CRNN, self).__init__() self.cnn = nn.Sequential( nn.Conv2d(3, 64, 3, 1, 1), nn.ReLU(True), nn.MaxPool2d(2, 2), nn.Conv2d(64, 128, 3, 1, 1), nn.ReLU(True), nn.MaxPool2d(2, 2), nn.Conv2d(128, 256, 3, 1, 1), nn.BatchNorm2d(256), nn.ReLU(True), nn.Conv2d(256, 256, 3, 1, 1), nn.ReLU(True), nn.MaxPool2d((2, 1)), nn.Conv2d(256, 512, 3, 1, 1), nn.BatchNorm2d(512), nn.ReLU(True), nn.Conv2d(512, 512, 3, 1, 1), nn.ReLU(True), nn.MaxPool2d((2, 1)), nn.Conv2d(512, 512, 2, 1, 0), nn.BatchNorm2d(512), nn.ReLU(True) ) with torch.no_grad(): dummy_input = torch.zeros(1, 3, 32, 128) cnn_out = self.cnn(dummy_input) b, c, h, w = cnn_out.size() feature_size = c * h self.rnn = nn.LSTM(feature_size, hidden_size, bidirectional=True, num_layers=2, batch_first=True, dropout=0.5) self.classifier = nn.Linear(hidden_size * 2, vocab_size) def forward(self, x): conv = self.cnn(x) b, c, h, w = conv.size() conv = conv.contiguous().view(b, c * h, w) conv = conv.permute(0, 2, 1) rnn_out, _ = self.rnn(conv) output = self.classifier(rnn_out) return output def decode_prediction(preds, idx_map): preds = torch.softmax(preds, 2) preds = torch.argmax(preds, 2) preds = preds.detach().cpu().numpy() decoded_texts = [] for pred in preds: sequence = [] for i in range(len(pred)): char_index = pred[i] if char_index != 0 and (i == 0 or char_index != pred[i-1]): sequence.append(idx_map.get(char_index, '?')) decoded_texts.append("".join(sequence)) return decoded_texts class OCRModel: def __init__(self, model_path): self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') try: checkpoint = torch.load(model_path, map_location=self.device) except FileNotFoundError: print(f"Error: OCR Model file not found at '{model_path}'") sys.exit(1) char_map = checkpoint['char_map'] self.idx_map = {v: k for k, v in char_map.items()} vocab_size = len(char_map) + 1 self.model = CRNN(vocab_size).to(self.device) self.model.load_state_dict(checkpoint['model_state_dict']) self.model.eval() self.transform = transforms.Compose([ transforms.ToPILImage(), transforms.Resize((32, 128)), transforms.ToTensor(), transforms.Normalize(mean=[0.5], std=[0.5]) ]) print(f"OCR Model loaded successfully from {model_path} on {self.device}.") def predict(self, image_np): image_tensor = self.transform(image_np).unsqueeze(0).to(self.device) with torch.no_grad(): output = self.model(image_tensor) predicted_text = decode_prediction(output, self.idx_map) return predicted_text[0] # --- 2. Number Plate Detector (YOLO) --- class NumberPlateDetector: def __init__(self, model_path, confidence_threshold=0.5): self.model = YOLO(model_path) self.confidence_threshold = confidence_threshold print(f"YOLO Detector loaded successfully from {model_path}.") def detect_plates(self, image): # Modified to accept an image array results = self.model(image, conf=self.confidence_threshold) detections = [] for result in results: boxes = result.boxes if boxes is not None: for box in boxes: x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy()) confidence = box.conf[0].cpu().numpy() detections.append({'bbox': (x1, y1, x2, y2), 'confidence': float(confidence)}) return detections # --- 3. Gradio Pipeline --- # Load models once YOLO_MODEL_PATH = 'models/yolov8_8e.pt' OCR_MODEL_PATH = 'models/best_ocr_model_50.pth' detector = NumberPlateDetector(YOLO_MODEL_PATH) ocr = OCRModel(OCR_MODEL_PATH) def recognize_plate(image): """ Main function for the Gradio interface. Takes an uploaded image, performs detection and OCR, and returns the annotated image. """ if image is None: return None # Gradio provides images in RGB format, but OpenCV works with BGR. # The YOLO model was likely trained on BGR images, so we convert. original_image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) detections = detector.detect_plates(original_image_bgr) print(f"\nFound {len(detections)} potential number plates.") annotated_image = image.copy() # Work with the RGB image for annotation for i, det in enumerate(detections): x1, y1, x2, y2 = det['bbox'] # Crop the number plate from the original image (use the RGB version) padding = 5 plate_crop = image[max(0, y1-padding):min(image.shape[0], y2+padding), max(0, x1-padding):min(image.shape[1], x2+padding)] if plate_crop.size == 0: print(f" - Skipping detection {i+1} due to empty crop.") continue # OCR model expects an RGB image, which `plate_crop` already is. plate_text = ocr.predict(plate_crop) print(f" - Detection {i+1}: BBox={det['bbox']}, Conf={det['confidence']:.2f}, Predicted Text='{plate_text}'") # Draw bounding box and predicted text on the image cv2.rectangle(annotated_image, (x1, y1), (x2, y2), (0, 255, 0), 3) label = f"{plate_text}" # Position the label above the bounding box label_y = y1 - 15 if y1 - 15 > 15 else y1 + 25 cv2.putText(annotated_image, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 3) return annotated_image # --- 4. Launch Gradio Interface --- if __name__ == "__main__": iface = gr.Interface( fn=recognize_plate, inputs=gr.Image(type="numpy", label="Upload an Image"), outputs=gr.Image(type="numpy", label="Result"), title="Number Plate Recognition", description="Upload an image to detect and read number plates. The system uses YOLOv8 for detection and a CRNN model for OCR.", examples=[ ['images/image1.jpg'], ['images/image2.jpg'], ['images/image3.jpg'] ] ) iface.launch()