Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from ultralytics import YOLO | |
| from PIL import Image | |
| from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
| from qreader import QReader | |
| import cv2 | |
| import json | |
| import ast | |
| from datetime import datetime | |
# Models shared by every request; they are created once, when the module is imported.
_TROCR_CHECKPOINT = "microsoft/trocr-large-stage1"

# TrOCR pre-processor and encoder-decoder weights come from the same checkpoint.
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

# QR code detector/decoder used on the uploaded photo.
qreader = QReader()
def _recognize_text(crop):
    """Run the module-level TrOCR processor/model on a PIL crop and return the decoded text."""
    pixel_values = processor(crop, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def _read_qr_code(image_path):
    """Return the first decodable QR payload found in the image file, or None."""
    bgr = cv2.imread(image_path)
    if bgr is None:
        # cv2.imread signals an unreadable file by returning None instead of raising.
        raise ValueError(f"could not read image file: {image_path}")
    decoded = qreader.detect_and_decode(image=cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))
    # Entries may be None for codes that were located but could not be decoded.
    return next((payload for payload in decoded if payload is not None), None)


def _save_reading(qr_payload, reading):
    """Write the QR dictionary plus a timestamped reading to '<Address>.json' and return the path."""
    # literal_eval only accepts Python literals, so the QR content is parsed, never executed.
    data_dict = ast.literal_eval(qr_payload)
    if not isinstance(data_dict, dict):
        raise ValueError("QR code content is not a dictionary")
    # The address comes from the QR code (untrusted input): neutralise path
    # separators so the file is always created inside the working directory.
    safe_name = str(data_dict['Address']).replace("/", "_").replace("\\", "_")
    file_path = f"{safe_name}.json"
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    data_dict['Last_Reading'] = {timestamp: reading}
    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(data_dict, file, indent=4)
    return file_path


def yolo_and_trocr(image_input, save):
    """Detect the meter reading and QR code in an uploaded photo.

    Args:
        image_input: The uploaded image, either a filesystem path or a
            file-like object exposing the path as ``.name`` (both forms are
            accepted because the Gradio File component may deliver either).
        save: When true, the QR code content (a dictionary literal with an
            'Address' key) is extended with a 'Last_Reading' entry and written
            to '<Address>.json'.

    Returns:
        A 4-tuple matching the four interface outputs:
        ``(annotated_image, reading_text, qr_text, saved_file_path)``.
        ``saved_file_path`` is None when nothing was saved. On any failure the
        tuple is ``(None, error_message, retry_hint, None)`` so the UI always
        receives one value per output component.
    """
    try:
        if image_input is None:
            raise ValueError("no image was provided")
        image_path = getattr(image_input, "name", image_input)

        # YOLO instantiated from the trained model. A fresh instance per call:
        # Ultralytics advises against sharing one model object across threads.
        yolo = YOLO('best.pt')
        results = yolo(image_path, conf=0.5, iou=0.7)
        # Channel order of the plotted array is reversed before building the
        # PIL image (presumably BGR -> RGB).
        res = results[0].plot()[:, :, [2, 1, 0]]
        boxes = results[0].boxes.xyxy
        image = Image.fromarray(res)
        if len(boxes) == 0:
            raise ValueError("no meter reading was detected")

        # Only the first detected box contributes to the reading, so OCR is
        # run on that box alone.
        # NOTE(review): the crop is taken from the annotated image, so drawn
        # box outlines/labels may be visible to the OCR model - confirm whether
        # cropping the unannotated photo would be preferable.
        img_cropped = image.crop(tuple(boxes[0].tolist()))
        text = _recognize_text(img_cropped)
        text = f"{text[:5]}.{text[5:]}"  # fix decimals: point after the fifth character

        # Reading the QR code from the image
        qr_payload = _read_qr_code(image_path)
        decoded_text = "No QR code detected" if qr_payload is None else qr_payload

        # Outputting the image, reading, and QR code info without saving the data
        if not save:
            return image, text, decoded_text, None

        # Saving the info in a dictionary for output if requested
        if qr_payload is None:
            raise ValueError("cannot save the reading without a readable QR code")
        file_path = _save_reading(qr_payload, text)
        return image, text, decoded_text, file_path
    except Exception as e:
        # UI boundary: report the problem in the text outputs instead of
        # crashing, with exactly one value per declared output component.
        return (
            None,
            f"Your input is invalid: {str(e)}",
            "Try Again: Make sure the meter and QR code are clearly captured",
            None,
        )
# Gradio UI: one uploaded photo plus a "Save" checkbox in; annotated photo,
# meter reading, QR code text and the optionally saved file out.
app = gr.Interface(
    fn=yolo_and_trocr,
    inputs=[
        gr.File(label="Input: Water Meter Image"),
        gr.Checkbox(label="Save"),
    ],
    outputs=[
        gr.Image(label="Output: Water Meter Photo"),
        gr.Textbox(label="Output: Water Meter Reading"),
        gr.Textbox(label="Output: QR Code Detection"),
        gr.File(label="Output: Saved Data"),
    ],
    title="Water Meter Reading with YOLO and OCR",
)

# Start the web server for the interface.
app.launch()