sameernotes committed on
Commit
30b75f2
·
verified ·
1 Parent(s): 87b6a68

Upload 6 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hindi_ocr_model.keras filter=lfs diff=lfs merge=lfs -text
37
+ NotoSansDevanagari-Regular.ttf filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.9
5
+
6
+ RUN useradd -m -u 1000 user
7
+ USER user
8
+ ENV PATH="/home/user/.local/bin:$PATH"
9
+
10
+ WORKDIR /app
11
+
12
+ COPY --chown=user ./requirements.txt requirements.txt
13
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
+
15
+ COPY --chown=user . /app
16
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
NotoSansDevanagari-Regular.ttf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:385e78e6359a9d88a0f243d53b1209d7548361ba2194e2b9ec779bcaa7e8949d
3
+ size 219212
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import sys
4
+ import cv2
5
+ import base64
6
+ import pickle
7
+ import numpy as np
8
+ import tensorflow as tf
9
+ import matplotlib.pyplot as plt
10
+ import matplotlib.font_manager as fm
11
+ import tempfile
12
+ import sakshi_ocr
13
+
14
+ from fastapi import FastAPI, File, UploadFile, HTTPException
15
+ from fastapi.responses import HTMLResponse, JSONResponse
16
+
17
# Asset locations. They are relative, so they resolve against the directory
# the server process is started from.
MODEL_PATH = 'hindi_ocr_model.keras'
ENCODER_PATH = 'label_encoder.pkl'
FONT_PATH = 'NotoSansDevanagari-Regular.ttf'

# Register the bundled Devanagari font with matplotlib and make it the default
# family; when the file is missing, matplotlib's default font stays in effect.
if not os.path.exists(FONT_PATH):
    print("Custom font not found. Using default font.")
else:
    fm.fontManager.addfont(FONT_PATH)
    plt.rcParams['font.family'] = 'Noto Sans Devanagari'
28
+
29
+ # Load the OCR model
30
def load_model():
    """Load the Keras OCR model stored at ``MODEL_PATH``.

    Returns:
        The object returned by ``tf.keras.models.load_model``.

    Raises:
        FileNotFoundError: If nothing exists at ``MODEL_PATH``.
    """
    if os.path.exists(MODEL_PATH):
        return tf.keras.models.load_model(MODEL_PATH)
    raise FileNotFoundError(f"Model file not found at {MODEL_PATH}")
34
+
35
+ # Load the label encoder
36
def load_label_encoder():
    """Unpickle the label encoder stored at ``ENCODER_PATH``.

    Returns:
        Whatever object was pickled into the file.

    Raises:
        FileNotFoundError: If nothing exists at ``ENCODER_PATH``.
    """
    if os.path.exists(ENCODER_PATH):
        # NOTE: unpickling executes code from the file; only ship trusted encoders.
        with open(ENCODER_PATH, 'rb') as encoder_file:
            return pickle.load(encoder_file)
    raise FileNotFoundError(f"Label encoder file not found at {ENCODER_PATH}")
41
+
42
+ # Global loading so they persist across requests
43
# Load both artefacts once at import time so that every request reuses them.
model, label_encoder = load_model(), load_label_encoder()
45
+
46
+ # Function for word detection
47
def detect_words(image):
    """Outline word-like regions of a grayscale image and count them.

    The image is binarised with an inverted Otsu threshold and dilated twice
    with a 3x3 kernel; each external contour of the result whose bounding box
    is more than 10 pixels wide and more than 10 pixels high counts as a word.

    Args:
        image: Grayscale image (it is converted with ``COLOR_GRAY2BGR``).

    Returns:
        A ``(annotated, count)`` tuple: a BGR copy of the image with a green
        rectangle around every accepted region, and the number of regions.
    """
    _, inverted = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    merged = cv2.dilate(inverted, np.ones((3, 3), np.uint8), iterations=2)
    contours, _ = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    annotated = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    # Keep only boxes larger than 10 px in both dimensions; smaller ones are skipped.
    boxes = [box for box in map(cv2.boundingRect, contours) if box[2] > 10 and box[3] > 10]
    for left, top, width, height in boxes:
        cv2.rectangle(annotated, (left, top), (left + width, top + height), (0, 255, 0), 2)
    return annotated, len(boxes)
62
+
63
+ # Function to run Sakshi OCR and capture its output
64
def run_sakshi_ocr(image_path):
    """Run ``sakshi_ocr.generate`` on an image file and return what it printed.

    The return value of ``generate`` is ignored; the result is the text it
    wrote to ``sys.stdout`` while running.

    Args:
        image_path: Path of the image file handed to ``sakshi_ocr.generate``.

    Returns:
        str: Everything written to standard output during the call.

    Raises:
        Any exception raised by ``sakshi_ocr.generate``; ``sys.stdout`` is
        restored before the exception propagates.
    """
    # Function-scope import so the block does not depend on a new file-level import.
    from contextlib import redirect_stdout

    captured = io.StringIO()
    # redirect_stdout swaps sys.stdout for the whole process and restores it on
    # exit, including when generate() raises.
    with redirect_stdout(captured):
        sakshi_ocr.generate(image_path)
    return captured.getvalue()
73
+
74
+ # Utility function: convert image (numpy array) to a base64 encoded string
75
def image_to_base64(image, ext=".png"):
    """Encode an image array with OpenCV and return it as a base64 string.

    Args:
        image: Image array accepted by ``cv2.imencode``.
        ext: File extension selecting the encoder (default ``".png"``).

    Returns:
        The base64 text of the encoded image, or ``None`` when encoding fails.
    """
    ok, buffer = cv2.imencode(ext, image)
    return base64.b64encode(buffer).decode('utf-8') if ok else None
80
+
81
+ # Initialize FastAPI app
82
# ASGI application object; the Dockerfile CMD serves it with uvicorn as "app:app".
app = FastAPI(title="Hindi OCR App by sakshi")
83
+
84
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the upload page: one form that posts an image file to ``/predict``."""
    page = """
    <html>
        <head>
            <title>Hindi OCR App by sakshi</title>
        </head>
        <body>
            <h1>Hindi OCR App by sakshi</h1>
            <form action="/predict" enctype="multipart/form-data" method="post">
                <input name="file" type="file" accept="image/*">
                <input type="submit" value="Upload and Predict">
            </form>
        </body>
    </html>
    """
    return HTMLResponse(content=page)
101
+
102
def _render_prediction_image(img, pred_label):
    """Draw ``img`` with a "Predicted: ..." title and return the plot as a BGR array."""
    fig, ax = plt.subplots()
    try:
        ax.imshow(img, cmap='gray')
        ax.set_title(f"Predicted: {pred_label}", fontsize=12)
        ax.axis('off')
        buf = io.BytesIO()
        # Save this figure explicitly rather than whatever pyplot considers current.
        fig.savefig(buf, format="png")
    finally:
        # Close the figure even when drawing fails so failed requests do not
        # accumulate open figures.
        plt.close(fig)
    png_bytes = np.frombuffer(buf.getvalue(), np.uint8)
    return cv2.imdecode(png_bytes, cv2.IMREAD_COLOR)


def _run_sakshi_on_image(img):
    """Write ``img`` to a temporary PNG, run Sakshi OCR on it and return the captured text."""
    # Reserve a path and close the handle first, so OpenCV writes to a file that
    # is not held open by this process.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
        tmp_file_path = tmp_file.name
    try:
        if not cv2.imwrite(tmp_file_path, img):
            raise OSError(f"could not write temporary image {tmp_file_path}")
        return run_sakshi_ocr(tmp_file_path)
    finally:
        # Remove the file whether or not OCR succeeded.
        os.remove(tmp_file_path)


@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """Run word detection, the Keras OCR model and Sakshi OCR on an uploaded image.

    Args:
        file: The uploaded image; it is decoded as grayscale.

    Returns:
        JSONResponse with the keys ``word_count``, ``ocr_prediction``,
        ``sakshi_ocr_output``, ``original_image``, ``word_detected_image`` and
        ``prediction_image``. The three image values are base64 PNG strings
        (or ``None`` when encoding failed).

    Raises:
        HTTPException: 400 when the upload is empty or cannot be decoded as an
            image; 500 when model inference or rendering of the prediction
            image fails. A Sakshi OCR failure is not raised: its message is
            returned in ``sakshi_ocr_output``.
    """
    # Read and decode the uploaded image
    contents = await file.read()
    if not contents:
        # Reject empty uploads here instead of passing an empty buffer to OpenCV.
        raise HTTPException(status_code=400, detail="Error reading the image.")
    nparr = np.frombuffer(contents, np.uint8)
    img = cv2.imdecode(nparr, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise HTTPException(status_code=400, detail="Error reading the image.")

    # Encode the original image to base64 for visualization
    original_image = image_to_base64(cv2.cvtColor(img, cv2.COLOR_GRAY2BGR))

    # Word detection
    word_img, word_count = detect_words(img)
    word_img_encoded = image_to_base64(word_img)

    # OCR model prediction: the whole image is treated as a single word
    try:
        img_resized = cv2.resize(img, (128, 32))  # dsize is (width, height)
        img_norm = img_resized / 255.0
        img_input = img_norm[np.newaxis, ..., np.newaxis]  # shape: (1, 32, 128, 1)
        pred = model.predict(img_input)
        pred_label_idx = np.argmax(pred)
        # presumably a scikit-learn style encoder; only inverse_transform is used here
        pred_label = label_encoder.inverse_transform([pred_label_idx])[0]
        if isinstance(pred_label, np.generic):
            # Convert NumPy scalars to plain Python values so the JSON encoder accepts them.
            pred_label = pred_label.item()

        prediction_img_encoded = image_to_base64(_render_prediction_image(img, pred_label))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in OCR model processing: {e}") from e

    # Sakshi OCR is best-effort: report its failure in the payload instead of failing the request
    try:
        sakshi_output = _run_sakshi_on_image(img)
    except Exception as e:
        sakshi_output = f"Error running Sakshi OCR: {e}"

    # Prepare the response
    response_data = {
        "word_count": word_count,
        "ocr_prediction": pred_label,
        "sakshi_ocr_output": sakshi_output,
        "original_image": original_image,
        "word_detected_image": word_img_encoded,
        "prediction_image": prediction_img_encoded,
    }

    return JSONResponse(content=response_data)
163
+
164
if __name__ == "__main__":
    # Direct-run entry point on port 8000; the Dockerfile CMD starts uvicorn on
    # port 7860 instead and does not go through this branch.
    from uvicorn import run

    run(app, host="0.0.0.0", port=8000)
hindi_ocr_model.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19e4243a9588d0706e4b49a73d194d5606278e95e40ed38d0cfa1de1cc9905a4
3
+ size 12280598
label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efc157e2cf396d358a32e1815c63b9636b963344635ef12e36e0ecb686f0ba7d
3
+ size 1920
requirements.txt ADDED
File without changes