Spaces:

sameernotes
/

ocr

Sleeping

App Files Files Community

sameernotes committed on Mar 19

Commit

afd0824

verified ·

1 Parent(s): 14350c4

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -19

app.py CHANGED Viewed

@@ -30,13 +30,12 @@ ENCODER_URL = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/label_e
 FONT_URL = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/NotoSansDevanagari-Regular.ttf"
 # Paths for local storage
-MODEL_PATH = "hindi_ocr_model.keras"
-ENCODER_PATH = "label_encoder.pkl"
-FONT_PATH = "NotoSansDevanagari-Regular.ttf"
-OUTPUT_DIR = "output"
-# Create output directory if it doesn't exist
-os.makedirs(OUTPUT_DIR, exist_ok=True)
 # Download model and encoder
 def download_file(url, dest):
@@ -56,6 +55,10 @@ def load_label_encoder():
     with open(ENCODER_PATH, 'rb') as f:
         return pickle.load(f)
 # Download required files on startup
 @app.on_event("startup")
 async def startup_event():
@@ -106,6 +109,9 @@ def run_sakshi_ocr(image_path):
         sys.stdout = old_stdout
     return buffer.getvalue()
 # Main OCR processing function
 def process_image(image_array):
     # Convert image array to grayscale
@@ -113,10 +119,14 @@ def process_image(image_array):
     # Word detection
     word_detected_img, word_count = detect_words(img)
-    word_detection_path = os.path.join(OUTPUT_DIR, "word_detection.png")
     cv2.imwrite(word_detection_path, word_detected_img)
     # First OCR model prediction
     try:
         img_resized = cv2.resize(img, (128, 32))
         img_norm = img_resized / 255.0
@@ -132,27 +142,28 @@ def process_image(image_array):
             ax.imshow(img, cmap='gray')
             ax.set_title(f"Predicted: {pred_label}", fontsize=12)
             ax.axis('off')
-            pred_path = os.path.join(OUTPUT_DIR, "prediction.png")
             plt.savefig(pred_path)
             plt.close()
         else:
-            pred_path = None
             pred_label = "Model or encoder not loaded"
     except Exception as e:
-        pred_path = None
         pred_label = f"Error: {str(e)}"
     # Sakshi OCR processing
     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
         cv2.imwrite(tmp_file.name, img)
         sakshi_output = run_sakshi_ocr(tmp_file.name)
-        os.remove(tmp_file.name)
     return {
         "sakshi_output": sakshi_output,
-        "word_detection_path": word_detection_path,
         "word_count": word_count,
-        "prediction_path": pred_path,
         "prediction_label": pred_label
     }
@@ -167,6 +178,15 @@ async def process(file: UploadFile = File(...)):
     if not file.content_type.startswith("image/"):
         raise HTTPException(status_code=400, detail="File must be an image")
     # Create a temporary file to save the uploaded image
     temp_file = tempfile.NamedTemporaryFile(delete=False)
     try:
@@ -193,18 +213,16 @@ async def process(file: UploadFile = File(...)):
 @app.get("/word-detection/")
 async def get_word_detection():
     """Return the word detection image."""
-    word_detection_path = Path(OUTPUT_DIR) / "word_detection.png"
-    if not word_detection_path.exists():
         raise HTTPException(status_code=404, detail="Word detection image not found. Process an image first.")
-    return FileResponse(word_detection_path)
 @app.get("/prediction/")
 async def get_prediction():
     """Return the prediction image."""
-    prediction_path = Path(OUTPUT_DIR) / "prediction.png"
-    if not prediction_path.exists():
         raise HTTPException(status_code=404, detail="Prediction image not found. Process an image first.")
-    return FileResponse(prediction_path)
 @app.get("/")
 async def root():

 FONT_URL = "https://huggingface.co/sameernotes/hindi-ocr/resolve/main/NotoSansDevanagari-Regular.ttf"
 # Paths for local storage
+MODEL_PATH = os.path.join(tempfile.gettempdir(), "hindi_ocr_model.keras")
+ENCODER_PATH = os.path.join(tempfile.gettempdir(), "label_encoder.pkl")
+FONT_PATH = os.path.join(tempfile.gettempdir(), "NotoSansDevanagari-Regular.ttf")
+# Use a temporary directory for outputs
+OUTPUT_DIR = tempfile.mkdtemp()
 # Download model and encoder
 def download_file(url, dest):
     with open(ENCODER_PATH, 'rb') as f:
         return pickle.load(f)
+# Set up global variables
+model = None
+label_encoder = None
 # Download required files on startup
 @app.on_event("startup")
 async def startup_event():
         sys.stdout = old_stdout
     return buffer.getvalue()
+# File storage for session
+session_files = {}
 # Main OCR processing function
 def process_image(image_array):
     # Convert image array to grayscale
     # Word detection
     word_detected_img, word_count = detect_words(img)
+    word_detection_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
     cv2.imwrite(word_detection_path, word_detected_img)
+    # Store the file path in our session dict
+    session_files['word_detection'] = word_detection_path
     # First OCR model prediction
+    pred_path = None
     try:
         img_resized = cv2.resize(img, (128, 32))
         img_norm = img_resized / 255.0
             ax.imshow(img, cmap='gray')
             ax.set_title(f"Predicted: {pred_label}", fontsize=12)
             ax.axis('off')
+            pred_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
             plt.savefig(pred_path)
             plt.close()
+            # Store the file path in our session dict
+            session_files['prediction'] = pred_path
         else:
             pred_label = "Model or encoder not loaded"
     except Exception as e:
         pred_label = f"Error: {str(e)}"
     # Sakshi OCR processing
     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
         cv2.imwrite(tmp_file.name, img)
         sakshi_output = run_sakshi_ocr(tmp_file.name)
+        os.unlink(tmp_file.name)
     return {
         "sakshi_output": sakshi_output,
+        "word_detection_path": word_detection_path if 'word_detection' in session_files else None,
         "word_count": word_count,
+        "prediction_path": pred_path if 'prediction' in session_files else None,
         "prediction_label": pred_label
     }
     if not file.content_type.startswith("image/"):
         raise HTTPException(status_code=400, detail="File must be an image")
+    # Clean up previous session files
+    for key, filepath in session_files.items():
+        if os.path.exists(filepath):
+            try:
+                os.unlink(filepath)
+            except:
+                pass
+    session_files.clear()
     # Create a temporary file to save the uploaded image
     temp_file = tempfile.NamedTemporaryFile(delete=False)
     try:
 @app.get("/word-detection/")
 async def get_word_detection():
     """Return the word detection image."""
+    if 'word_detection' not in session_files or not os.path.exists(session_files['word_detection']):
         raise HTTPException(status_code=404, detail="Word detection image not found. Process an image first.")
+    return FileResponse(session_files['word_detection'])
 @app.get("/prediction/")
 async def get_prediction():
     """Return the prediction image."""
+    if 'prediction' not in session_files or not os.path.exists(session_files['prediction']):
         raise HTTPException(status_code=404, detail="Prediction image not found. Process an image first.")
+    return FileResponse(session_files['prediction'])
 @app.get("/")
 async def root():