Spaces:

NaserNajeh
/

Horoof-Hybrid-OCR

Runtime error

App Files Community

NaserNajeh committed on Sep 5

Commit

7d26757

verified ·

1 Parent(s): 1532fcf

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -60

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os, io, base64, traceback
 import gradio as gr
 import fitz  # PyMuPDF
 from PIL import Image
@@ -25,12 +25,12 @@ def ocr_easyocr(pil_img: Image.Image) -> str:
 # ===============================================
 # 2) Inference API (يستهلك اعتمادات PRO بدل دقائق GPU)
-#    نحاول VQA أولاً ثم نسقط إلى image_to_text ثم chat.completions
 # ===============================================
 from huggingface_hub import InferenceClient
 _INFER_CLIENT = None
-# يمكن تغيير الموديل من Settings → Variables بوضع INFER_MODEL. الافتراضي:
-INFER_MODEL = os.environ.get("INFER_MODEL", "Qwen/Qwen2-VL-2B-Instruct")
 def get_infer_client():
     """تهيئة عميل الاستدلال مع مهلة أطول ورسالة واضحة إن غاب التوكين."""
@@ -44,10 +44,8 @@ def get_infer_client():
 def ocr_infer_api(pil_img: Image.Image) -> str:
     """
-    نحاول بالترتيب:
-    1) visual_question_answering (VQA)
-    2) image_to_text بدون prompt
-    3) chat.completions مع data URI كحل أخير
     """
     try:
         client = get_infer_client()
@@ -55,62 +53,29 @@ def ocr_infer_api(pil_img: Image.Image) -> str:
         # حضّر الصورة كـ BytesIO
         buf = io.BytesIO()
         pil_img.save(buf, format="PNG")
-        raw_bytes = buf.getvalue()
-        b64 = base64.b64encode(raw_bytes).decode("utf-8")
-        # --- المحاولة 1: VQA ---
         try:
-            buf.seek(0)
-            vqa_out = client.visual_question_answering(
-                image=buf,
-                question="اقرأ النص العربي في الصورة كما هو دون أي تغيير أو تعليق إضافي."
-            )
-            # بعض المزودين يرجعون list أو dict
-            if isinstance(vqa_out, list):
-                if vqa_out:
-                    if isinstance(vqa_out[0], dict) and "generated_text" in vqa_out[0]:
-                        txt = vqa_out[0]["generated_text"]
-                    else:
-                        txt = str(vqa_out[0])
-                else:
-                    txt = ""
-            else:
-                txt = str(vqa_out)
-            txt = (txt or "").strip()
             if txt:
                 return txt
-        except Exception:
-            pass  # ننتقل للمحاولة التالية
-        # --- المحاولة 2: image_to_text (بدون prompt) ---
-        last_err = None
         try:
-            buf.seek(0)
-            it_out = client.image_to_text(image=buf)
-            it_txt = (it_out or "").strip()
-            if it_txt:
-                return it_txt
-        except Exception as e_img2txt:
-            last_err = e_img2txt  # نحتفظ بآخر خطأ محتمل
-        # --- المحاولة 3: chat.completions مع data URI ---
-        try:
-            messages = [
-                {"role": "system", "content": "You are an OCR assistant. Return ONLY the Arabic text as-is."},
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "input_text", "text": "Extract Arabic text exactly as-is, no extra commentary."},
-                        {"type": "image_url", "image_url": f"data:image/png;base64,{b64}"},
-                    ],
-                },
-            ]
-            resp = client.chat.completions.create(messages=messages, max_tokens=2048)
-            txt = resp.choices[0].message.content or ""
-            return txt.strip()
-        except Exception as e_chat:
-            # إن فشل الجميع، نُظهر أوضح خطأ متاح
-            return f"حدث خطأ أثناء استدعاء Inference API: {repr(last_err or e_chat)}"
     except Exception as e:
         return f"حدث خطأ أثناء استدعاء Inference API: {repr(e)}"
@@ -129,7 +94,7 @@ except Exception:
 _HOROOF_MODEL = None
 _HOROOF_PROC = None
-# يمكن تغيير اسم نموذجك من Settings → Variables بوضع HOROOF_MODEL:
 HOROOF_MODEL_NAME = os.environ.get("HOROOF_MODEL", "NaserNajeh/Horoof")
 def ensure_horoof_loaded():
@@ -225,8 +190,8 @@ def diag_infer():
         info.append(f"HTTP check error: {repr(e)}")
     try:
         client = get_infer_client()
-        # ping بسيط
-        resp = client.text_generation("ping", max_new_tokens=1)
         info.append("basic text_generation ping: OK")
     except Exception as e:
         info.append(f"text_generation ping error: {repr(e)}")

+import os, io, base64, traceback, time
 import gradio as gr
 import fitz  # PyMuPDF
 from PIL import Image
 # ===============================================
 # 2) Inference API (يستهلك اعتمادات PRO بدل دقائق GPU)
+#    نستخدم TrOCR عربي عبر image_to_text فقط
 # ===============================================
 from huggingface_hub import InferenceClient
 _INFER_CLIENT = None
+# يمكنك تغييره من Settings → Variables بوضع INFER_MODEL، الافتراضي أدناه TrOCR عربي:
+INFER_MODEL = os.environ.get("INFER_MODEL", "David-Magdy/TR_OCR_LARGE")
 def get_infer_client():
     """تهيئة عميل الاستدلال مع مهلة أطول ورسالة واضحة إن غاب التوكين."""
 def ocr_infer_api(pil_img: Image.Image) -> str:
     """
     Run OCR on one image through the Inference API using image_to_text only.
     The image is serialized to PNG and sent to the client returned by
     get_infer_client(). The call is attempted at most twice, with a
     3-second pause before the second attempt, because the hosted model
     may still be loading on the first request.
     Args:
         pil_img: The page/image to recognize.
     Returns:
         The recognized text (stripped) on success; otherwise an Arabic
         error message describing the last failure. This function does not
         raise: every exception is converted into an error string.
     """
     try:
         client = get_infer_client()
         # Serialize once; raw bytes can be re-sent without any seek() bookkeeping.
         buf = io.BytesIO()
         pil_img.save(buf, format="PNG")
         png_bytes = buf.getvalue()
         # Pre-set the failure reason so it is defined even when both attempts
         # succeed at the HTTP level but yield no text (previously this path
         # hit an UnboundLocalError on `last`).
         last = "empty response (model returned no text)"
         for attempt in range(2):
             if attempt:
                 # Retry once: the model is sometimes in a "loading" state.
                 time.sleep(3)
             try:
                 out = client.image_to_text(image=png_bytes)
             except Exception as err:
                 last = repr(err)
                 continue
             # Depending on the huggingface_hub version the result is either a
             # plain str or an output object exposing `generated_text`.
             txt = str(getattr(out, "generated_text", out) or "").strip()
             if txt:
                 return txt
         return f"حدث خطأ أثناء استدعاء Inference API (image_to_text): {last}"
     except Exception as e:
         return f"حدث خطأ أثناء استدعاء Inference API: {repr(e)}"
 _HOROOF_MODEL = None
 _HOROOF_PROC = None
+# يمكنك تغييره من Settings → Variables بوضع HOROOF_MODEL:
 HOROOF_MODEL_NAME = os.environ.get("HOROOF_MODEL", "NaserNajeh/Horoof")
 def ensure_horoof_loaded():
         info.append(f"HTTP check error: {repr(e)}")
     try:
         client = get_infer_client()
+        # ping بسيط (قد يفشل لبعض النماذج؛ الهدف اختبار المصادقة والاتصال)
+        _ = client.text_generation("ping", max_new_tokens=1)
         info.append("basic text_generation ping: OK")
     except Exception as e:
         info.append(f"text_generation ping error: {repr(e)}")