import os, io, base64, traceback

import gradio as gr
import fitz  # PyMuPDF
from PIL import Image
import numpy as np
# =======================
# 1) EasyOCR (CPU - free)
# =======================
import easyocr

_EASY_READER = None

def get_easy_reader():
    global _EASY_READER
    if _EASY_READER is None:
        _EASY_READER = easyocr.Reader(['ar'], gpu=False)
    return _EASY_READER

def ocr_easyocr(pil_img: Image.Image) -> str:
    reader = get_easy_reader()
    arr = np.array(pil_img)
    lines = reader.readtext(arr, detail=0, paragraph=True)
    return "\n".join([x.strip() for x in lines if x and x.strip()])
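
# Quick local check of the EasyOCR path (a minimal sketch; "page.png" is an
# illustrative filename, not part of this app). The first call downloads the
# Arabic detection/recognition weights, so it is slow once:
#
#   print(ocr_easyocr(Image.open("page.png").convert("RGB")))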

# ===============================================
# 2) Inference API (uses PRO credits instead of GPU minutes)
# ===============================================
from huggingface_hub import InferenceClient

_INFER_CLIENT = None

# The model can be overridden from Secrets → Variables by setting INFER_MODEL; default:
INFER_MODEL = os.environ.get("INFER_MODEL", "Qwen/Qwen2-VL-2B-Instruct")

def get_infer_client():
    """Initialize the inference client with a longer timeout and a clear message if the token is missing."""
    global _INFER_CLIENT
    if _INFER_CLIENT is None:
        token = os.environ.get("HF_TOKEN")
        if not token:
            raise RuntimeError("لا يوجد HF_TOKEN في Secrets. أضِفه من Settings → Variables and secrets.")
        _INFER_CLIENT = InferenceClient(model=INFER_MODEL, token=token, timeout=120)
    return _INFER_CLIENT

def ocr_infer_api(pil_img: Image.Image) -> str:
    """Try the chat.completions interface with raw image bytes first; if that fails, fall back to a data URI."""
    try:
        client = get_infer_client()
        buf = io.BytesIO()
        pil_img.save(buf, format="PNG")
        raw_bytes = buf.getvalue()
        b64 = base64.b64encode(raw_bytes).decode("utf-8")
        # Attempt 1: pass the image as raw bytes (some backends accept this).
        messages = [
            {"role": "system", "content": "You are an OCR assistant. Return ONLY the Arabic text as-is."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Extract Arabic text exactly as-is, no extra commentary."},
                    {"type": "image", "image": raw_bytes},
                ],
            },
        ]
        try:
            resp = client.chat.completions.create(messages=messages, max_tokens=2048)
            txt = resp.choices[0].message.content or ""
            return txt.strip()
        except Exception:
            # Attempt 2: pass the image as a base64 data URI via image_url.
            messages_fallback = [
                {"role": "system", "content": "You are an OCR assistant. Return ONLY the Arabic text as-is."},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Extract Arabic text exactly as-is, no extra commentary."},
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
                    ],
                },
            ]
            resp = client.chat.completions.create(messages=messages_fallback, max_tokens=2048)
            txt = resp.choices[0].message.content or ""
            return txt.strip()
    except Exception as e:
        # Return a useful message instead of an empty line.
        return f"حدث خطأ أثناء استدعاء Inference API: {repr(e)}"

# =====================================================
# 3) Horoof (Qari run locally): requires CUDA to work effectively on the Space
# =====================================================
try:
    import torch
    HAS_TORCH = True
    HAS_CUDA = torch.cuda.is_available()
except Exception:
    HAS_TORCH = False
    HAS_CUDA = False

_HOROOF_MODEL = None
_HOROOF_PROC = None

# Your model name can be overridden from Secrets → Variables by setting HOROOF_MODEL; default:
HOROOF_MODEL_NAME = os.environ.get("HOROOF_MODEL", "NaserNajeh/Horoof")

def ensure_horoof_loaded():
    """Load the Horoof model lazily (only when needed) to reduce startup time on CPU-basic."""
    global _HOROOF_MODEL, _HOROOF_PROC
    if _HOROOF_MODEL is None:
        if not HAS_TORCH:
            raise RuntimeError("حزمة torch غير متاحة. ثبّت torch أو استخدم Backend آخر.")
        if not HAS_CUDA:
            raise RuntimeError("خيار Horoof المحلي يتطلب GPU (CUDA). اختر EasyOCR أو Inference API.")
        from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
        _HOROOF_MODEL = Qwen2VLForConditionalGeneration.from_pretrained(
            HOROOF_MODEL_NAME, torch_dtype="auto"
        ).to("cuda")
        _HOROOF_PROC = AutoProcessor.from_pretrained(HOROOF_MODEL_NAME)

def ocr_horoof(pil_img: Image.Image) -> str:
    if not HAS_TORCH or not HAS_CUDA:
        return "⚠️ خيار Horoof المحلي يتطلب GPU (CUDA). اختر EasyOCR أو Inference API."
    ensure_horoof_loaded()
    # Qwen2-VL expects a chat-templated text prompt alongside the image, not the image alone.
    messages = [{"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Extract Arabic text exactly as-is, no extra commentary."},
    ]}]
    prompt = _HOROOF_PROC.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = _HOROOF_PROC(text=[prompt], images=[pil_img], return_tensors="pt").to("cuda")
    gen = _HOROOF_MODEL.generate(**inputs, max_new_tokens=1800)
    gen = gen[:, inputs["input_ids"].shape[1]:]  # keep only the newly generated tokens
    text = _HOROOF_PROC.batch_decode(gen, skip_special_tokens=True)[0]
    return (text or "").strip()

# ===========================
# Helper: convert a PDF into page images
# ===========================
def pdf_to_images(pdf_bytes: bytes, dpi: int = 220, max_pages: int = 0):
    pages_imgs = []
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    total = doc.page_count
    n_pages = total if (max_pages in [0, None]) else min(max_pages, total)
    for i in range(n_pages):
        page = doc.load_page(i)
        pix = page.get_pixmap(dpi=dpi, alpha=False)
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        pages_imgs.append((i + 1, img))
    doc.close()
    return pages_imgs
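
# Local smoke test for the rasterizer (a minimal sketch; "sample.pdf" is an
# illustrative filename, not shipped with this Space):
#
#   with open("sample.pdf", "rb") as f:
#       for page_no, img in pdf_to_images(f.read(), dpi=150, max_pages=1):
#           print(page_no, img.size)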

# =========================================
# Main function + backend selection
# =========================================
BACKENDS = ["EasyOCR (CPU - مجاني)", "Inference API (Qwen2-VL)", "Horoof (محلي - يتطلب GPU)"]

def ocr_pdf(pdf_file, dpi, limit_pages, backend):
    if pdf_file is None:
        return "لم يتم رفع ملف."
    try:
        pdf_bytes = pdf_file.read() if hasattr(pdf_file, "read") else pdf_file
        pages = pdf_to_images(pdf_bytes, dpi=int(dpi), max_pages=int(limit_pages))
        results = []
        for idx, img in pages:
            if backend.startswith("EasyOCR"):
                txt = ocr_easyocr(img)
            elif backend.startswith("Inference API"):
                txt = ocr_infer_api(img)
            else:
                txt = ocr_horoof(img)
            results.append(f"--- صفحة {idx} ---\n{txt}")
        return "\n\n".join(results) if results else "لا توجد صفحات."
    except Exception as e:
        # Print the traceback to help with diagnosis in the Logs tab.
        traceback.print_exc()
        return f"حدث خطأ: {repr(e)}"

# =======================
# Gradio UI + API name
# =======================
with gr.Blocks(title="Horoof Hybrid OCR") as demo:
    gr.Markdown("### OCR عربي هجين: مجاني على CPU (EasyOCR)، أو عبر Inference API، أو Horoof محليًا على GPU.")
    with gr.Row():
        pdf_in = gr.File(label="ارفع ملف PDF", file_types=[".pdf"], type="binary")
    with gr.Row():
        dpi = gr.Slider(150, 300, value=220, step=10, label="دقّة التحويل (DPI)")
        limit_pages = gr.Number(value=0, precision=0, label="حدّ الصفحات (0 = كل الصفحات)")
        backend = gr.Dropdown(choices=BACKENDS, value="EasyOCR (CPU - مجاني)", label="المحرّك (Backend)")
    run_btn = gr.Button("بدء التحويل")
    out = gr.Textbox(label="النص المستخرج", lines=24)
    # api_name makes this Space callable as an API endpoint:
    run_btn.click(fn=ocr_pdf, inputs=[pdf_in, dpi, limit_pages, backend], outputs=out, api_name="ocr_pdf")

if __name__ == "__main__":
    demo.launch()
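
# Calling the Space as an API from another machine (a minimal sketch;
# "USERNAME/horoof-hybrid-ocr" and "document.pdf" are illustrative placeholders):
#
#   from gradio_client import Client, handle_file
#
#   client = Client("USERNAME/horoof-hybrid-ocr")
#   text = client.predict(
#       handle_file("document.pdf"),   # pdf_in
#       220,                           # dpi
#       0,                             # limit_pages (0 = all pages)
#       "EasyOCR (CPU - مجاني)",       # backend
#       api_name="/ocr_pdf",
#   )
#   print(text)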