import os, io, json, traceback
import gradio as gr
import fitz  # PyMuPDF
from PIL import Image

import spaces  # مطلوب ل ZeroGPU
from huggingface_hub import hf_hub_download

# ===== Model settings =====
# Model repo id; can be overridden through the HOROOF_MODEL environment variable.
HOROOF_MODEL_NAME = os.environ.get("HOROOF_MODEL", "NaserNajeh/Horoof")

# Lazy loading to reduce startup time: these stay None until load_horoof()
# assigns them on the first call.
_model = None
_tokenizer = None
_image_processor = None

def _load_clean_config(repo_id: str):
    """Load the model config for ``repo_id`` with quantization settings removed.

    The goal is to keep ``from_pretrained`` from going down the bitsandbytes
    path when the published ``config.json`` carries quantization settings.

    ``AutoConfig`` is a factory class and has no ``from_dict`` constructor, so
    the config is built through ``AutoConfig.from_pretrained`` and the unwanted
    entries are deleted from the resulting object instead of from raw JSON.

    Args:
        repo_id: Hub repository id (or local path) of the model.

    Returns:
        The config object for the model, without ``quantization_config``,
        ``load_in_4bit`` or ``load_in_8bit`` attributes.

    Raises:
        Whatever ``AutoConfig.from_pretrained`` raises when the config cannot
        be fetched or parsed.
    """
    from transformers import AutoConfig

    cfg = AutoConfig.from_pretrained(repo_id)
    # Remove any keys that could force a bitsandbytes-backed load.
    for key in ("quantization_config", "load_in_4bit", "load_in_8bit"):
        if hasattr(cfg, key):
            delattr(cfg, key)
    return cfg

def load_horoof():
    """Load the Horoof (Qwen2-VL) model, tokenizer and image processor once.

    The first call loads everything onto the GPU in fp16 and stores the objects
    in the module-level ``_model``, ``_tokenizer`` and ``_image_processor``;
    later calls return immediately. The globals are only assigned after every
    component has loaded, so a failed attempt leaves them untouched and the
    next call retries from scratch.

    Raises:
        AssertionError: CUDA is not available. Propagated unchanged so that
            callers which handle ``AssertionError`` separately (as ``ocr_pdf``
            does) can show it as a warning.
        RuntimeError: Any other failure while importing or loading; the
            underlying exception is chained as ``__cause__``.
    """
    global _model, _tokenizer, _image_processor
    if _model is not None:
        return
    try:
        import torch
        from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoImageProcessor

        if not torch.cuda.is_available():
            raise AssertionError("هذه النسخة تتطلب GPU (CUDA) مفعّل على الـSpace.")

        # Lightweight settings: strip quantization so bitsandbytes is never invoked.
        cfg = _load_clean_config(HOROOF_MODEL_NAME)

        tokenizer = AutoTokenizer.from_pretrained(HOROOF_MODEL_NAME, trust_remote_code=False)
        image_processor = AutoImageProcessor.from_pretrained(HOROOF_MODEL_NAME, trust_remote_code=False)

        model = Qwen2VLForConditionalGeneration.from_pretrained(
            HOROOF_MODEL_NAME,
            config=cfg,
            torch_dtype=torch.float16,   # fp16 on the GPU
        ).to("cuda")
    except AssertionError:
        # Keep the "GPU required" signal distinguishable from load failures.
        raise
    except Exception as e:
        raise RuntimeError(f"تعذّر تحميل النموذج: {e}") from e

    # Publish all three together; _model is the "already loaded" flag.
    _tokenizer = tokenizer
    _image_processor = image_processor
    _model = model

def pdf_to_images(pdf_bytes: bytes, dpi: int = 220, max_pages: int = 0):
    """Render the pages of a PDF into PIL images.

    Args:
        pdf_bytes: Raw content of the PDF file.
        dpi: Resolution passed to PyMuPDF when rasterizing each page.
        max_pages: Maximum number of pages to render, counted from the first
            page. A falsy or non-positive value renders every page.

    Returns:
        A list of ``(page_number, image)`` tuples in page order, where
        ``page_number`` is 1-based and ``image`` is an RGB ``PIL.Image``.
    """
    rendered = []
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        total = doc.page_count
        n_pages = min(max_pages, total) if (max_pages and max_pages > 0) else total
        for i in range(n_pages):
            pix = doc.load_page(i).get_pixmap(dpi=dpi, alpha=False)
            # alpha=False yields 3 bytes per pixel, matching PIL's "RGB" mode.
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            rendered.append((i + 1, img))
    return rendered

def ocr_page_with_horoof(pil_img: Image.Image, max_new_tokens: int = 1200) -> str:
    """Run Horoof on a single page image and return the recognized text.

    Loads the model on first use, builds a one-turn Qwen2-VL chat prompt
    (image + Arabic instruction), generates, and decodes only the newly
    generated tokens.

    Args:
        pil_img: The page image to read.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The model's answer with surrounding whitespace stripped (possibly "").

    Raises:
        AssertionError, RuntimeError: Propagated from ``load_horoof``.
    """
    load_horoof()

    # Chat message in the format Qwen2-VL expects.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": pil_img},
                {"type": "text", "text": "اقرأ النص العربي في الصورة كما هو دون أي تعديل أو تفسير."},
            ],
        }
    ]

    # Render the chat template as text first; tokenization happens below.
    prompt = _tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

    vision_inputs = _image_processor(images=pil_img, return_tensors="pt")

    # The template emits a single image placeholder, but the model needs one
    # placeholder token per merged vision patch. Qwen2VLProcessor normally does
    # this expansion; since tokenizer and image processor are used separately
    # here, it has to be done by hand.
    image_token = getattr(_tokenizer, "image_token", None) or "<|image_pad|>"
    grid_thw = vision_inputs.get("image_grid_thw")
    if grid_thw is not None and image_token in prompt:
        merge_size = getattr(_image_processor, "merge_size", 2)
        n_image_tokens = max(1, int(grid_thw[0].prod()) // (merge_size ** 2))
        prompt = prompt.replace(image_token, image_token * n_image_tokens, 1)

    text_inputs = _tokenizer([prompt], return_tensors="pt")
    inputs = {**vision_inputs, **text_inputs}
    inputs = {k: (v.to("cuda") if hasattr(v, "to") else v) for k, v in inputs.items()}

    output_ids = _model.generate(**inputs, max_new_tokens=max_new_tokens)

    # generate() returns prompt + continuation; drop the prompt so the
    # instruction text does not leak into the OCR result.
    prompt_len = inputs["input_ids"].shape[1]
    new_ids = output_ids[:, prompt_len:]
    text = _tokenizer.batch_decode(new_ids, skip_special_tokens=True)[0]
    return (text or "").strip()

@spaces.GPU  # Required for ZeroGPU: makes the call reserve a GPU
def ocr_pdf(pdf_file, dpi, limit_pages):
    """Main entry point called by Gradio: OCR an uploaded PDF page by page.

    Args:
        pdf_file: Uploaded PDF, either raw bytes or an object with ``read()``;
            ``None`` when nothing was uploaded.
        dpi: Rendering resolution; converted with ``int()``.
        limit_pages: Number of pages to process; a falsy value means one page.

    Returns:
        The recognized text of each page under a ``--- صفحة N ---`` header,
        pages separated by a blank line, or a human-readable message when no
        file was given, the PDF has no pages, or an error occurred.
    """
    if pdf_file is None:
        return "لم يتم رفع ملف."
    try:
        if hasattr(pdf_file, "read"):
            raw = pdf_file.read()
        else:
            raw = pdf_file

        # One page by default, for testing.
        page_cap = 1
        if limit_pages:
            page_cap = int(limit_pages)

        rendered = pdf_to_images(raw, dpi=int(dpi), max_pages=page_cap)
        if not rendered:
            return "لا توجد صفحات."

        return "\n\n".join(
            f"--- صفحة {number} ---\n{ocr_page_with_horoof(image)}"
            for number, image in rendered
        )
    except AssertionError as warning:
        return f"⚠️ {warning}"
    except Exception as err:
        # Full traceback goes to the server log; the user sees a short summary.
        traceback.print_exc()
        return f"حدث خطأ: {err!r}"

# Gradio UI: a PDF upload, two rendering controls, a run button and a text
# area for the result. Widget labels are user-facing Arabic strings.
with gr.Blocks(title="Horoof OCR (ZeroGPU)") as demo:
    gr.Markdown("### Horoof OCR على ZeroGPU (Qwen2-VL) — بدون bitsandbytes/torchvision.")
    # ocr_pdf accepts either raw bytes or an object exposing read().
    pdf_in = gr.File(label="ارفع ملف PDF", file_types=[".pdf"], type="binary")
    # Rendering resolution, 150-300 DPI in steps of 10 (default 220).
    dpi = gr.Slider(150, 300, value=220, step=10, label="دقة التحويل (DPI)")
    # Number of pages to process; defaults to 1.
    limit_pages = gr.Number(value=1, precision=0, label="عدد الصفحات (للاختبار؛ زِد لاحقًا)")
    run_btn = gr.Button("بدء التحويل")
    out = gr.Textbox(label="النص المستخرج", lines=24)

    # Turn on Gradio's request queue, then wire the button to the OCR handler.
    demo.queue()
    run_btn.click(fn=ocr_pdf, inputs=[pdf_in, dpi, limit_pages], outputs=out, api_name="ocr_pdf")

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()