import gradio as gr
from transformers import BlipProcessor, BlipForQuestionAnswering, MarianMTModel, MarianTokenizer
from PIL import Image, ImageDraw, ImageFont
import torch, uuid
from datetime import datetime

# ✅ Load BLIP model
blip_model = BlipForQuestionAnswering.from_pretrained("sharawy53/diploma")
processor = BlipProcessor.from_pretrained("sharawy53/diploma")

# ✅ Load translation models
ar_en_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ar-en")
ar_en_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-ar-en")
en_ar_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
en_ar_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-ar")

# ✅ Manual Arabic medical term dictionary
# (answers are lowercased before lookup, so all keys are lowercase;
#  the spaced "x - ray" variants match BLIP's tokenized output)
medical_terms = {
    "chest x-ray": "أشعة سينية للصدر",
    "x-ray": "أشعة سينية",
    "ct scan": "تصوير مقطعي محوسب",
    "ct": "المسح المقطعي",
    "mri": "تصوير بالرنين المغناطيسي",
    "ultrasound": "تصوير بالموجات فوق الصوتية",
    "normal": "طبيعي",
    "abnormal": "غير طبيعي",
    "brain": "الدماغ",
    "fracture": "كسر",
    "no abnormality detected": "لا توجد شذوذات",
    "left lung": "الرئة اليسرى",
    "right lung": "الرئة اليمنى",
    "x - ray": "أشعة سينية",
    "chest x - ray": "أشعة سينية للصدر",
    "cardiomegaly": "تضخم القلب",
}

# Fixes for some known questions whose machine translation is unreliable
question_fixes = {
    "what is the unnatural in this image?": "ما الشيء غير الطبيعي في هذه الصورة؟",
    "what is abnormal in this image?": "ما الشيء غير الطبيعي في هذه الصورة؟",
    "is this image normal?": "هل هذه الصورة طبيعية؟"
}

# ✅ Translation utilities
def translate_ar_to_en(text):
    inputs = ar_en_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = ar_en_model.generate(**inputs)
    return ar_en_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

def translate_en_to_ar(text):
    if text.lower().strip() in question_fixes:
        return question_fixes[text.lower().strip()]
    inputs = en_ar_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = en_ar_model.generate(**inputs)
    translated = en_ar_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    # Guard against known spurious MT output; fall back to a fixed question if one exists
    if "القرآن" in translated or "يتفاعل" in translated:
        return question_fixes.get(text.lower().strip(), "سؤال غير مفهوم")
    return translated

def translate_answer_medical(answer_en):
    # Prefer the curated medical dictionary; fall back to machine translation
    return medical_terms.get(answer_en.lower().strip(), translate_en_to_ar(answer_en))

# ✅ Arabic font helper
def get_font(size=22):
    try:
        return ImageFont.truetype("Amiri-Regular.ttf", size)
    except OSError:
        return ImageFont.load_default()

# ✅ Report generation function
def generate_report_image(image, question_ar, question_en, answer_ar, answer_en):
    width, height = 1000, 700
    background = Image.new("RGB", (width, height), color="white")
    draw = ImageDraw.Draw(background)
    font = get_font(22)
    font_bold = get_font(26)

    draw.text((40, 20), "📋 Medical VQA Screenshot Report", font=font_bold, fill="black")
    draw.text((40, 60), f"🕓 Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", font=font, fill="gray")

    img_resized = image.resize((300, 300))
    background.paste(img_resized, (50, 110))

    x, y = 380, 110
    spacing = 60
    lines = [
        f" السؤال بالعربية:\n{question_ar}",
        f" Question in English:\n{question_en}",
        f" الإجابة بالعربية:\n{answer_ar}",
        f" Answer in English:\n{answer_en}"
    ]
    for line in lines:
        for subline in line.split("\n"):
            draw.text((x, y), subline, font=font, fill="black")
            y += spacing

    file_name = f"report_{uuid.uuid4().hex[:8]}.png"
    background.save(file_name)
    return file_name
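# --- Optional: correct Arabic glyph shaping in the report image ---
# PIL's draw.text renders Arabic as disconnected, left-to-right glyphs. A
# minimal sketch of a fix, assuming the third-party packages
# `arabic-reshaper` and `python-bidi` are installed (neither is used
# elsewhere in this app). Not wired in; to apply it, wrap the Arabic strings
# in generate_report_image, e.g. draw.text((x, y), shape_arabic(subline), ...).
def shape_arabic(text):
    try:
        import arabic_reshaper  # assumed dependency: pip install arabic-reshaper
        from bidi.algorithm import get_display  # assumed dependency: pip install python-bidi
        # Join letters into their contextual forms, then reorder for RTL display
        return get_display(arabic_reshaper.reshape(text))
    except ImportError:
        # Fall back to the raw string if the optional packages are missing
        return text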
# ✅ Main VQA function
def vqa_multilingual(image, question):
    if image is None or not question or not question.strip():
        return "يرجى رفع صورة وكتابة سؤال.", "", "", "", None

    # Detect Arabic input by checking for characters in the Arabic Unicode block
    is_arabic = any('\u0600' <= c <= '\u06FF' for c in question)

    question_ar = question.strip() if is_arabic else translate_en_to_ar(question)
    question_en = translate_ar_to_en(question) if is_arabic else question.strip()

    inputs = processor(image, question_en, return_tensors="pt")
    with torch.no_grad():
        output = blip_model.generate(**inputs)
    answer_en = processor.decode(output[0], skip_special_tokens=True).strip()
    answer_ar = translate_answer_medical(answer_en)

    report_image_path = generate_report_image(image, question_ar, question_en, answer_ar, answer_en)

    return (
        question_ar,
        question_en,
        answer_ar,
        answer_en,
        report_image_path
    )

# ✅ Gradio interface
gr.Interface(
    fn=vqa_multilingual,
    inputs=[
        gr.Image(type="pil", label="📷 Upload Medical Image"),
        gr.Textbox(label="💬 Your Question (Arabic or English)")
    ],
    outputs=[
        gr.Textbox(label="🟠 Arabic Question"),
        gr.Textbox(label="🟢 English Question"),
        gr.Textbox(label="🟠 Arabic Answer"),
        gr.Textbox(label="🟢 English Answer"),
        gr.Image(type="filepath", label="📸 Report Screenshot")
    ],
    title="🧠 Bilingual Medical VQA",
    description="Upload an X-ray or medical image and ask a question in Arabic or English. Get bilingual answers and an image-based report."
).launch()
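# --- Optional: programmatic smoke test (a sketch, not part of the app) ---
# With the app running, the endpoint can be exercised from another process
# via the separate `gradio_client` package (assumed installed; `handle_file`
# needs a recent version). The URL and api_name below are the Gradio
# defaults and may differ in your deployment. launch() blocks, so run this
# from a second process rather than uncommenting it here:
#
# from gradio_client import Client, handle_file
# client = Client("http://127.0.0.1:7860/")
# q_ar, q_en, a_ar, a_en, report_path = client.predict(
#     handle_file("chest_xray.png"),   # hypothetical sample image path
#     "هل هذه الصورة طبيعية؟",          # "Is this image normal?"
#     api_name="/predict",
# )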