import gradio as gr
from transformers import BlipProcessor, BlipForQuestionAnswering, MarianMTModel, MarianTokenizer
from PIL import Image, ImageDraw, ImageFont
import torch, uuid
from datetime import datetime

# ✅ Load BLIP model
blip_model = BlipForQuestionAnswering.from_pretrained("sharawy53/diploma")
processor = BlipProcessor.from_pretrained("sharawy53/diploma")

# ✅ Load translation models
ar_en_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ar-en")
ar_en_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-ar-en")
en_ar_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
en_ar_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-ar")

# ✅ Manual Arabic medical term dictionary
# (answers are lowercased before lookup, so all keys are lowercase;
#  the spaced "x - ray" variants match BLIP's tokenized output)
medical_terms = {
    "chest x-ray": "أشعة سينية للصدر",
    "x-ray": "أشعة سينية",
    "ct scan": "تصوير مقطعي محوسب",
    "ct": "المسح المقطعي",
    "mri": "تصوير بالرنين المغناطيسي",
    "ultrasound": "تصوير بالموجات فوق الصوتية",
    "normal": "طبيعي",
    "abnormal": "غير طبيعي",
    "brain": "الدماغ",
    "fracture": "كسر",
    "no abnormality detected": "لا توجد شذوذات",
    "left lung": "الرئة اليسرى",
    "right lung": "الرئة اليمنى",
    "x - ray": "أشعة سينية",
    "chest x - ray": "أشعة سينية للصدر",
    "cardiomegaly": "تضخم القلب",
}

# Fixes for some known questions whose machine translation is unreliable
question_fixes = {
    "what is the unnatural in this image?": "ما الشيء غير الطبيعي في هذه الصورة؟",
    "what is abnormal in this image?": "ما الشيء غير الطبيعي في هذه الصورة؟",
    "is this image normal?": "هل هذه الصورة طبيعية؟"
}

# ✅ Translation utilities
def translate_ar_to_en(text):
    inputs = ar_en_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = ar_en_model.generate(**inputs)
    return ar_en_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

def translate_en_to_ar(text):
    if text.lower().strip() in question_fixes:
        return question_fixes[text.lower().strip()]
    inputs = en_ar_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    outputs = en_ar_model.generate(**inputs)
    translated = en_ar_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    # Guard against known spurious MT output; fall back to a fixed question if one exists
    if "القرآن" in translated or "يتفاعل" in translated:
        return question_fixes.get(text.lower().strip(), "سؤال غير مفهوم")
    return translated

def translate_answer_medical(answer_en):
    # Prefer the curated medical dictionary; fall back to machine translation
    return medical_terms.get(answer_en.lower().strip(), translate_en_to_ar(answer_en))

# ✅ Arabic font helper
def get_font(size=22):
    try:
        return ImageFont.truetype("Amiri-Regular.ttf", size)
    except OSError:
        return ImageFont.load_default()

# ✅ Report generation function
def generate_report_image(image, question_ar, question_en, answer_ar, answer_en):
    width, height = 1000, 700
    background = Image.new("RGB", (width, height), color="white")
    draw = ImageDraw.Draw(background)
    font = get_font(22)
    font_bold = get_font(26)

    draw.text((40, 20), "📋 Medical VQA Screenshot Report", font=font_bold, fill="black")
    draw.text((40, 60), f"🕓 Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", font=font, fill="gray")

    img_resized = image.resize((300, 300))
    background.paste(img_resized, (50, 110))

    x, y = 380, 110
    spacing = 60
    lines = [
        f" السؤال بالعربية:\n{question_ar}",
        f" Question in English:\n{question_en}",
        f" الإجابة بالعربية:\n{answer_ar}",
        f" Answer in English:\n{answer_en}"
    ]
    for line in lines:
        for subline in line.split("\n"):
            draw.text((x, y), subline, font=font, fill="black")
            y += spacing

    file_name = f"report_{uuid.uuid4().hex[:8]}.png"
    background.save(file_name)
    return file_name
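# --- Optional: correct Arabic glyph shaping in the report image ---
# PIL's draw.text renders Arabic as disconnected, left-to-right glyphs. A
# minimal sketch of a fix, assuming the third-party packages
# `arabic-reshaper` and `python-bidi` are installed (neither is used
# elsewhere in this app). Not wired in; to apply it, wrap the Arabic strings
# in generate_report_image, e.g. draw.text((x, y), shape_arabic(subline), ...).
def shape_arabic(text):
    try:
        import arabic_reshaper  # assumed dependency: pip install arabic-reshaper
        from bidi.algorithm import get_display  # assumed dependency: pip install python-bidi
        # Join letters into their contextual forms, then reorder for RTL display
        return get_display(arabic_reshaper.reshape(text))
    except ImportError:
        # Fall back to the raw string if the optional packages are missing
        return text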
# ✅ Main VQA function
def vqa_multilingual(image, question):
    if image is None or not question or not question.strip():
        return "يرجى رفع صورة وكتابة سؤال.", "", "", "", None

    # Detect Arabic input by checking for characters in the Arabic Unicode block
    is_arabic = any('\u0600' <= c <= '\u06FF' for c in question)

    question_ar = question.strip() if is_arabic else translate_en_to_ar(question)
    question_en = translate_ar_to_en(question) if is_arabic else question.strip()

    inputs = processor(image, question_en, return_tensors="pt")
    with torch.no_grad():
        output = blip_model.generate(**inputs)
    answer_en = processor.decode(output[0], skip_special_tokens=True).strip()
    answer_ar = translate_answer_medical(answer_en)

    report_image_path = generate_report_image(image, question_ar, question_en, answer_ar, answer_en)

    return (
        question_ar,
        question_en,
        answer_ar,
        answer_en,
        report_image_path
    )

# ✅ Gradio interface
gr.Interface(
    fn=vqa_multilingual,
    inputs=[
        gr.Image(type="pil", label="📷 Upload Medical Image"),
        gr.Textbox(label="💬 Your Question (Arabic or English)")
    ],
    outputs=[
        gr.Textbox(label="🟠 Arabic Question"),
        gr.Textbox(label="🟢 English Question"),
        gr.Textbox(label="🟠 Arabic Answer"),
        gr.Textbox(label="🟢 English Answer"),
        gr.Image(type="filepath", label="📸 Report Screenshot")
    ],
    title="🧠 Bilingual Medical VQA",
    description="Upload an X-ray or medical image and ask a question in Arabic or English. Get bilingual answers and an image-based report."
).launch()
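# --- Optional: programmatic smoke test (a sketch, not part of the app) ---
# With the app running, the endpoint can be exercised from another process
# via the separate `gradio_client` package (assumed installed; `handle_file`
# needs a recent version). The URL and api_name below are the Gradio
# defaults and may differ in your deployment. launch() blocks, so run this
# from a second process rather than uncommenting it here:
#
# from gradio_client import Client, handle_file
# client = Client("http://127.0.0.1:7860/")
# q_ar, q_en, a_ar, a_en, report_path = client.predict(
#     handle_file("chest_xray.png"),   # hypothetical sample image path
#     "هل هذه الصورة طبيعية؟",          # "Is this image normal?"
#     api_name="/predict",
# )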