Spaces:
Sleeping
Sleeping
File size: 5,797 Bytes
4fb205c 80ffccc 3f70aaa 8469ec9 3f70aaa 4fb205c 861816c 4fb205c 861816c 77a1fa3 3f70aaa b7c9222 3f70aaa 786b9d4 3f70aaa 786b9d4 3f70aaa 2536648 80ffccc 2536648 80ffccc 786b9d4 4fb205c 3f70aaa 4fb205c 3f70aaa 4fb205c 3f70aaa 80ffccc 3f70aaa 4fb205c 3f70aaa 4fb205c 3f70aaa 4fb205c 3f70aaa 4fb205c 3f70aaa 4fb205c 3f70aaa 80ffccc 4fb205c 80ffccc 4fb205c 3f70aaa 8469ec9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 |
import gradio as gr
from transformers import BlipProcessor, BlipForQuestionAnswering, MarianMTModel, MarianTokenizer
from PIL import Image, ImageDraw, ImageFont
import torch, uuid, os
from datetime import datetime
# ✅ Load BLIP model
# Fine-tuned BLIP VQA checkpoint pulled from the Hugging Face Hub at import
# time — the first run needs network access and may take a while.
blip_model = BlipForQuestionAnswering.from_pretrained("sharawy53/diploma")
processor = BlipProcessor.from_pretrained("sharawy53/diploma")
# ✅ Load translation models
# MarianMT pairs bridging the Arabic UI and the English-only BLIP model:
# ar→en for incoming questions, en→ar for outgoing answers.
ar_en_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-ar-en")
ar_en_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-ar-en")
en_ar_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
en_ar_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-en-ar")
# ✅ Manual English→Arabic dictionary for common medical terms.
# All keys are lower-case: the only consumer, translate_answer_medical(),
# normalises the model output with .lower().strip() before the lookup, so
# the former upper-case entries ("CT", "MRI") were unreachable and have
# been dropped (their lower-case twins carry the same translations).
medical_terms = {
    "chest x-ray": "أشعة سينية للصدر",
    "x-ray": "أشعة سينية",
    # BLIP sometimes emits "x - ray" with spaces around the hyphen.
    "x - ray": "أشعة سينية",
    "chest x - ray": "أشعة سينية",
    "ct": "المسح المقطعي",
    "ct scan": "تصوير مقطعي محوسب",
    "mri": "تصوير بالرنين المغناطيسي",
    "ultrasound": "تصوير بالموجات فوق الصوتية",
    "normal": "طبيعي",
    "abnormal": "غير طبيعي",
    "brain": "الدماغ",
    "fracture": "كسر",
    "cardiomegaly": "تضخم القلب",
    "left lung": "الرئة اليسرى",
    "right lung": "الرئة اليمنى",
    "no abnormality detected": "لا توجد شذوذات",
}
# Hand-written Arabic translations for a few known questions whose machine
# translation is unreliable; checked before (and after) running the MT model.
question_fixes = {
    "what is the unnatural in this image?": "ما الشيء غير الطبيعي في هذه الصورة؟",
    "what is abnormal in this image?": "ما الشيء غير الطبيعي في هذه الصورة؟",
    "is this image normal?": "هل هذه الصورة طبيعية؟",
}
# ✅ Translation utilities
def translate_ar_to_en(text):
    """Translate Arabic *text* to English with the MarianMT ar→en model."""
    token_batch = ar_en_tokenizer(
        text, return_tensors="pt", padding=True, truncation=True
    )
    generated_ids = ar_en_model.generate(**token_batch)
    decoded = ar_en_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return decoded.strip()
def translate_en_to_ar(text):
    """Translate English *text* to Arabic, preferring curated overrides.

    Known questions get a hand-written translation from ``question_fixes``;
    otherwise the MarianMT en→ar model is used, with a sanity check that
    falls back to the overrides (or a generic message) when the model output
    contains tell-tale garbage words.
    """
    normalized = text.lower().strip()
    if normalized in question_fixes:
        return question_fixes[normalized]
    token_batch = en_ar_tokenizer(
        text, return_tensors="pt", padding=True, truncation=True
    )
    generated_ids = en_ar_model.generate(**token_batch)
    candidate = en_ar_tokenizer.decode(generated_ids[0], skip_special_tokens=True).strip()
    # Heuristic: these words signal a known mistranslation pattern.
    if "القرآن" in candidate or "يتفاعل" in candidate:
        return question_fixes.get(normalized, "سؤال غير مفهوم")
    return candidate
def translate_answer_medical(answer_en):
    """Return the Arabic rendering of an English BLIP answer.

    Looks the normalised answer up in the curated ``medical_terms``
    dictionary first; only on a miss does it fall back to machine
    translation. The previous ``dict.get(key, translate_en_to_ar(...))``
    form evaluated the default eagerly, running a full MT inference on
    every call even when the dictionary already had the term.
    """
    key = answer_en.lower().strip()
    if key in medical_terms:
        return medical_terms[key]
    return translate_en_to_ar(answer_en)
# ✅ Arabic font helper
def get_font(size=22):
    """Return the Amiri Arabic TTF at *size*, or PIL's built-in default.

    The original bare ``except:`` also swallowed KeyboardInterrupt and
    SystemExit; catch only what ``ImageFont.truetype`` can actually raise
    (OSError when the font file is missing, ImportError when FreeType
    support is unavailable).
    """
    try:
        return ImageFont.truetype("Amiri-Regular.ttf", size)
    except (OSError, ImportError):
        return ImageFont.load_default()
# ✅ Report generation function
def generate_report_image(image, question_ar, question_en, answer_ar, answer_en):
    """Render a bilingual Q/A report card as a PNG and return its file name.

    The card is a 1000x700 white canvas with a title, a timestamp, a 300x300
    thumbnail of *image*, and the four question/answer strings. The PNG is
    written to the current working directory under a random unique name.
    """
    canvas = Image.new("RGB", (1000, 700), color="white")
    drawer = ImageDraw.Draw(canvas)
    body_font = get_font(22)
    title_font = get_font(26)

    drawer.text((40, 20), "📋 Medical VQA Screenshot Report", font=title_font, fill="black")
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    drawer.text((40, 60), f"🕓 Date: {timestamp}", font=body_font, fill="gray")

    canvas.paste(image.resize((300, 300)), (50, 110))

    text_x, text_y = 380, 110
    sections = (
        f" السؤال بالعربية:\n{question_ar}",
        f" Question in English:\n{question_en}",
        f" الإجابة بالعربية:\n{answer_ar}",
        f" Answer in English:\n{answer_en}",
    )
    for section in sections:
        for row in section.split("\n"):
            drawer.text((text_x, text_y), row, font=body_font, fill="black")
            text_y += 60  # vertical spacing between rendered lines

    file_name = f"report_{uuid.uuid4().hex[:8]}.png"
    canvas.save(file_name)
    return file_name
# ✅ Main VQA function
def vqa_multilingual(image, question):
    """Answer *question* about *image* bilingually and build a report PNG.

    Detects whether the question is Arabic (any char in U+0600–U+06FF),
    translates it so both language versions exist, runs BLIP VQA on the
    English question, translates the English answer back to Arabic, and
    renders everything into a report image.

    Returns ``(question_ar, question_en, answer_ar, answer_en, report_path)``;
    on missing input the first element is an Arabic error message and the
    report path is ``None``.
    """
    if not image or not question.strip():
        return "يرجى رفع صورة وكتابة سؤال.", "", "", "", None

    contains_arabic = any('\u0600' <= ch <= '\u06FF' for ch in question)
    if contains_arabic:
        question_ar = question.strip()
        question_en = translate_ar_to_en(question)
    else:
        question_ar = translate_en_to_ar(question)
        question_en = question.strip()

    model_inputs = processor(image, question_en, return_tensors="pt")
    with torch.no_grad():
        generated_ids = blip_model.generate(**model_inputs)
    answer_en = processor.decode(generated_ids[0], skip_special_tokens=True).strip()
    answer_ar = translate_answer_medical(answer_en)

    report_path = generate_report_image(
        image, question_ar, question_en, answer_ar, answer_en
    )
    return question_ar, question_en, answer_ar, answer_en, report_path
# ✅ Gradio interface: wire the VQA function to image + textbox widgets.
demo = gr.Interface(
    fn=vqa_multilingual,
    inputs=[
        gr.Image(type="pil", label="📷 Upload Medical Image"),
        gr.Textbox(label="💬 Your Question (Arabic or English)"),
    ],
    outputs=[
        gr.Textbox(label="🟠 Arabic Question"),
        gr.Textbox(label="🟢 English Question"),
        gr.Textbox(label="🟠 Arabic Answer"),
        gr.Textbox(label="🟢 English Answer"),
        gr.Image(type="filepath", label="📸 Report Screenshot"),
    ],
    title="🧠 Bilingual Medical VQA",
    description="Upload an X-ray or medical image and ask a question in Arabic or English. Get bilingual answers and an image-based report.",
)
demo.launch()
|