"""Gradio demo: pick a model and view what its processor produces via `ImageVisualizer`."""

import io
import threading
from typing import Optional

import gradio as gr
import matplotlib.pyplot as plt
from PIL import Image
from transformers.utils.processor_visualizer_utils import ImageVisualizer


MODELS = [
    "openai/clip-vit-base-patch32",
    "HuggingFaceM4/Idefics3-8B-Llama3",
    "llava-hf/llava-1.5-7b-hf",
    "OpenGVLab/InternVL2-2B",
    "OpenGVLab/InternVL3-8B-hf",
    "Salesforce/blip-image-captioning-base",
    "Salesforce/blip2-flan-t5-xl",
    "Qwen/Qwen2-VL-2B-Instruct",
    "Qwen/Qwen2.5-VL-3B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision",
    "microsoft/Florence-2-base",
    "laion/CLIP-ViT-B-32-laion2B-s34B-b79K",
]

# `plt.show` and pyplot's "current figure" are process-global. `_run` is wired
# to two event listeners below, so calls can overlap; without serialization,
# interleaved patch/restore could leave `plt.show` pointing at a stale capture
# closure.
_PYPLOT_LOCK = threading.Lock()


def _fig_to_pil(fig) -> Image.Image:
    """Rasterize a matplotlib figure to an RGB PIL image via an in-memory PNG.

    The figure itself is left open; the caller is responsible for closing it.
    """
    with io.BytesIO() as buf:
        fig.savefig(buf, format="png", bbox_inches="tight", dpi=160)
        buf.seek(0)
        # convert() forces a full decode and returns a new image, so the
        # buffer can be closed safely once this expression has been evaluated.
        return Image.open(buf).convert("RGB")


def _run(model_id: str, image: Optional[Image.Image], use_sample: bool, add_grid: bool):
    """Run the visualizer for `model_id` and return its figures plus a prompt preview.

    Args:
        model_id: Repo id handed to `ImageVisualizer`.
        image: Optional user-supplied image; ignored when `use_sample` is true.
        use_sample: When true, `images=None` is passed to `visualize`.
        add_grid: Forwarded to `visualize` as `add_grid`.

    Returns:
        A `(images, prompt_preview)` tuple: the list of PIL images captured
        from the visualizer's `plt.show()` calls, and the value of
        `viz.default_message(full_output=False)`.
    """
    viz = ImageVisualizer(model_id)
    captured = []

    def _capture_show(*_, **__):
        # Stand-in for plt.show: record the current figure instead of displaying it.
        captured.append(plt.gcf())

    with _PYPLOT_LOCK:
        orig_show = plt.show
        plt.show = _capture_show
        try:
            try:
                viz.visualize(images=None if use_sample else image, add_grid=add_grid)
            finally:
                # Always restore the real plt.show, even if visualize() raises.
                plt.show = orig_show
            imgs = [_fig_to_pil(fig) for fig in captured]
        finally:
            # pyplot keeps every figure alive until it is explicitly closed;
            # without this each render would leak its figures.
            for fig in captured:
                plt.close(fig)

    prompt_preview = viz.default_message(full_output=False)
    return imgs, prompt_preview


with gr.Blocks(title="Transformers Processor Visualizer") as demo:
    gr.Markdown("Switch models and see what the processor feeds them (uses the existing `ImageVisualizer`).")

    # NOTE(review): this file was reconstructed from whitespace-collapsed
    # source, so the exact extent of this Row is an assumption - confirm layout.
    with gr.Row():
        model_id = gr.Dropdown(
            label="Model repo_id",
            choices=MODELS,
            value=MODELS[0],
            allow_custom_value=True,
            filterable=True,
        )
        add_grid = gr.Checkbox(label="Show patch grid", value=True)
        use_sample = gr.Checkbox(label="Use HF logo sample", value=True)
        image = gr.Image(label="Upload custom image", type="pil", height=140, width=140, sources=["upload"])

    def _on_image_change(img):
        """Keep the sample toggle in sync with the presence of a custom image.

        Unchecks the toggle when a custom image is set, and re-checks it when
        the image is cleared: with no image `_run` passes `images=None` either
        way, so the checkbox should reflect that.
        """
        return img is None

    image.change(_on_image_change, inputs=image, outputs=use_sample)

    run_btn = gr.Button("Render")

    gallery = gr.Gallery(label="Processor output")
    prompt = gr.Textbox(label="Compact chat template preview")

    # Render on demand
    run_btn.click(_run, inputs=[model_id, image, use_sample, add_grid], outputs=[gallery, prompt])

    # Also render once on load with defaults so there is an example before clicking
    demo.load(_run, inputs=[model_id, image, use_sample, add_grid], outputs=[gallery, prompt])


if __name__ == "__main__":
    demo.launch()