ved1beta committed · Commit cf83b3d · 1 Parent(s): ef13ec4
Files changed (4)
  1. app.py +26 -38
  2. image1.jpeg +0 -0
  3. image2.jpg +0 -0
  4. image3.jpeg +0 -0
app.py CHANGED
@@ -1,33 +1,20 @@
-import os
 import gradio as gr
 from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
 from PIL import Image
 import torch
-import requests
-
-# Get token from environment variable
-HF_TOKEN = os.getenv('HF_TOKEN')
+import os
 
 # Load the model and processor
 model_id = "google/paligemma-3b-mix-224"
+HF_TOKEN = os.getenv('HF_TOKEN')
 model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, token=HF_TOKEN).eval()
 processor = AutoProcessor.from_pretrained(model_id, token=HF_TOKEN)
 
-# Supported languages and example prompts
-LANGUAGES = {
-    "English": "caption en",
-    "Spanish": "caption es",
-    "French": "caption fr",
-    "German": "caption de"
-}
-
-def generate_caption(image, language, max_tokens=100):
-    """Generate image caption in specified language"""
+def generate_caption(image, prompt="What is in this image?", max_tokens=100):
+    """Generate image description"""
     if image is None:
         return "Please upload an image."
 
-    prompt = LANGUAGES.get(language, "caption en")
-
     # Preprocess inputs
     model_inputs = processor(text=prompt, images=image, return_tensors="pt")
     input_len = model_inputs["input_ids"].shape[-1]
@@ -40,46 +27,47 @@ def generate_caption(image, language, max_tokens=100):
 
     return decoded
 
-def load_example_image(url):
-    """Load example image from URL"""
-    return Image.open(requests.get(url, stream=True).raw)
+# Load local example images
+def load_local_images():
+    """Load images from the repository"""
+    image_files = ['image1.jpeg', 'image2.jpg', 'image3.jpeg']
+    local_images = []
+    for img_file in image_files:
+        try:
+            img_path = os.path.join('.', img_file)
+            if os.path.exists(img_path):
+                local_images.append(Image.open(img_path))
+        except Exception as e:
+            print(f"Could not load {img_file}: {e}")
+    return local_images
 
 # Prepare example images
-EXAMPLE_IMAGES = [
-    load_example_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg"),
-    load_example_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/food.jpg"),
-    load_example_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/city.jpg")
-]
+EXAMPLE_IMAGES = load_local_images()
 
 # Create Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# PaliGemma Image Captioning")
-    gr.Markdown("Upload an image and get a caption in your preferred language!")
+    gr.Markdown("# PaliGemma Image Analysis")
 
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="pil", label="Upload Image")
-            language_dropdown = gr.Dropdown(
-                list(LANGUAGES.keys()),
-                value="English",
-                label="Caption Language"
-            )
-            submit_btn = gr.Button("Generate Caption")
+            input_image = gr.Image(type="pil", label="Upload or Select Image")
+            custom_prompt = gr.Textbox(label="Custom Prompt", value="What is in this image?")
+            submit_btn = gr.Button("Analyze Image")
 
         with gr.Column():
-            output_text = gr.Textbox(label="Generated Caption")
+            output_text = gr.Textbox(label="Image Description")
 
     # Connect components
     submit_btn.click(
         fn=generate_caption,
-        inputs=[input_image, language_dropdown],
+        inputs=[input_image, custom_prompt],
         outputs=output_text
     )
 
     # Add example images
     gr.Examples(
-        examples=[[img, lang] for img in EXAMPLE_IMAGES for lang in LANGUAGES.keys()],
-        inputs=[input_image, language_dropdown],
+        examples=[[img, "What is in this image?"] for img in EXAMPLE_IMAGES],
+        inputs=[input_image, custom_prompt],
         fn=generate_caption,
         outputs=output_text
    )
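
Both hunks elide the unchanged middle of generate_caption (old lines 34-39, new lines 21-26), where the generation itself happens. For orientation only, a typical transformers generate/decode step over the inputs prepared above looks roughly like the sketch below; the exact kwargs in the committed file are not visible in this diff and are assumptions here.

    # Assumed shape of the elided generation step; not shown in the diff.
    with torch.inference_mode():
        generation = model.generate(**model_inputs, max_new_tokens=max_tokens, do_sample=False)
        # Decode only the newly generated tokens, skipping the prompt.
        decoded = processor.decode(generation[0][input_len:], skip_special_tokens=True)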
image1.jpeg ADDED
image2.jpg ADDED
image3.jpeg ADDED
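
For a quick local sanity check of the new prompt-based signature, a hypothetical snippet (not part of this commit) can exercise it against one of the newly added images; it assumes you run from the repo root with HF_TOKEN set.

    # Hypothetical smoke test; importing app downloads and loads the model.
    from PIL import Image
    from app import generate_caption

    img = Image.open("image1.jpeg")
    print(generate_caption(img, prompt="What is in this image?", max_tokens=50))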