ved1beta committed · Commit cf83b3d · 1 Parent(s): ef13ec4
Files changed (4)
  1. app.py +26 -38
  2. image1.jpeg +0 -0
  3. image2.jpg +0 -0
  4. image3.jpeg +0 -0
app.py CHANGED
@@ -1,33 +1,20 @@
-import os
 import gradio as gr
 from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
 from PIL import Image
 import torch
-import requests
-
-# Get token from environment variable
-HF_TOKEN = os.getenv('HF_TOKEN')
+import os
 
 # Load the model and processor
 model_id = "google/paligemma-3b-mix-224"
+HF_TOKEN = os.getenv('HF_TOKEN')
 model = PaliGemmaForConditionalGeneration.from_pretrained(model_id, token=HF_TOKEN).eval()
 processor = AutoProcessor.from_pretrained(model_id, token=HF_TOKEN)
 
-# Supported languages and example prompts
-LANGUAGES = {
-    "English": "caption en",
-    "Spanish": "caption es",
-    "French": "caption fr",
-    "German": "caption de"
-}
-
-def generate_caption(image, language, max_tokens=100):
-    """Generate image caption in specified language"""
+def generate_caption(image, prompt="What is in this image?", max_tokens=100):
+    """Generate image description"""
     if image is None:
         return "Please upload an image."
 
-    prompt = LANGUAGES.get(language, "caption en")
-
     # Preprocess inputs
     model_inputs = processor(text=prompt, images=image, return_tensors="pt")
     input_len = model_inputs["input_ids"].shape[-1]
@@ -40,46 +27,47 @@ def generate_caption(image, language, max_tokens=100):
 
     return decoded
 
-def load_example_image(url):
-    """Load example image from URL"""
-    return Image.open(requests.get(url, stream=True).raw)
+# Load local example images
+def load_local_images():
+    """Load images from the repository"""
+    image_files = ['image1.jpeg', 'image2.jpg', 'image3.jpeg']
+    local_images = []
+    for img_file in image_files:
+        try:
+            img_path = os.path.join('.', img_file)
+            if os.path.exists(img_path):
+                local_images.append(Image.open(img_path))
+        except Exception as e:
+            print(f"Could not load {img_file}: {e}")
+    return local_images
 
 # Prepare example images
-EXAMPLE_IMAGES = [
-    load_example_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg"),
-    load_example_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/food.jpg"),
-    load_example_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/city.jpg")
-]
+EXAMPLE_IMAGES = load_local_images()
 
 # Create Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("# PaliGemma Image Captioning")
-    gr.Markdown("Upload an image and get a caption in your preferred language!")
+    gr.Markdown("# PaliGemma Image Analysis")
 
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="pil", label="Upload Image")
-            language_dropdown = gr.Dropdown(
-                list(LANGUAGES.keys()),
-                value="English",
-                label="Caption Language"
-            )
-            submit_btn = gr.Button("Generate Caption")
+            input_image = gr.Image(type="pil", label="Upload or Select Image")
+            custom_prompt = gr.Textbox(label="Custom Prompt", value="What is in this image?")
+            submit_btn = gr.Button("Analyze Image")
 
         with gr.Column():
-            output_text = gr.Textbox(label="Generated Caption")
+            output_text = gr.Textbox(label="Image Description")
 
     # Connect components
     submit_btn.click(
         fn=generate_caption,
-        inputs=[input_image, language_dropdown],
+        inputs=[input_image, custom_prompt],
         outputs=output_text
     )
 
     # Add example images
     gr.Examples(
-        examples=[[img, lang] for img in EXAMPLE_IMAGES for lang in LANGUAGES.keys()],
-        inputs=[input_image, language_dropdown],
+        examples=[[img, "What is in this image?"] for img in EXAMPLE_IMAGES],
+        inputs=[input_image, custom_prompt],
         fn=generate_caption,
         outputs=output_text
    )
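
Both hunks elide the unchanged middle of generate_caption (old lines 34-39, new lines 21-26), where the generation itself happens. For orientation only, a typical transformers generate/decode step over the inputs prepared above looks roughly like the sketch below; the exact kwargs in the committed file are not visible in this diff and are assumptions here.

    # Assumed shape of the elided generation step; not shown in the diff.
    with torch.inference_mode():
        generation = model.generate(**model_inputs, max_new_tokens=max_tokens, do_sample=False)
        # Decode only the newly generated tokens, skipping the prompt.
        decoded = processor.decode(generation[0][input_len:], skip_special_tokens=True)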
image1.jpeg ADDED
image2.jpg ADDED
image3.jpeg ADDED
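
For a quick local sanity check of the new prompt-based signature, a hypothetical snippet (not part of this commit) can exercise it against one of the newly added images; it assumes you run from the repo root with HF_TOKEN set.

    # Hypothetical smoke test; importing app downloads and loads the model.
    from PIL import Image
    from app import generate_caption

    img = Image.open("image1.jpeg")
    print(generate_caption(img, prompt="What is in this image?", max_tokens=50))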