akhaliq HF Staff committed on
Commit
b7bf121
·
verified ·
1 Parent(s): 98c4ce9

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +22 -11
app.py CHANGED
@@ -2,22 +2,31 @@ import gradio as gr
2
  import torch
3
  from PIL import Image
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
5
 
6
  # Model configuration
7
  MID = "apple/FastVLM-0.5B"
8
  IMAGE_TOKEN_INDEX = -200
9
 
10
- # Load model and tokenizer once at startup
11
- print("Loading model...")
12
- tok = AutoTokenizer.from_pretrained(MID, trust_remote_code=True)
13
- model = AutoModelForCausalLM.from_pretrained(
14
- MID,
15
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
16
- device_map="auto",
17
- trust_remote_code=True,
18
- )
19
- print("Model loaded successfully!")
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def caption_image(image, custom_prompt=None):
22
  """
23
  Generate a caption for the input image.
@@ -33,6 +42,8 @@ def caption_image(image, custom_prompt=None):
33
  return "Please upload an image first."
34
 
35
  try:
 
 
36
  # Convert image to RGB if needed
37
  if image.mode != "RGB":
38
  image = image.convert("RGB")
@@ -149,7 +160,7 @@ with gr.Blocks(title="FastVLM Image Captioning") as demo:
149
  ---
150
  **Model:** [apple/FastVLM-0.5B](https://huggingface.co/apple/FastVLM-0.5B)
151
 
152
- **Note:** This model runs best on GPU. CPU inference may be slower.
153
  """
154
  )
155
 
 
2
  import torch
3
  from PIL import Image
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
+ import spaces
6
 
7
  # Model configuration
8
  MID = "apple/FastVLM-0.5B"
9
  IMAGE_TOKEN_INDEX = -200
10
 
11
+ # Load model and tokenizer (will be loaded on first GPU allocation)
12
+ tok = None
13
+ model = None
 
 
 
 
 
 
 
14
 
15
def load_model():
    """Return the shared tokenizer and model, loading them on first use.

    The module-level globals ``tok`` and ``model`` act as a cache. If either
    one is still ``None``, both are (re)loaded from the ``MID`` checkpoint;
    otherwise the cached objects are returned untouched.

    Returns:
        A ``(tok, model)`` tuple holding the tokenizer and the causal LM.
    """
    global tok, model

    # Fast path: both halves of the cache are populated, nothing to load.
    if tok is not None and model is not None:
        return tok, model

    print("Loading model...")
    tok = AutoTokenizer.from_pretrained(MID, trust_remote_code=True)
    # Half precision, placed via device_map="cuda" -- this path requests a
    # CUDA device and has no CPU fallback.
    model = AutoModelForCausalLM.from_pretrained(
        MID,
        torch_dtype=torch.float16,
        device_map="cuda",
        trust_remote_code=True,
    )
    print("Model loaded successfully!")
    return tok, model
28
+
29
+ @spaces.GPU(duration=60)
30
  def caption_image(image, custom_prompt=None):
31
  """
32
  Generate a caption for the input image.
 
42
  return "Please upload an image first."
43
 
44
  try:
45
+ # Load model if not already loaded
46
+ tok, model = load_model()
47
  # Convert image to RGB if needed
48
  if image.mode != "RGB":
49
  image = image.convert("RGB")
 
160
  ---
161
  **Model:** [apple/FastVLM-0.5B](https://huggingface.co/apple/FastVLM-0.5B)
162
 
163
+ **Note:** This Space uses ZeroGPU for dynamic GPU allocation.
164
  """
165
  )
166