cocktailpeanut committed
Commit aa0d3af · 1 Parent(s): 82e8993
Files changed (2)
  1. app.py +11 -5
  2. requirements.txt +1 -1
app.py CHANGED
@@ -4,10 +4,16 @@ import re
 import time
 from PIL import Image
 import torch
-import spaces
+#import spaces
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
+if torch.cuda.is_available():
+    device = "cuda"
+elif torch.backends.mps.is_available():
+    device = "mps"
+else:
+    device = "cpu"
 
 processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")
 
@@ -15,9 +21,9 @@ model = Idefics2ForConditionalGeneration.from_pretrained(
     "HuggingFaceM4/idefics2-8b",
     torch_dtype=torch.bfloat16,
     #_attn_implementation="flash_attention_2",
-    trust_remote_code=True).to("cuda")
+    trust_remote_code=True).to(device)
 
-@spaces.GPU(duration=180)
+#@spaces.GPU(duration=180)
 def model_inference(
     image, text, decoding_strategy, temperature,
     max_new_tokens, repetition_penalty, top_p
@@ -40,7 +46,7 @@ def model_inference(
 
     prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
     inputs = processor(text=prompt, images=[image], return_tensors="pt")
-    inputs = {k: v.to("cuda") for k, v in inputs.items()}
+    inputs = {k: v.to(device) for k, v in inputs.items()}
 
     generation_args = {
         "max_new_tokens": max_new_tokens,
@@ -173,4 +179,4 @@ with gr.Blocks(fill_height=True) as demo:
                 max_new_tokens, repetition_penalty, top_p], outputs=output)
 
 
-demo.launch(debug=True)
+demo.launch(debug=True)
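
The substance of this commit is replacing hard-coded "cuda" targets (and the ZeroGPU `spaces` decorator) with a runtime-detected device, so the demo can also run on Apple Silicon or CPU. A minimal standalone sketch of the same device-selection pattern, assuming only that torch is installed:

import torch

# Prefer a CUDA GPU, fall back to Apple's Metal backend (MPS), then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Tensors and modules moved with .to(device) then run on whichever
# backend was found, with no other code changes.
x = torch.ones(2, 2).to(device)
print(x.device)  # e.g. cuda:0, mps:0, or cpu

Note that torch.bfloat16 (left unchanged above) is carried over from the original script; on MPS or CPU a different dtype such as float16 or float32 may be needed.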
requirements.txt CHANGED
@@ -1,2 +1,2 @@
-spaces
+#spaces
 git+https://github.com/huggingface/transformers.git