Spaces: Runtime error
Commit · aa0d3af
Parent(s): 82e8993
update
Browse files
- app.py +11 -5
- requirements.txt +1 -1
app.py CHANGED
@@ -4,10 +4,16 @@ import re
 import time
 from PIL import Image
 import torch
-import spaces
+#import spaces
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
+if torch.cuda.is_available():
+    device = "cuda"
+elif torch.backends.mps.is_available():
+    device = "mps"
+else:
+    device = "cpu"
 
 processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")
 
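The added block picks the best available torch backend at import time: CUDA if present, Apple's MPS as a fallback, otherwise CPU. The same selection can be wrapped in a small helper; a minimal sketch (the get_device name is hypothetical, not part of this commit):

import torch

def get_device() -> str:
    # Prefer CUDA, then Apple Metal (MPS), then CPU.
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"

device = get_device()

Since the model below is loaded in bfloat16, note that bfloat16 coverage on MPS is incomplete in some torch versions, so the MPS and CPU paths may still need a dtype fallback.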
@@ -15,9 +21,9 @@ model = Idefics2ForConditionalGeneration.from_pretrained(
     "HuggingFaceM4/idefics2-8b",
     torch_dtype=torch.bfloat16,
     #_attn_implementation="flash_attention_2",
-    trust_remote_code=True).to("cuda")
+    trust_remote_code=True).to(device)
 
-@spaces.GPU(duration=180)
+#@spaces.GPU(duration=180)
 def model_inference(
     image, text, decoding_strategy, temperature,
     max_new_tokens, repetition_penalty, top_p
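Commenting out @spaces.GPU disables the ZeroGPU allocation that exists only on Hugging Face Spaces. If one file should serve both environments, a common pattern is a pass-through fallback decorator; a sketch, assuming the spaces package is installed only on Spaces:

try:
    import spaces  # present on Hugging Face Spaces (ZeroGPU)
    gpu = spaces.GPU(duration=180)  # borrow a GPU for up to 180 s per call
except ImportError:
    def gpu(fn):
        # Local run: plain pass-through, no GPU scheduling.
        return fn

@gpu
def model_inference(image, text):
    ...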
@@ -40,7 +46,7 @@ def model_inference(
 
     prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
     inputs = processor(text=prompt, images=[image], return_tensors="pt")
-    inputs = {k: v.to("cuda") for k, v in inputs.items()}
+    inputs = {k: v.to(device) for k, v in inputs.items()}
 
     generation_args = {
         "max_new_tokens": max_new_tokens,
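The dict comprehension moves every tensor returned by the processor onto the selected device. In recent transformers versions the processor output (a BatchFeature/BatchEncoding) also exposes .to() directly, so an equivalent one-liner sketch would be:

inputs = processor(text=prompt, images=[image], return_tensors="pt").to(device)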
@@ -173,4 +179,4 @@ with gr.Blocks(fill_height=True) as demo:
         max_new_tokens, repetition_penalty, top_p], outputs=output)
 
 
-demo.launch(debug=True)
+demo.launch(debug=True)
requirements.txt CHANGED
@@ -1,2 +1,2 @@
-spaces
+#spaces
 git+https://github.com/huggingface/transformers.git
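With spaces commented out of both app.py and requirements.txt, the app no longer assumes the Spaces runtime and can be launched locally. A quick environment check before running it, as a sketch (not part of the commit):

import torch

print("torch:", torch.__version__)
print("cuda available:", torch.cuda.is_available())
print("mps available:", torch.backends.mps.is_available())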