Update handler.py
Browse files
- handler.py +19 -39
handler.py
CHANGED
@@ -3,40 +3,43 @@ import torch
|
|
3 |
from PIL import Image
|
4 |
import base64
|
5 |
import io
|
6 |
-
import os
|
7 |
import sys
|
8 |
|
9 |
class EndpointHandler:
|
10 |
def __init__(self, path="."):
|
11 |
"""
|
12 |
Initialize the model and tokenizer for inference.
|
13 |
-
Args:
|
14 |
-
path (str): Path to the model directory
|
15 |
"""
|
16 |
try:
|
17 |
-
# Add the model's directory to the Python path
|
18 |
if path not in sys.path:
|
19 |
sys.path.append(path)
|
20 |
|
21 |
-
# Import transformers
|
22 |
-
from
|
23 |
|
24 |
print(f"Loading model from {path}")
|
25 |
|
|
|
|
|
|
|
26 |
# Load tokenizer
|
27 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
28 |
path,
|
29 |
trust_remote_code=True
|
30 |
)
|
31 |
|
32 |
-
# Load model
|
33 |
-
self.model =
|
34 |
path,
|
35 |
-
|
|
|
36 |
device_map="auto",
|
37 |
trust_remote_code=True
|
38 |
)
|
39 |
|
|
|
|
|
|
|
40 |
# Set model to evaluation mode
|
41 |
self.model.eval()
|
42 |
|
@@ -58,7 +61,6 @@ class EndpointHandler:
|
|
58 |
image_data = data.get("image", None)
|
59 |
max_new_tokens = data.get("max_new_tokens", 100)
|
60 |
|
61 |
-
# Check if image is provided
|
62 |
if not image_data:
|
63 |
return {"error": "No image provided"}
|
64 |
|
@@ -76,46 +78,24 @@ class EndpointHandler:
|
|
76 |
return {"error": f"Error processing image: {str(e)}"}
|
77 |
|
78 |
try:
|
79 |
-
# Prepare messages
|
80 |
messages = [
|
81 |
-
{"role": "user", "content": f"<|image
|
82 |
{"role": "assistant", "content": ""}
|
83 |
]
|
84 |
|
85 |
-
#
|
86 |
-
|
87 |
-
print("Model structure:", dir(self.model))
|
88 |
|
89 |
-
#
|
90 |
-
if hasattr(self.model, "init_processor"):
|
91 |
-
processor = self.model.init_processor(self.tokenizer)
|
92 |
-
elif hasattr(self.model, "model") and hasattr(self.model.model, "init_processor"):
|
93 |
-
processor = self.model.model.init_processor(self.tokenizer)
|
94 |
-
else:
|
95 |
-
# Let's try to find the processor in the model's attributes
|
96 |
-
for attr_name in dir(self.model):
|
97 |
-
if attr_name.startswith("_"):
|
98 |
-
continue
|
99 |
-
attr = getattr(self.model, attr_name)
|
100 |
-
if hasattr(attr, "init_processor"):
|
101 |
-
processor = attr.init_processor(self.tokenizer)
|
102 |
-
print(f"Found processor in {attr_name}")
|
103 |
-
break
|
104 |
-
else:
|
105 |
-
return {"error": "Could not find processor in model"}
|
106 |
-
|
107 |
-
# Process inputs
|
108 |
-
model_inputs = processor(messages, images=[image], videos=None)
|
109 |
-
|
110 |
-
# Move inputs to the same device as the model
|
111 |
device = next(self.model.parameters()).device
|
112 |
model_inputs = model_inputs.to(device)
|
113 |
|
114 |
-
# Add
|
115 |
model_inputs.update({
|
116 |
'tokenizer': self.tokenizer,
|
117 |
'max_new_tokens': max_new_tokens,
|
118 |
-
'decode_text': True
|
119 |
})
|
120 |
|
121 |
# Generate output
|
|
|
3 |
from PIL import Image
|
4 |
import base64
|
5 |
import io
|
|
|
6 |
import sys
|
7 |
|
8 |
class EndpointHandler:
|
9 |
def __init__(self, path="."):
|
10 |
"""
|
11 |
Initialize the model and tokenizer for inference.
|
|
|
|
|
12 |
"""
|
13 |
try:
|
|
|
14 |
if path not in sys.path:
|
15 |
sys.path.append(path)
|
16 |
|
17 |
+
# Import from modelscope instead of transformers
|
18 |
+
from modelscope import AutoConfig, AutoModel, AutoTokenizer
|
19 |
|
20 |
print(f"Loading model from {path}")
|
21 |
|
22 |
+
# Load config first
|
23 |
+
self.config = AutoConfig.from_pretrained(path, trust_remote_code=True)
|
24 |
+
|
25 |
# Load tokenizer
|
26 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
27 |
path,
|
28 |
trust_remote_code=True
|
29 |
)
|
30 |
|
31 |
+
# Load model with correct parameters
|
32 |
+
self.model = AutoModel.from_pretrained(
|
33 |
path,
|
34 |
+
attn_implementation='sdpa', # or 'flash_attention_2'
|
35 |
+
torch_dtype=torch.bfloat16,
|
36 |
device_map="auto",
|
37 |
trust_remote_code=True
|
38 |
)
|
39 |
|
40 |
+
# Initialize processor
|
41 |
+
self.processor = self.model.init_processor(self.tokenizer)
|
42 |
+
|
43 |
# Set model to evaluation mode
|
44 |
self.model.eval()
|
45 |
|
|
|
61 |
image_data = data.get("image", None)
|
62 |
max_new_tokens = data.get("max_new_tokens", 100)
|
63 |
|
|
|
64 |
if not image_data:
|
65 |
return {"error": "No image provided"}
|
66 |
|
|
|
78 |
return {"error": f"Error processing image: {str(e)}"}
|
79 |
|
80 |
try:
|
81 |
+
# Prepare messages following mPLUG-Owl3 format
|
82 |
messages = [
|
83 |
+
{"role": "user", "content": f"<|image|>\n{prompt}"},
|
84 |
{"role": "assistant", "content": ""}
|
85 |
]
|
86 |
|
87 |
+
# Process inputs using the processor
|
88 |
+
model_inputs = self.processor(messages, images=[image], videos=None)
|
|
|
89 |
|
90 |
+
# Move inputs to the correct device
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
device = next(self.model.parameters()).device
|
92 |
model_inputs = model_inputs.to(device)
|
93 |
|
94 |
+
# Add required parameters
|
95 |
model_inputs.update({
|
96 |
'tokenizer': self.tokenizer,
|
97 |
'max_new_tokens': max_new_tokens,
|
98 |
+
'decode_text': True
|
99 |
})
|
100 |
|
101 |
# Generate output
|