assentian1970 committed on
Commit 0d2d202 · verified · 1 Parent(s): 95dc275

Update handler.py

Files changed (1):
  handler.py +19 -39
handler.py CHANGED
@@ -3,40 +3,43 @@ import torch
 from PIL import Image
 import base64
 import io
-import os
 import sys
 
 class EndpointHandler:
     def __init__(self, path="."):
         """
         Initialize the model and tokenizer for inference.
-        Args:
-            path (str): Path to the model directory
         """
         try:
-            # Add the model's directory to the Python path
             if path not in sys.path:
                 sys.path.append(path)
 
-            # Import transformers
-            from transformers import AutoModelForCausalLM, AutoTokenizer
+            # Import from modelscope instead of transformers
+            from modelscope import AutoConfig, AutoModel, AutoTokenizer
 
             print(f"Loading model from {path}")
 
+            # Load config first
+            self.config = AutoConfig.from_pretrained(path, trust_remote_code=True)
+
             # Load tokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(
                 path,
                 trust_remote_code=True
             )
 
-            # Load model
-            self.model = AutoModelForCausalLM.from_pretrained(
+            # Load model with correct parameters
+            self.model = AutoModel.from_pretrained(
                 path,
-                torch_dtype=torch.float16,
+                attn_implementation='sdpa',  # or 'flash_attention_2'
+                torch_dtype=torch.bfloat16,
                 device_map="auto",
                 trust_remote_code=True
             )
 
+            # Initialize processor
+            self.processor = self.model.init_processor(self.tokenizer)
+
             # Set model to evaluation mode
             self.model.eval()
 
@@ -58,7 +61,6 @@ class EndpointHandler:
         image_data = data.get("image", None)
         max_new_tokens = data.get("max_new_tokens", 100)
 
-        # Check if image is provided
         if not image_data:
             return {"error": "No image provided"}
 
@@ -76,46 +78,24 @@
             return {"error": f"Error processing image: {str(e)}"}
 
         try:
-            # Prepare messages for the model
+            # Prepare messages following mPLUG-Owl3 format
            messages = [
-                {"role": "user", "content": f"<|image|> {prompt}"},
+                {"role": "user", "content": f"<|image|>\n{prompt}"},
                 {"role": "assistant", "content": ""}
             ]
 
-            # For mPLUG-Owl3, the processor is directly in the model
-            # Let's inspect the model structure to find the processor
-            print("Model structure:", dir(self.model))
-
-            # Try different ways to access the processor
-            if hasattr(self.model, "init_processor"):
-                processor = self.model.init_processor(self.tokenizer)
-            elif hasattr(self.model, "model") and hasattr(self.model.model, "init_processor"):
-                processor = self.model.model.init_processor(self.tokenizer)
-            else:
-                # Let's try to find the processor in the model's attributes
-                for attr_name in dir(self.model):
-                    if attr_name.startswith("_"):
-                        continue
-                    attr = getattr(self.model, attr_name)
-                    if hasattr(attr, "init_processor"):
-                        processor = attr.init_processor(self.tokenizer)
-                        print(f"Found processor in {attr_name}")
-                        break
-                else:
-                    return {"error": "Could not find processor in model"}
-
-            # Process inputs
-            model_inputs = processor(messages, images=[image], videos=None)
-
-            # Move inputs to the same device as the model
+            # Process inputs using the processor
+            model_inputs = self.processor(messages, images=[image], videos=None)
+
+            # Move inputs to the correct device
             device = next(self.model.parameters()).device
             model_inputs = model_inputs.to(device)
 
-            # Add additional parameters
+            # Add required parameters
             model_inputs.update({
                 'tokenizer': self.tokenizer,
                 'max_new_tokens': max_new_tokens,
-                'decode_text': True,
+                'decode_text': True
             })
 
             # Generate output
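
For anyone smoke-testing the updated handler locally, a minimal sketch follows. It is not part of the commit: the test.jpg filename is a placeholder, and the "prompt" payload key and the __call__(self, data) entry point are assumptions based on the usual Hugging Face endpoint-handler layout, since neither appears in this diff.

import base64

from handler import EndpointHandler

# Build the handler; path="." matches the default used in __init__.
handler = EndpointHandler(path=".")

# Encode a local image as base64, the format the handler decodes.
with open("test.jpg", "rb") as f:  # placeholder test image
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

data = {
    "image": image_b64,                # read via data.get("image", None)
    "prompt": "Describe this image.",  # assumed key; its retrieval is outside this diff
    "max_new_tokens": 100,             # handler default shown in the diff
}

# Assumes the handler exposes __call__(self, data), as endpoint handlers usually do.
result = handler(data)
print(result)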