assentian1970 committed on
Commit 0d2d202 · verified · 1 Parent(s): 95dc275

Update handler.py

Files changed (1):
  handler.py +19 -39
handler.py CHANGED
@@ -3,40 +3,43 @@ import torch
 from PIL import Image
 import base64
 import io
-import os
 import sys
 
 class EndpointHandler:
     def __init__(self, path="."):
         """
         Initialize the model and tokenizer for inference.
-        Args:
-            path (str): Path to the model directory
         """
         try:
-            # Add the model's directory to the Python path
             if path not in sys.path:
                 sys.path.append(path)
 
-            # Import transformers
-            from transformers import AutoModelForCausalLM, AutoTokenizer
+            # Import from modelscope instead of transformers
+            from modelscope import AutoConfig, AutoModel, AutoTokenizer
 
             print(f"Loading model from {path}")
 
+            # Load config first
+            self.config = AutoConfig.from_pretrained(path, trust_remote_code=True)
+
             # Load tokenizer
             self.tokenizer = AutoTokenizer.from_pretrained(
                 path,
                 trust_remote_code=True
             )
 
-            # Load model
-            self.model = AutoModelForCausalLM.from_pretrained(
+            # Load model with correct parameters
+            self.model = AutoModel.from_pretrained(
                 path,
-                torch_dtype=torch.float16,
+                attn_implementation='sdpa',  # or 'flash_attention_2'
+                torch_dtype=torch.bfloat16,
                 device_map="auto",
                 trust_remote_code=True
             )
 
+            # Initialize processor
+            self.processor = self.model.init_processor(self.tokenizer)
+
             # Set model to evaluation mode
             self.model.eval()
 
@@ -58,7 +61,6 @@ class EndpointHandler:
         image_data = data.get("image", None)
         max_new_tokens = data.get("max_new_tokens", 100)
 
-        # Check if image is provided
         if not image_data:
             return {"error": "No image provided"}
 
@@ -76,46 +78,24 @@
             return {"error": f"Error processing image: {str(e)}"}
 
         try:
-            # Prepare messages for the model
+            # Prepare messages following mPLUG-Owl3 format
            messages = [
-                {"role": "user", "content": f"<|image|> {prompt}"},
+                {"role": "user", "content": f"<|image|>\n{prompt}"},
                 {"role": "assistant", "content": ""}
             ]
 
-            # For mPLUG-Owl3, the processor is directly in the model
-            # Let's inspect the model structure to find the processor
-            print("Model structure:", dir(self.model))
-
-            # Try different ways to access the processor
-            if hasattr(self.model, "init_processor"):
-                processor = self.model.init_processor(self.tokenizer)
-            elif hasattr(self.model, "model") and hasattr(self.model.model, "init_processor"):
-                processor = self.model.model.init_processor(self.tokenizer)
-            else:
-                # Let's try to find the processor in the model's attributes
-                for attr_name in dir(self.model):
-                    if attr_name.startswith("_"):
-                        continue
-                    attr = getattr(self.model, attr_name)
-                    if hasattr(attr, "init_processor"):
-                        processor = attr.init_processor(self.tokenizer)
-                        print(f"Found processor in {attr_name}")
-                        break
-                else:
-                    return {"error": "Could not find processor in model"}
-
-            # Process inputs
-            model_inputs = processor(messages, images=[image], videos=None)
-
-            # Move inputs to the same device as the model
+            # Process inputs using the processor
+            model_inputs = self.processor(messages, images=[image], videos=None)
+
+            # Move inputs to the correct device
             device = next(self.model.parameters()).device
             model_inputs = model_inputs.to(device)
 
-            # Add additional parameters
+            # Add required parameters
             model_inputs.update({
                 'tokenizer': self.tokenizer,
                 'max_new_tokens': max_new_tokens,
-                'decode_text': True,
+                'decode_text': True
             })
 
             # Generate output
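
For anyone smoke-testing the updated handler locally, a minimal sketch follows. It is not part of the commit: the test.jpg filename is a placeholder, and the "prompt" payload key and the __call__(self, data) entry point are assumptions based on the usual Hugging Face endpoint-handler layout, since neither appears in this diff.

import base64

from handler import EndpointHandler

# Build the handler; path="." matches the default used in __init__.
handler = EndpointHandler(path=".")

# Encode a local image as base64, the format the handler decodes.
with open("test.jpg", "rb") as f:  # placeholder test image
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

data = {
    "image": image_b64,                # read via data.get("image", None)
    "prompt": "Describe this image.",  # assumed key; its retrieval is outside this diff
    "max_new_tokens": 100,             # handler default shown in the diff
}

# Assumes the handler exposes __call__(self, data), as endpoint handlers usually do.
result = handler(data)
print(result)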