""" ONNX Inference implementation for Kaloscope LSNet model """ import numpy as np from timm.data import resolve_data_config from timm.data.transforms_factory import create_transform from timm.models import create_model # Import to register lsnet models try: import lsnet.lsnet_artist # noqa: F401 except ImportError as e: print(f"Error importing lsnet: {e}") raise class ONNXInference: def __init__(self, model_path, model_arch="lsnet_xl_artist", device="cpu"): """ Initialize ONNX inference session Tries CUDA GPU execution when selected and available, and OpenVINO for CPU with CPUExecutionProvider as last fallback Args: model_path: Path to ONNX model file model_arch: Model architecture name (for preprocessing) device: Device to use ('cpu' or 'cuda') """ self.model_path = model_path self.model_arch = model_arch self.device = device self.use_openvino = False # Hardcoded input size mapping - based on actual model definitions self.input_size = self._get_input_size(model_arch) print(f"Using input size: {self.input_size} for model {model_arch}") if device == "cuda": # Try CUDA first for GPU try: import onnxruntime as ort # Set session options to suppress warnings sess_options = ort.SessionOptions() sess_options.log_severity_level = 3 # 0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] self.session = ort.InferenceSession(model_path, sess_options=sess_options, providers=providers) self.execution_provider = self.session.get_providers()[0] # Check if CUDA is actually being used if self.execution_provider == "CUDAExecutionProvider": print(f"Using ONNX Runtime with {self.execution_provider}") # Get transform with correct input size self.transform = self._get_transform() return else: # CUDA failed, fall through to CPU logic print("CUDA not available in ONNX Runtime, falling back to CPU options") except Exception as e: print(f"ONNX Runtime CUDA initialization failed: {e}, falling back to CPU options") # For CPU or if CUDA failed, prefer OpenVINO try: import openvino as ov # error here on purpose # raise ImportError("aaa") core = ov.Core() self.model = core.read_model(model_path) self.session = core.compile_model(self.model, "CPU") self.execution_provider = "CPU – OpenVINO™" self.use_openvino = True print("Using OpenVINO runtime for inference on CPU") except ImportError: print("OpenVINO not available, falling back to ONNX Runtime CPU") self._init_onnx_runtime_cpu(model_path) except Exception as e: print(f"OpenVINO initialization failed: {e}, falling back to ONNX Runtime CPU") self._init_onnx_runtime_cpu(model_path) # Get transform with correct input size self.transform = self._get_transform() def _init_onnx_runtime_cpu(self, model_path): """Initialize ONNX Runtime with CPU as fallback""" import onnxruntime as ort # Set session options to suppress warnings sess_options = ort.SessionOptions() sess_options.log_severity_level = 3 # Only show errors and fatal messages providers = ["CPUExecutionProvider"] self.session = ort.InferenceSession(model_path, sess_options=sess_options, providers=providers) self.execution_provider = self.session.get_providers()[0] print(f"Using ONNX Runtime with {self.execution_provider}") def _get_input_size(self, model_arch): """Get input size based on model architecture - hardcoded to match actual model definitions""" if model_arch == 'lsnet_xl_artist_448': return 448 else: # All other artist models use 224 return 224 def _get_transform(self): """Create preprocessing transform with correct input size""" # Create a dummy model 
to get the base config model = create_model(self.model_arch, pretrained=False) model.eval() # Override the input size with our hardcoded value config = resolve_data_config({'input_size': (3, self.input_size, self.input_size)}, model=model) transform = create_transform(**config) print(f"Created ONNX transform with input size: {self.input_size}") return transform def preprocess(self, image): """ Preprocess PIL image for ONNX inference Args: image: PIL Image Returns: numpy array ready for ONNX inference """ image = image.convert("RGB") tensor = self.transform(image) print(f"Preprocessed image to tensor shape: {tensor.shape}") return tensor.unsqueeze(0).cpu().numpy() def predict(self, image, top_k=5, threshold=0.0): """ Run inference on image Args: image: PIL Image top_k: Number of top predictions to return threshold: Minimum confidence threshold Returns: logits: Raw model output """ input_tensor = self.preprocess(image) if self.use_openvino: # OpenVINO inference results = self.session(input_tensor) logits = list(results.values())[0][0] else: # ONNX Runtime inference input_name = self.session.get_inputs()[0].name output_name = self.session.get_outputs()[0].name results = self.session.run([output_name], {input_name: input_tensor}) logits = results[0][0] return logits def softmax(x): """Compute softmax values for a set of scores.""" e_x = np.exp(x - np.max(x)) return e_x / e_x.sum(axis=0)
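

# --- Usage sketch (illustrative only, not part of the module API) ---
# A minimal example of driving ONNXInference end to end: build a session,
# run predict() to get raw logits, convert them to probabilities with the
# module-level softmax(), and print the top-5 class indices. The model and
# image paths below ("lsnet_xl_artist.onnx", "test.jpg") are hypothetical
# placeholders; substitute your own exported model and input image.
if __name__ == "__main__":
    from PIL import Image

    engine = ONNXInference("lsnet_xl_artist.onnx", model_arch="lsnet_xl_artist", device="cpu")
    logits = engine.predict(Image.open("test.jpg"))

    probs = softmax(logits)
    top5 = np.argsort(probs)[::-1][:5]
    for idx in top5:
        print(f"class {idx}: {probs[idx]:.4f}")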