Skip to content

Commit da6f46e

Browse files
authored
feat: preprocessing in python for NPU (#414)
* fix * reshape inputs * handle npu anomaly resize
1 parent 228920b commit da6f46e

File tree

6 files changed

+182
-48
lines changed

6 files changed

+182
-48
lines changed

src/model_api/adapters/onnx_adapter.py

Lines changed: 13 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
#
2-
# Copyright (C) 2020-2024 Intel Corporation
2+
# Copyright (C) 2020-2025 Intel Corporation
33
# SPDX-License-Identifier: Apache-2.0
44
#
55

66
from __future__ import annotations
77

88
import sys
9-
from functools import partial, reduce
109
from typing import Any, Callable
1110

1211
import numpy as np
1312

14-
from .utils import INTERPOLATION_TYPES, RESIZE_TYPES, InputTransform
13+
from .utils import setup_python_preprocessing_pipeline
1514

1615
try:
1716
import onnx
@@ -145,30 +144,17 @@ def embed_preprocessing(
145144
"""
146145
Adds external preprocessing steps done before ONNX model execution.
147146
"""
148-
preproc_funcs = [np.squeeze]
149-
if resize_mode != "crop":
150-
if resize_mode == "fit_to_window_letterbox":
151-
resize_fn = partial(
152-
RESIZE_TYPES[resize_mode],
153-
size=target_shape,
154-
interpolation=INTERPOLATION_TYPES[interpolation_mode],
155-
pad_value=pad_value,
156-
)
157-
else:
158-
resize_fn = partial(
159-
RESIZE_TYPES[resize_mode],
160-
size=target_shape,
161-
interpolation=INTERPOLATION_TYPES[interpolation_mode],
162-
)
163-
else:
164-
resize_fn = partial(RESIZE_TYPES[resize_mode], size=target_shape)
165-
preproc_funcs.append(resize_fn)
166-
input_transform = InputTransform(brg2rgb, mean, scale)
167-
preproc_funcs.extend((input_transform.__call__, partial(change_layout, layout=layout)))
168-
169-
self.preprocessor = reduce(
170-
lambda f, g: lambda x: f(g(x)),
171-
reversed(preproc_funcs),
147+
self.preprocessor = setup_python_preprocessing_pipeline(
148+
layout=layout,
149+
resize_mode=resize_mode,
150+
interpolation_mode=interpolation_mode,
151+
target_shape=target_shape,
152+
pad_value=pad_value,
153+
dtype=dtype,
154+
brg2rgb=brg2rgb,
155+
mean=mean,
156+
scale=scale,
157+
input_idx=input_idx,
172158
)
173159

174160
def get_model(self):
@@ -227,18 +213,3 @@ def get_shape_from_onnx(onnx_shape):
227213
if isinstance(item, str):
228214
onnx_shape[i] = -1
229215
return tuple(onnx_shape)
230-
231-
232-
def change_layout(image, layout):
233-
"""Changes the input image layout to fit the layout of the model input layer.
234-
235-
Args:
236-
inputs (ndarray): a single image as 3D array in HWC layout
237-
238-
Returns:
239-
- the image with layout aligned with the model layout
240-
"""
241-
if "CHW" in layout:
242-
image = image.transpose((2, 0, 1)) # HWC->CHW
243-
image = image.reshape((1, *image.shape))
244-
return image

src/model_api/adapters/openvino_adapter.py

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
resize_image,
4242
resize_image_letterbox,
4343
resize_image_with_aspect,
44+
setup_python_preprocessing_pipeline,
4445
)
4546

4647

@@ -143,6 +144,8 @@ def __init__(
143144
)
144145
self.is_onnx_file = False
145146
self.onnx_metadata = {}
147+
self.preprocessor = lambda arg: arg
148+
self.use_python_preprocessing = False
146149

147150
if isinstance(self.model_path, (str, Path)):
148151
if Path(self.model_path).suffix == ".onnx" and weights_path:
@@ -175,7 +178,52 @@ def __init__(
175178
msg = "Model must be bytes or a file"
176179
raise RuntimeError(msg)
177180

181+
def reshape_dynamic_inputs(self, *, default_size: int = 224, default_channels: int = 3) -> None:
    """Give every dynamically-shaped model input a static shape (used for NPU devices).

    Each dimension of a dynamic input is resolved as follows:

    * a ``tuple`` dimension is replaced by the integer midpoint of its two values;
    * a static dimension is kept as is;
    * a ``-1`` dimension becomes ``1`` when it is the first axis or the input is
      not 4D, otherwise ``default_channels`` for the guessed channel axis and
      ``default_size`` for every other axis.

    For 4D inputs the layout is guessed: a static second dimension that is
    ``<= 4`` is treated as the channel axis of an NCHW input; anything else is
    handled as channels-last (channel axis 3).

    Args:
        default_size: Value substituted for unknown non-channel dimensions of 4D inputs.
        default_channels: Value substituted for an unknown channel dimension.
    """
    for model_input in self.model.inputs:
        if not model_input.partial_shape.is_dynamic:
            continue

        input_name = model_input.get_any_name()
        shape = get_input_shape(model_input)
        is_4d = len(shape) == 4
        # Layout heuristic: a small, static second dimension looks like NCHW channels.
        is_nchw = is_4d and not isinstance(shape[1], tuple) and shape[1] != -1 and shape[1] <= 4

        static_shape = []
        for axis, dim in enumerate(shape):
            if isinstance(dim, tuple):
                static_shape.append((dim[0] + dim[1]) // 2)
            elif dim != -1:
                static_shape.append(dim)
            elif axis == 0 or not is_4d:
                static_shape.append(1)
            elif not is_nchw and axis == 3:
                # In the NCHW case the channel axis is static by construction
                # (see is_nchw), so only the channels-last axis can need a default.
                static_shape.append(default_channels)
            else:
                static_shape.append(default_size)

        log.info(
            f"NPU: Reshaping input '{input_name}' from dynamic {shape} to static {static_shape}",
        )
        self.reshape_model({input_name: static_shape})
220+
178221
def load_model(self) -> None:
222+
"""Loads the model to the device specified in the constructor"""
223+
devices = parse_devices(self.device)
224+
if any("NPU" in dev.upper() for dev in devices) and self.model.is_dynamic():
225+
self.reshape_dynamic_inputs()
226+
179227
self.compiled_model = self.core.compile_model(
180228
self.model,
181229
self.device,
@@ -280,11 +328,17 @@ def copy_raw_result(self, request):
280328
return {key: request.get_tensor(key).data.copy() for key in self.get_output_layers()}
281329

282330
def infer_sync(self, dict_data: dict[str, ndarray]) -> dict[str, ndarray]:
    """Run one blocking inference and return its raw result.

    Args:
        dict_data: Mapping of input name to input data. When
            ``self.use_python_preprocessing`` is set, every value is passed
            through ``self.preprocessor`` first.

    Returns:
        Whatever ``self.get_raw_result`` produces for the finished request.
    """
    if self.use_python_preprocessing:
        # Build a new mapping rather than overwriting the caller's entries:
        # reusing the same dict for a second call would otherwise preprocess
        # already-preprocessed data.
        dict_data = {name: self.preprocessor(value) for name, value in dict_data.items()}
    self.infer_request = self.async_queue[self.async_queue.get_idle_request_id()]
    self.infer_request.infer(dict_data)
    return self.get_raw_result(self.infer_request)
286337

287338
def infer_async(self, dict_data, callback_data) -> None:
    """Submit one inference to the asynchronous queue.

    Args:
        dict_data: Mapping of input name to input data. When
            ``self.use_python_preprocessing`` is set, every value is passed
            through ``self.preprocessor`` first.
        callback_data: Forwarded unchanged to ``self.async_queue.start_async``.
    """
    if self.use_python_preprocessing:
        # Build a new mapping rather than overwriting the caller's entries, so
        # the caller's dict still holds the data it originally passed in.
        dict_data = {name: self.preprocessor(value) for name, value in dict_data.items()}
    self.async_queue.start_async(dict_data, callback_data)
289343

290344
def set_callback(self, callback_fn: Callable):
@@ -347,8 +401,32 @@ def embed_preprocessing(
347401
input_idx: int = 0,
348402
) -> None:
349403
"""
350-
Embeds OpenVINO PrePostProcessor module into the model.
404+
Embeds preprocessing into the model, or sets up Python preprocessing for NPU devices.
351405
"""
406+
# Check if we should use Python preprocessing for NPU devices
407+
devices = parse_devices(self.device)
408+
if any("NPU" in dev.upper() for dev in devices):
409+
self.preprocessor = setup_python_preprocessing_pipeline(
410+
layout=layout,
411+
resize_mode=resize_mode,
412+
interpolation_mode=interpolation_mode,
413+
target_shape=target_shape,
414+
pad_value=pad_value,
415+
dtype=dtype,
416+
brg2rgb=brg2rgb,
417+
mean=mean,
418+
scale=scale,
419+
input_idx=input_idx,
420+
)
421+
self.use_python_preprocessing = True
422+
input_name = self.model.inputs[input_idx].get_any_name()
423+
if layout == "NCHW":
424+
static_shape = [1, 3, target_shape[1], target_shape[0]]
425+
else:
426+
static_shape = [1, target_shape[1], target_shape[0], 3]
427+
self.reshape_model({input_name: static_shape})
428+
return
429+
352430
ppp = PrePostProcessor(self.model)
353431

354432
# Change the input type to the 8-bit image

src/model_api/adapters/utils.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,81 @@ def crop_resize_ocv(image: np.ndarray, size: tuple[int, int]) -> np.ndarray:
517517
return cv2.resize(cropped_frame, size)
518518

519519

520+
def setup_python_preprocessing_pipeline(
    layout: str,
    resize_mode: str,
    interpolation_mode: str,
    target_shape: tuple[int, ...],
    pad_value: int,
    dtype: type = int,
    brg2rgb: bool = False,
    mean: list[Any] | None = None,
    scale: list[Any] | None = None,
    input_idx: int = 0,
):
    """
    Builds a single callable that preprocesses an input in Python.

    The returned callable applies, in this order: ``np.squeeze``, the resize
    function selected by ``resize_mode``, an ``InputTransform`` built from
    ``brg2rgb``/``mean``/``scale``, and ``change_layout`` for ``layout``.

    Args:
        layout: Target layout passed to ``change_layout`` (e.g., "NCHW", "NHWC")
        resize_mode: Key into ``RESIZE_TYPES`` ("crop", "standard", "fit_to_window", "fit_to_window_letterbox")
        interpolation_mode: Key into ``INTERPOLATION_TYPES``; not looked up for "crop"
        target_shape: Passed to the resize function as ``size``
        pad_value: Padding value, forwarded only for "fit_to_window_letterbox"
        dtype: Not used by this function
        brg2rgb: Forwarded to ``InputTransform``
        mean: Forwarded to ``InputTransform``
        scale: Forwarded to ``InputTransform``
        input_idx: Input index (unused but kept for compatibility)

    Returns:
        Callable: A preprocessing function that can be applied to input data
    """
    from functools import partial

    resize_impl = RESIZE_TYPES[resize_mode]
    resize_kwargs = {"size": target_shape}
    if resize_mode != "crop":
        # Only the non-crop resizers are given an interpolation argument.
        resize_kwargs["interpolation"] = INTERPOLATION_TYPES[interpolation_mode]
        if resize_mode == "fit_to_window_letterbox":
            resize_kwargs["pad_value"] = pad_value

    stages = (
        np.squeeze,
        partial(resize_impl, **resize_kwargs),
        InputTransform(brg2rgb, mean, scale).__call__,
        partial(change_layout, layout=layout),
    )

    def run_pipeline(data):
        # Apply the stages front to back, feeding each result into the next.
        for stage in stages:
            data = stage(data)
        return data

    return run_pipeline
577+
578+
579+
def change_layout(image, layout):
    """Rearranges an image for the model input layout and adds a leading axis.

    Args:
        image (ndarray): a single image as 3D array in HWC layout
        layout (str): target layout; axes are moved to CHW when it contains "CHW"

    Returns:
        ndarray: the image with a leading axis of size 1, channels-first when
        the layout contains "CHW" and in the input axis order otherwise
    """
    channels_first = "CHW" in layout
    arranged = image.transpose(2, 0, 1) if channels_first else image  # HWC->CHW
    batched_shape = (1, *arranged.shape)
    return arranged.reshape(batched_shape)
593+
594+
520595
RESIZE_TYPES: dict[str, Callable] = {
521596
"crop": crop_resize_ocv,
522597
"standard": resize_image_ocv,

src/model_api/models/anomaly.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,14 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
8080
"""
8181
original_shape = inputs.shape
8282

83+
if (
84+
self._is_dynamic
85+
and getattr(self.inference_adapter, "device", "") == "NPU"
86+
and hasattr(self.inference_adapter, "compiled_model")
87+
):
88+
_, self.c, self.h, self.w = self.inference_adapter.compiled_model.inputs[0].get_shape()
89+
self._is_dynamic = False
90+
8391
if self._is_dynamic:
8492
h, w, c = inputs.shape
8593
resized_shape = (w, h, c)
@@ -98,11 +106,13 @@ def preprocess(self, inputs: np.ndarray) -> list[dict]:
98106
if self.embedded_processing:
99107
processed_image = inputs[None]
100108
else:
109+
# Resize image to expected model input dimensions
110+
resized_image = self.resize(inputs, (self.w, self.h))
101111
# Convert to float32 and normalize for anomalib
102-
if inputs.dtype == np.uint8:
103-
processed_image = inputs.astype(np.float32) / 255.0
112+
if resized_image.dtype == np.uint8:
113+
processed_image = resized_image.astype(np.float32) / 255.0
104114
else:
105-
processed_image = inputs.astype(np.float32)
115+
processed_image = resized_image.astype(np.float32)
106116
processed_image = self._change_layout(processed_image)
107117

108118
return [

src/model_api/tilers/detection.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def _merge_results(self, results: list[dict], shape: tuple[int, int, int]) -> De
112112

113113
merged_vector = np.mean(feature_vectors, axis=0) if feature_vectors else np.ndarray(0)
114114
saliency_map = self._merge_saliency_maps(saliency_maps, shape, tiles_coords) if saliency_maps else np.ndarray(0)
115-
label_names = [self.model.labels[int(label_idx)] for label_idx in detections_array[:, 0]]
115+
label_names = [self.model.get_label_name(int(label_idx)) for label_idx in detections_array[:, 0]]
116116

117117
return DetectionResult(
118118
bboxes=detections_array[:, 2:].astype(np.int32),

src/model_api/tilers/instance_segmentation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def _merge_results(self, results, shape) -> InstanceSegmentationResult:
123123
labels = labels.astype(np.int32)
124124
resized_masks, label_names = [], []
125125
for mask, box, label_idx in zip(masks, bboxes, labels):
126-
label_names.append(self.model.labels[int(label_idx.squeeze())])
126+
label_names.append(self.model.get_label_name(int(label_idx.squeeze())))
127127
resized_masks.append(_segm_postprocess(box, mask, *shape[:-1]))
128128

129129
resized_masks = np.stack(resized_masks) if resized_masks else masks

0 commit comments

Comments
 (0)