Skip to content
21 changes: 8 additions & 13 deletions docs/source/python/models/detection_model.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,12 @@ A single input image of shape (H, W, 3) where H and W are the height and width o

### Outputs

Detection model outputs a list of detection objects (i.e `list[Detection]`) wrapped in `DetectionResult`, each object containing the following attributes:
Detection model outputs a `DetectionResult` object containing the following attributes:

- `score` (float) - Confidence score of the object.
- `id` (int) - Class label of the object.
- `str_label` (str) - String label of the object.
- `xmin` (int) - X-coordinate of the top-left corner of the bounding box.
- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box.
- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box.
- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box.
- `bboxes` (np.ndarray) - Bounding boxes of the detected objects, each in the format x1, y1, x2, y2.
- `scores` (np.ndarray) - Confidence scores of the detected objects.
- `labels` (np.ndarray) - Class labels of the detected objects.
- `label_names` (list[str]) - List of class names of the detected objects.

## Example

Expand All @@ -34,11 +31,9 @@ model = SSD.create_model("model.xml")
# Forward pass
predictions = model(image)

# Iterate over the segmented objects
for pred_obj in predictions.objects:
pred_score = pred_obj.score
label_id = pred_obj.id
bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax]
# Iterate over the detection results
for box, score, label in zip(predictions.bboxes, predictions.scores, predictions.labels):
print(f"Box: {box}, Score: {score}, Label: {label}")
```

```{eval-rst}
Expand Down
2 changes: 0 additions & 2 deletions model_api/python/model_api/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
ClassificationResult,
Contour,
DetectedKeypoints,
Detection,
DetectionResult,
ImageResultWithSoftPrediction,
InstanceSegmentationResult,
Expand Down Expand Up @@ -90,7 +89,6 @@
"SAMImageEncoder",
"ClassificationResult",
"Prompt",
"Detection",
"DetectionResult",
"DetectedKeypoints",
"classification_models",
Expand Down
77 changes: 24 additions & 53 deletions model_api/python/model_api/models/detection_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
# SPDX-License-Identifier: Apache-2.0
#

import numpy as np

from .image_model import ImageModel
from .result_types import Detection
from .result_types import DetectionResult
from .types import ListValue, NumericalValue, StringValue
from .utils import load_labels

Expand Down Expand Up @@ -65,18 +67,15 @@ def parameters(cls):

return parameters

def _resize_detections(self, detections: list[Detection], meta):
def _resize_detections(self, detection_result: DetectionResult, meta: dict):
"""Resizes detection bounding boxes according to initial image shape.

It implements image resizing depending on the set `resize_type`(see `ImageModel` for details).
Next, it applies bounding boxes clipping.

Args:
detections (List[Detection]): list of detections with coordinates in normalized form
detection_result (DetectionResult): detection result with coordinates in normalized form
meta (dict): the input metadata obtained from `preprocess` method

Returns:
- list of detections with resized and clipped coordinates to fit the initial image
"""
input_img_height, input_img_widht = meta["original_shape"][:2]
inverted_scale_x = input_img_widht / self.w
Expand All @@ -92,63 +91,35 @@ def _resize_detections(self, detections: list[Detection], meta):
pad_left = (self.w - round(input_img_widht / inverted_scale_x)) // 2
pad_top = (self.h - round(input_img_height / inverted_scale_y)) // 2

def _clamp_and_round(val, min_value, max_value):
return round(max(min_value, min(max_value, val)))
boxes = detection_result.bboxes
boxes[:, 0::2] = (boxes[:, 0::2] * self.w - pad_left) * inverted_scale_x
boxes[:, 1::2] = (boxes[:, 1::2] * self.h - pad_top) * inverted_scale_y
np.round(boxes, out=boxes)
boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_widht)
boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, input_img_height)
detection_result.bboxes = boxes.astype(np.int32)

for detection in detections:
detection.xmin = _clamp_and_round(
(detection.xmin * self.w - pad_left) * inverted_scale_x,
0,
input_img_widht,
)
detection.ymin = _clamp_and_round(
(detection.ymin * self.h - pad_top) * inverted_scale_y,
0,
input_img_height,
)
detection.xmax = _clamp_and_round(
(detection.xmax * self.w - pad_left) * inverted_scale_x,
0,
input_img_widht,
)
detection.ymax = _clamp_and_round(
(detection.ymax * self.h - pad_top) * inverted_scale_y,
0,
input_img_height,
)

return detections

def _filter_detections(self, detections: list[Detection], box_area_threshold=0.0):
def _filter_detections(self, detection_result: DetectionResult, box_area_threshold=0.0):
"""Filters detections by confidence threshold and box size threshold

Args:
detections (List[Detection]): list of detections with coordinates in normalized form
detection_result (DetectionResult): DetectionResult object with coordinates in normalized form
box_area_threshold (float): minimal area of the bounding box to be considered

Returns:
- list of detections with confidence above the threshold
"""
filtered_detections = []
for detection in detections:
if (
detection.score < self.confidence_threshold
or (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin) < box_area_threshold
):
continue
filtered_detections.append(detection)

return filtered_detections

def _add_label_names(self, detections: list[Detection]):
keep = (detection_result.get_obj_sizes() > box_area_threshold) & (
detection_result.scores > self.confidence_threshold
)
detection_result.bboxes = detection_result.bboxes[keep]
detection_result.labels = detection_result.labels[keep]
detection_result.scores = detection_result.scores[keep]

def _add_label_names(self, detection_result: DetectionResult) -> None:
"""Adds labels names to detections if they are available

Args:
detections (List[Detection]): list of detections with coordinates in normalized form

Returns:
- list of detections with label strings
detection_result (DetectionResult): detection result whose `label_names` are filled in from its `labels`
"""
for detection in detections:
detection.str_label = self.get_label_name(detection.id)
return detections
detection_result.label_names = [self.get_label_name(label_idx) for label_idx in detection_result.labels]
16 changes: 9 additions & 7 deletions model_api/python/model_api/models/keypoint_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import numpy as np

from .image_model import ImageModel
from .result_types import DetectedKeypoints, Detection
from .result_types import DetectedKeypoints, DetectionResult
from .types import ListValue


Expand Down Expand Up @@ -77,25 +77,27 @@ def __init__(self, base_model: KeypointDetectionModel) -> None:
def predict(
self,
image: np.ndarray,
detections: list[Detection],
detection_result: DetectionResult,
) -> list[DetectedKeypoints]:
"""Predicts keypoints for the given image and detections.

Args:
image (np.ndarray): input full-size image
detections (list[Detection]): detections located within the given image
detection_result (DetectionResult): detections located within the given image

Returns:
list[DetectedKeypoints]: per detection keypoints in detection coordinates
"""
crops = []
for det in detections:
crops.append(image[det.ymin : det.ymax, det.xmin : det.xmax])
for box in detection_result.bboxes:
x1, y1, x2, y2 = box
crops.append(image[y1:y2, x1:x2])

crops_results = self.predict_crops(crops)
for i, det in enumerate(detections):
for i, box in enumerate(detection_result.bboxes):
x1, y1, x2, y2 = box
crops_results[i] = DetectedKeypoints(
crops_results[i].keypoints + np.array([det.xmin, det.ymin]),
crops_results[i].keypoints + np.array([x1, y1]),
crops_results[i].scores,
)

Expand Down
11 changes: 9 additions & 2 deletions model_api/python/model_api/models/result_types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@

from .anomaly import AnomalyResult
from .classification import ClassificationResult
from .detection import Detection, DetectionResult
from .detection import (
BoxesLabelsParser,
DetectionResult,
MultipleOutputParser,
SingleOutputParser,
)
from .keypoint import DetectedKeypoints
from .segmentation import (
Contour,
Expand All @@ -18,13 +23,15 @@

__all__ = [
"AnomalyResult",
"BoxesLabelsParser",
"ClassificationResult",
"Contour",
"Detection",
"DetectionResult",
"DetectedKeypoints",
"MultipleOutputParser",
"SegmentedObject",
"SegmentedObjectWithRects",
"SingleOutputParser",
"ImageResultWithSoftPrediction",
"InstanceSegmentationResult",
"PredictedMask",
Expand Down
Loading