open-edge-platform · sovrasov · Nov 26, 2024 · Nov 20, 2024 · Nov 22, 2024 · Nov 22, 2024
@@ -12,15 +12,12 @@ A single input image of shape (H, W, 3) where H and W are the height and width o
 
 ### Outputs
 
-Detection model outputs a list of detection objects (i.e `list[Detection]`) wrapped in `DetectionResult`, each object containing the following attributes:
+Detection model outputs a `DetectionResult` object containing the following attributes:
 
-- `score` (float) - Confidence score of the object.
-- `id` (int) - Class label of the object.
-- `str_label` (str) - String label of the object.
-- `xmin` (int) - X-coordinate of the top-left corner of the bounding box.
-- `ymin` (int) - Y-coordinate of the top-left corner of the bounding box.
-- `xmax` (int) - X-coordinate of the bottom-right corner of the bounding box.
-- `ymax` (int) - Y-coordinate of the bottom-right corner of the bounding box.
+- `bboxes` (np.ndarray) - Bounding boxes of the detected objects, each in the format x1, y1, x2, y2.
+- `scores` (np.ndarray) - Confidence scores of the detected objects.
+- `labels` (np.ndarray) - Class labels of the detected objects.
+- `label_names` (list[str]) - List of class names of the detected objects.
 
 ## Example
 
@@ -34,11 +31,9 @@ model = SSD.create_model("model.xml")
 # Forward pass
 predictions = model(image)
 
-# Iterate over the segmented objects
-for pred_obj in predictions.objects:
-    pred_score = pred_obj.score
-    label_id = pred_obj.id
-    bbox = [pred_obj.xmin, pred_obj.ymin, pred_obj.xmax, pred_obj.ymax]
+# Iterate over detection result
+for box, score, label in zip(predictions.bboxes, predictions.scores, predictions.labels):
+    print(f"Box: {box}, Score: {score}, Label: {label}")
 ```
 
 ```{eval-rst}

@@ -16,7 +16,6 @@
     ClassificationResult,
     Contour,
     DetectedKeypoints,
-    Detection,
     DetectionResult,
     ImageResultWithSoftPrediction,
     InstanceSegmentationResult,
@@ -90,7 +89,6 @@
     "SAMImageEncoder",
     "ClassificationResult",
     "Prompt",
-    "Detection",
     "DetectionResult",
     "DetectedKeypoints",
     "classification_models",

@@ -3,8 +3,10 @@
 # SPDX-License-Identifier: Apache-2.0
 #
 
+import numpy as np
+
 from .image_model import ImageModel
-from .result_types import Detection
+from .result_types import DetectionResult
 from .types import ListValue, NumericalValue, StringValue
 from .utils import load_labels
 
@@ -65,18 +67,15 @@ def parameters(cls):
 
         return parameters
 
-    def _resize_detections(self, detections: list[Detection], meta):
+    def _resize_detections(self, detection_result: DetectionResult, meta: dict):
         """Resizes detection bounding boxes according to initial image shape.
 
         It implements image resizing depending on the set `resize_type`(see `ImageModel` for details).
         Next, it applies bounding boxes clipping.
 
         Args:
-            detections (List[Detection]): list of detections with coordinates in normalized form
+            detection_result (DetectionResult): detection result with coordinates in normalized form
             meta (dict): the input metadata obtained from `preprocess` method
-
-        Returns:
-            - list of detections with resized and clipped coordinates to fit the initial image
         """
         input_img_height, input_img_widht = meta["original_shape"][:2]
         inverted_scale_x = input_img_widht / self.w
@@ -92,63 +91,35 @@ def _resize_detections(self, detections: list[Detection], meta):
                 pad_left = (self.w - round(input_img_widht / inverted_scale_x)) // 2
                 pad_top = (self.h - round(input_img_height / inverted_scale_y)) // 2
 
-        def _clamp_and_round(val, min_value, max_value):
-            return round(max(min_value, min(max_value, val)))
+        boxes = detection_result.bboxes
+        boxes[:, 0::2] = (boxes[:, 0::2] * self.w - pad_left) * inverted_scale_x
+        boxes[:, 1::2] = (boxes[:, 1::2] * self.h - pad_top) * inverted_scale_y
+        np.round(boxes, out=boxes)
+        boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, input_img_widht)
+        boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, input_img_height)
+        detection_result.bboxes = boxes.astype(np.int32)
 
-        for detection in detections:
-            detection.xmin = _clamp_and_round(
-                (detection.xmin * self.w - pad_left) * inverted_scale_x,
-                0,
-                input_img_widht,
-            )
-            detection.ymin = _clamp_and_round(
-                (detection.ymin * self.h - pad_top) * inverted_scale_y,
-                0,
-                input_img_height,
-            )
-            detection.xmax = _clamp_and_round(
-                (detection.xmax * self.w - pad_left) * inverted_scale_x,
-                0,
-                input_img_widht,
-            )
-            detection.ymax = _clamp_and_round(
-                (detection.ymax * self.h - pad_top) * inverted_scale_y,
-                0,
-                input_img_height,
-            )
-
-        return detections
-
-    def _filter_detections(self, detections: list[Detection], box_area_threshold=0.0):
+    def _filter_detections(self, detection_result: DetectionResult, box_area_threshold=0.0):
         """Filters detections by confidence threshold and box size threshold
 
         Args:
-            detections (List[Detection]): list of detections with coordinates in normalized form
+            detection_result (DetectionResult): DetectionResult object with coordinates in normalized form
             box_area_threshold (float): minimal area of the bounding to be considered
 
         Returns:
             - None; `detection_result` is filtered in place (its `bboxes`, `labels` and `scores` are replaced by the kept entries)
         """
-        filtered_detections = []
-        for detection in detections:
-            if (
-                detection.score < self.confidence_threshold
-                or (detection.xmax - detection.xmin) * (detection.ymax - detection.ymin) < box_area_threshold
-            ):
-                continue
-            filtered_detections.append(detection)
-
-        return filtered_detections
-
-    def _add_label_names(self, detections: list[Detection]):
+        keep = (detection_result.get_obj_sizes() > box_area_threshold) & (
+            detection_result.scores > self.confidence_threshold
+        )
+        detection_result.bboxes = detection_result.bboxes[keep]
+        detection_result.labels = detection_result.labels[keep]
+        detection_result.scores = detection_result.scores[keep]
+
+    def _add_label_names(self, detection_result: DetectionResult) -> None:
         """Adds labels names to detections if they are available
 
         Args:
-            detections (List[Detection]): list of detections with coordinates in normalized form
-
-        Returns:
-            - list of detections with label strings
+            detection_result (DetectionResult): detection result whose `label_names` is filled in from its `labels`
         """
-        for detection in detections:
-            detection.str_label = self.get_label_name(detection.id)
-        return detections
+        detection_result.label_names = [self.get_label_name(label_idx) for label_idx in detection_result.labels]
@@ -10,7 +10,7 @@
 import numpy as np
 
 from .image_model import ImageModel
-from .result_types import DetectedKeypoints, Detection
+from .result_types import DetectedKeypoints, DetectionResult
 from .types import ListValue
 
 
@@ -77,25 +77,27 @@ def __init__(self, base_model: KeypointDetectionModel) -> None:
     def predict(
         self,
         image: np.ndarray,
-        detections: list[Detection],
+        detection_result: DetectionResult,
     ) -> list[DetectedKeypoints]:
         """Predicts keypoints for the given image and detections.
 
         Args:
             image (np.ndarray): input full-size image
-            detections (list[Detection]): detections located within the given image
+            detection_result (DetectionResult): detections located within the given image
 
         Returns:
             list[DetectedKeypoints]: per detection keypoints in detection coordinates
         """
         crops = []
-        for det in detections:
-            crops.append(image[det.ymin : det.ymax, det.xmin : det.xmax])
+        for box in detection_result.bboxes:
+            x1, y1, x2, y2 = box
+            crops.append(image[y1:y2, x1:x2])
 
         crops_results = self.predict_crops(crops)
-        for i, det in enumerate(detections):
+        for i, box in enumerate(detection_result.bboxes):
+            x1, y1, x2, y2 = box
             crops_results[i] = DetectedKeypoints(
-                crops_results[i].keypoints + np.array([det.xmin, det.ymin]),
+                crops_results[i].keypoints + np.array([x1, y1]),
                 crops_results[i].scores,
             )
 

@@ -5,7 +5,12 @@
 
 from .anomaly import AnomalyResult
 from .classification import ClassificationResult
-from .detection import Detection, DetectionResult
+from .detection import (
+    BoxesLabelsParser,
+    DetectionResult,
+    MultipleOutputParser,
+    SingleOutputParser,
+)
 from .keypoint import DetectedKeypoints
 from .segmentation import (
     Contour,
@@ -18,13 +23,15 @@
 
 __all__ = [
     "AnomalyResult",
+    "BoxesLabelsParser",
     "ClassificationResult",
     "Contour",
-    "Detection",
     "DetectionResult",
     "DetectedKeypoints",
+    "MultipleOutputParser",
     "SegmentedObject",
     "SegmentedObjectWithRects",
+    "SingleOutputParser",
     "ImageResultWithSoftPrediction",
     "InstanceSegmentationResult",
     "PredictedMask",