roboflow · SkalskiP · May 27, 2024 · Oct 13, 2023 · Oct 18, 2023 · Oct 18, 2023
@@ -35,7 +35,7 @@
     DetectionDataset,
 )
 from supervision.detection.annotate import BoxAnnotator
-from supervision.detection.core import Detections
+from supervision.detection.core import Detections, merge_object_detection_pair
 from supervision.detection.line_zone import LineZone, LineZoneAnnotator
 from supervision.detection.tools.csv_sink import CSVSink
 from supervision.detection.tools.inference_slicer import InferenceSlicer
@@ -44,6 +44,8 @@
 from supervision.detection.tools.smoother import DetectionsSmoother
 from supervision.detection.utils import (
     box_iou_batch,
+    box_non_max_merge,
+    box_non_max_merge_batch,
     box_non_max_suppression,
     calculate_masks_centroids,
     clip_boxes,

@@ -8,6 +8,8 @@
 
 from supervision.config import CLASS_NAME_DATA_FIELD, ORIENTED_BOX_COORDINATES
 from supervision.detection.utils import (
+    box_non_max_merge,
+    box_non_max_merge_batch,
     box_non_max_suppression,
     calculate_masks_centroids,
     extract_ultralytics_masks,
@@ -1066,6 +1068,33 @@ def __setitem__(self, key: str, value: Union[np.ndarray, List]):
 
         self.data[key] = value
 
+    def _set_at_index(self, index: int, other: Detections):
+        """
+        Overwrite the detection stored at `index` with the single detection
+        held by `other` (its element at index 0).
+
+        `xyxy` is always copied. `mask`, `confidence`, `class_id` and
+        `tracker_id` are copied only when the field is not None on both
+        objects. The `data` dictionary is not touched.
+
+        Args:
+            index (int): Position in the current Detections object whose
+                values will be overwritten.
+            other (Detections): Detections object with exactly one element
+                to copy the values from.
+
+        Raises:
+            ValueError: If `other` is not made of exactly one element.
+        """
+        if len(other) != 1:
+            raise ValueError("Detection to set from must have exactly one element.")
+
+        self.xyxy[index] = other.xyxy[0]
+
+        # Optional fields are written only when both sides carry them.
+        for field_name in ("mask", "confidence", "class_id", "tracker_id"):
+            target = getattr(self, field_name)
+            source = getattr(other, field_name)
+            if target is not None and source is not None:
+                target[index] = source[0]
+
     @property
     def area(self) -> np.ndarray:
         """
@@ -1150,3 +1179,154 @@ def with_nms(
             )
 
         return self[indices]
+
+    def with_nmm(
+        self, threshold: float = 0.5, class_agnostic: bool = False
+    ) -> Detections:
+        """
+        Perform non-maximum merging on the current set of object detections.
+
+        Detections are grouped with `box_non_max_merge` (class-agnostic) or
+        `box_non_max_merge_batch` (class-aware). Each group is then folded
+        into a single detection with `merge_object_detection_pair`. The
+        current object is not modified by the merge.
+
+        Args:
+            threshold (float, optional): The intersection-over-union threshold
+                to use for non-maximum merging. Defaults to 0.5.
+            class_agnostic (bool, optional): Whether to perform class-agnostic
+                non-maximum merging. If True, the class_id of each detection
+                will be ignored. Defaults to False.
+
+        Returns:
+            Detections: A new Detections object containing one detection per
+                merge group. An empty Detections object is returned as is.
+
+        Raises:
+            AssertionError: If `threshold` is not within [0, 1].
+                If `confidence` is None.
+                If `class_id` is None and class_agnostic is False.
+        """
+        if len(self) == 0:
+            return self
+
+        assert 0.0 <= threshold <= 1.0, "Threshold must be between 0 and 1."
+
+        assert (
+            self.confidence is not None
+        ), "Detections confidence must be given for NMM to be executed."
+
+        if class_agnostic:
+            predictions = np.hstack((self.xyxy, self.confidence.reshape(-1, 1)))
+            keep_to_merge_list = box_non_max_merge(predictions, threshold)
+        else:
+            assert self.class_id is not None, (
+                "Detections class_id must be given for NMM to be executed. If you"
+                " intended to perform class agnostic NMM set class_agnostic=True."
+            )
+            predictions = np.hstack(
+                (
+                    self.xyxy,
+                    self.confidence.reshape(-1, 1),
+                    self.class_id.reshape(-1, 1),
+                )
+            )
+            keep_to_merge_list = box_non_max_merge_batch(predictions, threshold)
+
+        result = []
+        for keep_ind, merge_ind_list in keep_to_merge_list.items():
+            # Accumulate the merge in a local object instead of writing the
+            # intermediate result back into `self`: the caller's detections
+            # stay intact and the winner's `data` travels with its class_id.
+            merged_detection = self[keep_ind]
+            for merge_ind in merge_ind_list:
+                merged_detection = merge_object_detection_pair(
+                    merged_detection, self[merge_ind]
+                )
+            result.append(merged_detection)
+
+        return Detections.merge(result)
+
+
+def merge_object_detection_pair(det1: Detections, det2: Detections) -> Detections:
+    """
+    Merges two Detections objects into a single Detections object.
+    Assumes each Detections contains exactly one object.
+
+    A `winning` detection is determined based on the confidence score of the two
+    input detections (ties, and a missing confidence on `det2`, favour `det1`).
+    This winning detection is then used to specify which `class_id`,
+    `tracker_id`, and `data` to include in the merged Detections object.
+
+    The resulting `confidence` of the merged object is the average of the two
+    confidences, weighted by the bounding box area of each detection. If both
+    boxes have zero area, the plain average is used instead. If either
+    confidence is missing, the winning detection's confidence is kept.
+
+    The merged bounding box is the smallest box enclosing both input boxes.
+    The merged mask is the element-wise union of the two masks; if either mask
+    is missing, the winning detection's mask is used.
+
+    Args:
+        det1 (Detections):
+            The first Detections object
+        det2 (Detections):
+            The second Detections object
+
+    Returns:
+        Detections: A new Detections object, with merged attributes.
+
+    Raises:
+        ValueError: If the input Detections objects do not have exactly 1 detected
+            object.
+
+    Example:
+        ```python
+        import cv2
+        import supervision as sv
+        from inference import get_model
+
+        image = cv2.imread(<SOURCE_IMAGE_PATH>)
+        model = get_model(model_id="yolov8s-640")
+
+        result = model.infer(image)[0]
+        detections = sv.Detections.from_inference(result)
+
+        merged_detections = merge_object_detection_pair(
+            detections[0], detections[1])
+        ```
+    """
+    if len(det1) != 1 or len(det2) != 1:
+        raise ValueError("Both Detections should have exactly 1 detected object.")
+
+    if det2.confidence is None:
+        winning_det = det1
+    elif det1.confidence is None:
+        winning_det = det2
+    elif det1.confidence[0] >= det2.confidence[0]:
+        winning_det = det1
+    else:
+        winning_det = det2
+
+    box1 = det1.xyxy[0]
+    box2 = det2.xyxy[0]
+
+    # Enclosing box: smallest top-left corner, largest bottom-right corner.
+    merged_x1, merged_y1 = np.minimum(box1[:2], box2[:2])
+    merged_x2, merged_y2 = np.maximum(box1[2:], box2[2:])
+    merged_xy = np.array([[merged_x1, merged_y1, merged_x2, merged_y2]])
+
+    if det2.mask is None or det1.mask is None:
+        merged_mask = winning_det.mask
+    else:
+        merged_mask = np.logical_or(det1.mask, det2.mask)
+
+    if det1.confidence is None or det2.confidence is None:
+        merged_confidence = winning_det.confidence
+    else:
+        area_det1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
+        area_det2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
+        total_area = area_det1 + area_det2
+        if total_area > 0:
+            merged_confidence = (
+                area_det1 * det1.confidence[0] + area_det2 * det2.confidence[0]
+            ) / total_area
+        else:
+            # Degenerate (zero-area) boxes give no usable weights; dividing by
+            # the total area would produce NaN, so weight both equally.
+            merged_confidence = (det1.confidence[0] + det2.confidence[0]) / 2
+        merged_confidence = np.array([merged_confidence])
+
+    winning_class_id = winning_det.class_id
+    winning_tracker_id = winning_det.tracker_id
+    winning_data = winning_det.data
+
+    return Detections(
+        xyxy=merged_xy,
+        mask=merged_mask,
+        confidence=merged_confidence,
+        class_id=winning_class_id,
+        tracker_id=winning_tracker_id,
+        data=winning_data,
+    )