Added diou and ciou losses for bbox regression

jebastin-nadar · facebook-github-bot · commit 0e6702d8c558 · 2021-10-12T03:16:43.000-07:00
Summary: Resolves #1085. References and credits: https://github.com/Zzh-tju/DIoU-pytorch-detectron/blob/master/lib/utils/net.py and https://github.com/facebookresearch/fvcore/blob/master/fvcore/nn/giou_loss.py | Pull Request resolved: #3481 | Test Plan: sandcastle | Differential Revision: D31463505 | Pulled By: ppwwyyxx | fbshipit-source-id: 04d815f979b589b7e3b3e5d9c55eab318762efe8
diff --git a/detectron2/config/defaults.py b/detectron2/config/defaults.py
@@ -219,7 +219,7 @@
 _C.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 256
 # Target fraction of foreground (positive) examples per RPN minibatch
 _C.MODEL.RPN.POSITIVE_FRACTION = 0.5
-# Options are: "smooth_l1", "giou"
+# Options are: "smooth_l1", "giou", "diou", "ciou"
 _C.MODEL.RPN.BBOX_REG_LOSS_TYPE = "smooth_l1"
 _C.MODEL.RPN.BBOX_REG_LOSS_WEIGHT = 1.0
 # Weights on (dx, dy, dw, dh) for normalizing RPN anchor regression targets
@@ -290,7 +290,7 @@
 # C4 don't use head name option
 # Options for non-C4 models: FastRCNNConvFCHead,
 _C.MODEL.ROI_BOX_HEAD.NAME = ""
-# Options are: "smooth_l1", "giou"
+# Options are: "smooth_l1", "giou", "diou", "ciou"
 _C.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_TYPE = "smooth_l1"
 # The final scaling coefficient on the box regression loss, used to balance the magnitude of its
 # gradients with other losses in the model. See also `MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT`.
@@ -455,7 +455,7 @@
 _C.MODEL.RETINANET.FOCAL_LOSS_GAMMA = 2.0
 _C.MODEL.RETINANET.FOCAL_LOSS_ALPHA = 0.25
 _C.MODEL.RETINANET.SMOOTH_L1_LOSS_BETA = 0.1
-# Options are: "smooth_l1", "giou"
+# Options are: "smooth_l1", "giou", "diou", "ciou"
 _C.MODEL.RETINANET.BBOX_REG_LOSS_TYPE = "smooth_l1"
 
 # One of BN, SyncBN, FrozenBN, GN
diff --git a/detectron2/layers/__init__.py b/detectron2/layers/__init__.py
@@ -19,5 +19,6 @@
 )
 from .blocks import CNNBlockBase, DepthwiseSeparableConv2d
 from .aspp import ASPP
+from .losses import ciou_loss, diou_loss
 
 __all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/detectron2/layers/losses.py b/detectron2/layers/losses.py
@@ -0,0 +1,133 @@
+import math
+import torch
+
+
+def diou_loss(
+    boxes1: torch.Tensor,
+    boxes2: torch.Tensor,
+    reduction: str = "none",
+    eps: float = 1e-7,
+) -> torch.Tensor:
+    """
+    Distance-IoU loss (Zhaohui Zheng et al.), https://arxiv.org/abs/1911.08287
+
+    For every box pair this evaluates ``1 - IoU + d / c``, where ``d`` is the
+    squared distance between the two box centers and ``c`` is the squared
+    diagonal of the smallest box enclosing both boxes.
+
+    Args:
+        boxes1, boxes2 (Tensor): box locations in XYXY format, shape (N, 4) or (4,).
+            Only ``boxes1`` is checked for ``x2 >= x1`` and ``y2 >= y1``.
+        reduction: 'none' | 'mean' | 'sum'
+                 'none': No reduction will be applied to the output.
+                 'mean': The output will be averaged (an empty input yields 0).
+                 'sum': The output will be summed.
+                 Any other value is treated like 'none'.
+        eps (float): small number to prevent division by zero
+
+    Returns:
+        Tensor: one loss per box pair when unreduced, otherwise a scalar tensor.
+    """
+
+    px1, py1, px2, py2 = boxes1.unbind(dim=-1)
+    gx1, gy1, gx2, gy2 = boxes2.unbind(dim=-1)
+
+    # TODO: use torch._assert_async() when pytorch 1.8 support is dropped
+    assert (px2 >= px1).all(), "bad box: x1 larger than x2"
+    assert (py2 >= py1).all(), "bad box: y1 larger than y2"
+
+    # Corners of the overlap rectangle; only meaningful where the boxes intersect.
+    ix1 = torch.max(px1, gx1)
+    iy1 = torch.max(py1, gy1)
+    ix2 = torch.min(px2, gx2)
+    iy2 = torch.min(py2, gy2)
+
+    # Intersection area stays 0 for pairs that do not overlap.
+    overlaps = (iy2 > iy1) & (ix2 > ix1)
+    inter = torch.zeros_like(px1)
+    inter[overlaps] = (ix2[overlaps] - ix1[overlaps]) * (iy2[overlaps] - iy1[overlaps])
+
+    area_pred = (px2 - px1) * (py2 - py1)
+    area_gt = (gx2 - gx1) * (gy2 - gy1)
+    union = area_pred + area_gt - inter + eps
+    iou = inter / union
+
+    # Squared diagonal of the smallest box enclosing both inputs.
+    enclose_w = torch.max(px2, gx2) - torch.min(px1, gx1)
+    enclose_h = torch.max(py2, gy2) - torch.min(py1, gy1)
+    diag_sq = (enclose_w ** 2) + (enclose_h ** 2) + eps
+
+    # Squared distance between the two box centers.
+    dx = (px2 + px1) / 2 - (gx1 + gx2) / 2
+    dy = (py2 + py1) / 2 - (gy1 + gy2) / 2
+    center_dist_sq = (dx ** 2) + (dy ** 2)
+
+    # Eqn. (7)
+    loss = 1 - iou + (center_dist_sq / diag_sq)
+
+    if reduction == "sum":
+        return loss.sum()
+    if reduction == "mean":
+        # 0.0 * sum() keeps a tensor result (and graph) when there are no boxes.
+        return loss.mean() if loss.numel() > 0 else 0.0 * loss.sum()
+    return loss
+
+
+def ciou_loss(
+    boxes1: torch.Tensor,
+    boxes2: torch.Tensor,
+    reduction: str = "none",
+    eps: float = 1e-7,
+) -> torch.Tensor:
+    """
+    Complete Intersection over Union Loss (Zhaohui Zheng et al.)
+    https://arxiv.org/abs/1911.08287
+
+    For every box pair this evaluates ``1 - IoU + d / c + alpha * v``, where
+    ``d`` is the squared distance between the box centers, ``c`` is the squared
+    diagonal of the smallest enclosing box, ``v`` measures the difference in
+    aspect ratio and ``alpha = v / (1 - IoU + v)`` is a weight that receives
+    no gradient.
+
+    Args:
+        boxes1, boxes2 (Tensor): box locations in XYXY format, shape (N, 4) or (4,).
+            Only ``boxes1`` is checked for ``x2 >= x1`` and ``y2 >= y1``.
+        reduction: 'none' | 'mean' | 'sum'
+                 'none': No reduction will be applied to the output.
+                 'mean': The output will be averaged (an empty input yields 0).
+                 'sum': The output will be summed.
+                 Any other value is treated like 'none'.
+        eps (float): small number to prevent division by zero. It is also added
+            to the box heights used for the aspect ratios, so boxes with zero
+            height produce a finite loss instead of NaN.
+
+    Returns:
+        Tensor: one loss per box pair when unreduced, otherwise a scalar tensor.
+    """
+
+    x1, y1, x2, y2 = boxes1.unbind(dim=-1)
+    x1g, y1g, x2g, y2g = boxes2.unbind(dim=-1)
+
+    # TODO: use torch._assert_async() when pytorch 1.8 support is dropped
+    assert (x2 >= x1).all(), "bad box: x1 larger than x2"
+    assert (y2 >= y1).all(), "bad box: y1 larger than y2"
+
+    # Intersection keypoints
+    xkis1 = torch.max(x1, x1g)
+    ykis1 = torch.max(y1, y1g)
+    xkis2 = torch.min(x2, x2g)
+    ykis2 = torch.min(y2, y2g)
+
+    # Intersection area stays 0 for pairs that do not overlap.
+    intsct = torch.zeros_like(x1)
+    mask = (ykis2 > ykis1) & (xkis2 > xkis1)
+    intsct[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask])
+    union = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsct + eps
+    iou = intsct / union
+
+    # smallest enclosing box
+    xc1 = torch.min(x1, x1g)
+    yc1 = torch.min(y1, y1g)
+    xc2 = torch.max(x2, x2g)
+    yc2 = torch.max(y2, y2g)
+    diag_len = ((xc2 - xc1) ** 2) + ((yc2 - yc1) ** 2) + eps
+
+    # centers of boxes
+    x_p = (x2 + x1) / 2
+    y_p = (y2 + y1) / 2
+    x_g = (x1g + x2g) / 2
+    y_g = (y1g + y2g) / 2
+    distance = ((x_p - x_g) ** 2) + ((y_p - y_g) ** 2)
+
+    # width and height of boxes
+    # eps on the heights keeps w / h well defined: a fully degenerate box
+    # (w == h == 0) would otherwise give 0 / 0 = NaN and poison the whole loss.
+    w_pred = x2 - x1
+    h_pred = y2 - y1 + eps
+    w_gt = x2g - x1g
+    h_gt = y2g - y1g + eps
+    aspect_diff = torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)
+    v = (4 / (math.pi ** 2)) * torch.pow(aspect_diff, 2)
+    # alpha is used as a constant weight: no gradient flows through it.
+    with torch.no_grad():
+        alpha = v / (1 - iou + v + eps)
+
+    # Eqn. (10)
+    loss = 1 - iou + (distance / diag_len) + alpha * v
+    if reduction == "mean":
+        # 0.0 * sum() keeps a tensor result (and graph) when there are no boxes.
+        loss = loss.mean() if loss.numel() > 0 else 0.0 * loss.sum()
+    elif reduction == "sum":
+        loss = loss.sum()
+
+    return loss
diff --git a/detectron2/modeling/box_regression.py b/detectron2/modeling/box_regression.py
@@ -4,7 +4,7 @@
 import torch
 from fvcore.nn import giou_loss, smooth_l1_loss
 
-from detectron2.layers import cat
+from detectron2.layers import cat, ciou_loss, diou_loss
 from detectron2.structures import Boxes
 
 # Value for clamping large dw and dh predictions. The heuristic is that we clamp
@@ -315,7 +315,8 @@ def _dense_box_regression_loss(
         pred_anchor_deltas: #lvl predictions, each is (N, HixWixA, 4)
         gt_boxes: N ground truth boxes, each has shape (R, 4) (R = sum(Hi * Wi * A))
         fg_mask: the foreground boolean mask of shape (N, R) to compute loss on
-        box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou".
+        box_reg_loss_type (str): Loss type to use. Supported losses: "smooth_l1", "giou",
+            "diou", "ciou".
         smooth_l1_beta (float): beta parameter for the smooth L1 regression loss. Default to
             use L1 loss. Only used when `box_reg_loss_type` is "smooth_l1"
     """
@@ -336,6 +337,20 @@ def _dense_box_regression_loss(
         loss_box_reg = giou_loss(
             torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum"
         )
+    elif box_reg_loss_type == "diou":
+        pred_boxes = [
+            box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1)
+        ]
+        loss_box_reg = diou_loss(
+            torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum"
+        )
+    elif box_reg_loss_type == "ciou":
+        pred_boxes = [
+            box2box_transform.apply_deltas(k, anchors) for k in cat(pred_anchor_deltas, dim=1)
+        ]
+        loss_box_reg = ciou_loss(
+            torch.stack(pred_boxes)[fg_mask], torch.stack(gt_boxes)[fg_mask], reduction="sum"
+        )
     else:
         raise ValueError(f"Invalid dense box regression loss type '{box_reg_loss_type}'")
     return loss_box_reg
diff --git a/detectron2/modeling/meta_arch/retinanet.py b/detectron2/modeling/meta_arch/retinanet.py
@@ -88,7 +88,7 @@ def __init__(
             focal_loss_alpha (float): focal_loss_alpha
             focal_loss_gamma (float): focal_loss_gamma
             smooth_l1_beta (float): smooth_l1_beta
-            box_reg_loss_type (str): Options are "smooth_l1", "giou"
+            box_reg_loss_type (str): Options are "smooth_l1", "giou", "diou", "ciou"
 
             # Inference parameters:
             test_score_thresh (float): Inference cls score threshold, only anchors with
diff --git a/detectron2/modeling/roi_heads/fast_rcnn.py b/detectron2/modeling/roi_heads/fast_rcnn.py
@@ -7,7 +7,15 @@
 from torch.nn import functional as F
 
 from detectron2.config import configurable
-from detectron2.layers import ShapeSpec, batched_nms, cat, cross_entropy, nonzero_tuple
+from detectron2.layers import (
+    ShapeSpec,
+    batched_nms,
+    cat,
+    ciou_loss,
+    cross_entropy,
+    diou_loss,
+    nonzero_tuple,
+)
 from detectron2.modeling.box_regression import Box2BoxTransform
 from detectron2.structures import Boxes, Instances
 from detectron2.utils.events import get_event_storage
@@ -207,7 +215,8 @@ def __init__(
             cls_agnostic_bbox_reg (bool): whether to use class agnostic for bbox regression
             smooth_l1_beta (float): transition point from L1 to L2 loss. Only used if
                 `box_reg_loss_type` is "smooth_l1"
-            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
+            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou",
+                "diou", "ciou"
             loss_weight (float|dict): weights to use for losses. Can be single float for weighting
                 all losses, or a dict of individual weightings. Valid dict keys are:
                     * "loss_cls": applied to classification loss
@@ -347,6 +356,16 @@ def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes):
                 fg_pred_deltas, proposal_boxes[fg_inds]
             )
             loss_box_reg = giou_loss(fg_pred_boxes, gt_boxes[fg_inds], reduction="sum")
+        elif self.box_reg_loss_type == "diou":
+            fg_pred_boxes = self.box2box_transform.apply_deltas(
+                fg_pred_deltas, proposal_boxes[fg_inds]
+            )
+            loss_box_reg = diou_loss(fg_pred_boxes, gt_boxes[fg_inds], reduction="sum")
+        elif self.box_reg_loss_type == "ciou":
+            fg_pred_boxes = self.box2box_transform.apply_deltas(
+                fg_pred_deltas, proposal_boxes[fg_inds]
+            )
+            loss_box_reg = ciou_loss(fg_pred_boxes, gt_boxes[fg_inds], reduction="sum")
         else:
             raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'")
         # The reg loss is normalized using the total number of regions (R), not the number
diff --git a/tests/layers/test_losses.py b/tests/layers/test_losses.py
@@ -0,0 +1,82 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+import numpy as np
+import unittest
+import torch
+
+from detectron2.layers import ciou_loss, diou_loss
+
+
+class TestLosses(unittest.TestCase):
+    """Checks diou_loss and ciou_loss against hand-computed values."""
+
+    @staticmethod
+    def _box(x1, y1, x2, y2):
+        # XYXY box as a float32 tensor of shape (4,)
+        return torch.tensor([x1, y1, x2, y2], dtype=torch.float32)
+
+    def _check(self, loss, expected):
+        # Uses numpy's default allclose tolerances.
+        self.assertTrue(np.allclose(loss, [expected]))
+
+    def test_diou_loss(self):
+        """
+        loss = 1 - iou + d/c
+        where,
+        d = (distance between centers of the 2 boxes)^2
+        c = (diagonal length of the smallest enclosing box covering the 2 boxes)^2
+        """
+        # Identical boxes should have loss of 0
+        outer = self._box(-1, -1, 1, 1)
+        self._check(diou_loss(outer, outer), 0.0)
+
+        # Half size box inside other box
+        # iou = 0.5, d = 0.25, c = 8
+        half = self._box(0, -1, 1, 1)
+        self._check(diou_loss(outer, half), 0.53125)
+
+        # Two diagonally adjacent boxes
+        # iou = 0, d = 2, c = 8
+        lower = self._box(0, 0, 1, 1)
+        upper = self._box(1, 1, 2, 2)
+        self._check(diou_loss(lower, upper), 1.25)
+
+        # Test batched loss and reductions
+        preds = torch.stack([outer, lower], dim=0)
+        targets = torch.stack([half, upper], dim=0)
+        for reduction, expected in (("sum", 1.78125), ("mean", 0.890625)):
+            self._check(diou_loss(preds, targets, reduction=reduction), expected)
+
+    def test_ciou_loss(self):
+        """
+        loss = 1 - iou + d/c + alpha*v
+        where,
+        d = (distance between centers of the 2 boxes)^2
+        c = (diagonal length of the smallest enclosing box covering the 2 boxes)^2
+        v = (4/pi^2) * (arctan(box1_w/box1_h) - arctan(box2_w/box2_h))^2
+        alpha = v/(1 - iou + v)
+        """
+        # Identical boxes should have loss of 0
+        outer = self._box(-1, -1, 1, 1)
+        self._check(ciou_loss(outer, outer), 0.0)
+
+        # Half size box inside other box
+        # iou = 0.5, d = 0.25, c = 8
+        # v = (4/pi^2) * (arctan(1) - arctan(0.5))^2 = 0.042
+        # alpha = 0.0775
+        half = self._box(0, -1, 1, 1)
+        self._check(ciou_loss(outer, half), 0.5345)
+
+        # Two diagonally adjacent boxes
+        # iou = 0, d = 2, c = 8, v = 0, alpha = 0
+        lower = self._box(0, 0, 1, 1)
+        upper = self._box(1, 1, 2, 2)
+        self._check(ciou_loss(lower, upper), 1.25)
+
+        # Test batched loss and reductions
+        preds = torch.stack([outer, lower], dim=0)
+        targets = torch.stack([half, upper], dim=0)
+        for reduction, expected in (("sum", 1.7845), ("mean", 0.89225)):
+            self._check(ciou_loss(preds, targets, reduction=reduction), expected)

Original file line number	Diff line number	Diff line change
`@@ -19,5 +19,6 @@`
`19`	`19`	`)`
`20`	`20`	`from .blocks import CNNBlockBase, DepthwiseSeparableConv2d`
`21`	`21`	`from .aspp import ASPP`
	`22`	`+from .losses import ciou_loss, diou_loss`
`22`	`23`
`23`	`24`	`__all__ = [k for k in globals().keys() if not k.startswith("_")]`