3 changes: 2 additions & 1 deletion setup.py
@@ -111,7 +111,8 @@ def get_dist(pkgname):
 ]

 # Excluding 8.3.* because of https://github.com/pytorch/vision/issues/4934
-pillow_ver = " >= 5.3.0, !=8.3.*"
+# TODO remove <11.3 bound and address corresponding deprecation warnings
+pillow_ver = " >= 5.3.0, !=8.3.*, <11.3"
 pillow_req = "pillow-simd" if get_dist("pillow-simd") is not None else "pillow"
 requirements.append(pillow_req + pillow_ver)

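Note: a minimal standalone sketch of how the pinned requirement string is assembled, assuming a hypothetical get_dist that mirrors setup.py's helper via importlib.metadata (returning None when the package is absent):

# Hypothetical stand-in for setup.py's get_dist helper.
from importlib.metadata import PackageNotFoundError, distribution

def get_dist(pkgname):
    try:
        return distribution(pkgname)
    except PackageNotFoundError:
        return None

pillow_ver = " >= 5.3.0, !=8.3.*, <11.3"
# Prefer pillow-simd when it is already installed, plain pillow otherwise.
pillow_req = "pillow-simd" if get_dist("pillow-simd") is not None else "pillow"
print(pillow_req + pillow_ver)  # e.g. "pillow >= 5.3.0, !=8.3.*, <11.3"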
Binary file added test/assets/fakedata/draw_rotated_boxes_fill.png
40 changes: 11 additions & 29 deletions test/common_utils.py
@@ -21,7 +21,7 @@
 from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
 from torchvision import io, tv_tensors
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
-from torchvision.transforms.v2.functional import clamp_bounding_boxes, to_image, to_pil_image
+from torchvision.transforms.v2.functional import to_image, to_pil_image


 IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
@@ -410,6 +410,7 @@ def make_bounding_boxes(
     canvas_size=DEFAULT_SIZE,
     *,
     format=tv_tensors.BoundingBoxFormat.XYXY,
+    clamping_mode="soft",
     num_boxes=1,
     dtype=None,
     device="cpu",
@@ -423,13 +424,6 @@ def sample_position(values, max_value):
     format = tv_tensors.BoundingBoxFormat[format]

     dtype = dtype or torch.float32
-    int_dtype = dtype in (
-        torch.uint8,
-        torch.int8,
-        torch.int16,
-        torch.int32,
-        torch.int64,
-    )

     h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
     y = sample_position(h, canvas_size[0])
@@ -456,31 +450,19 @@ def sample_position(values, max_value):
     elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
         r_rad = r * torch.pi / 180.0
         cos, sin = torch.cos(r_rad), torch.sin(r_rad)
-        x1 = torch.round(x) if int_dtype else x
-        y1 = torch.round(y) if int_dtype else y
-        x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
-        y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
-        x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
-        y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
-        x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
-        y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
+        x1 = x
+        y1 = y
+        x2 = x1 + w * cos
+        y2 = y1 - w * sin
+        x3 = x2 + h * sin
+        y3 = y2 + h * cos
+        x4 = x1 + h * sin
+        y4 = y1 + h * cos
         parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
     out_boxes = torch.stack(parts, dim=-1).to(dtype=dtype, device=device)
-    if tv_tensors.is_rotated_bounding_format(format):
-        # The rotated bounding boxes are not guaranteed to be within the canvas by design,
-        # so we apply clamping. We also add a 2 buffer to the canvas size to avoid
-        # numerical issues during the testing
-        buffer = 4
-        out_boxes = clamp_bounding_boxes(
-            out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer)
-        )
-        if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR:
-            out_boxes[:, :2] += buffer // 2
-        elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
-            out_boxes[:, :] += buffer // 2
-    return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size)
+    return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode)


 def make_detection_masks(size=DEFAULT_SIZE, *, num_masks=1, dtype=None, device="cpu"):
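Note: a hedged usage sketch of the updated helper. Rotated boxes are no longer clamped inside make_bounding_boxes; the new clamping_mode keyword is simply forwarded to tv_tensors.BoundingBoxes. The canvas size, format, and box count below are illustrative:

import torch
from common_utils import make_bounding_boxes  # the test helper patched above

boxes = make_bounding_boxes(
    (32, 32),              # canvas_size
    format="XYWHR",        # rotated boxes are no longer pre-clamped by the helper
    clamping_mode="soft",  # new keyword, stored on the returned tv_tensor
    num_boxes=4,
)
assert boxes.clamping_mode == "soft"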
228 changes: 181 additions & 47 deletions test/test_transforms_v2.py

Large diffs are not rendered by default.

43 changes: 40 additions & 3 deletions test/test_tv_tensors.py
@@ -69,15 +69,39 @@ def test_bbox_instance(data, format):
 )
 @pytest.mark.parametrize("scripted", (False, True))
 def test_bbox_format(format, is_rotated_expected, scripted):
     if isinstance(format, str):
         format = tv_tensors.BoundingBoxFormat[(format.upper())]

     fn = tv_tensors.is_rotated_bounding_format
     if scripted:
         fn = torch.jit.script(fn)
     assert fn(format) == is_rotated_expected


+@pytest.mark.parametrize(
+    "format, support_integer_dtype",
+    [
+        ("XYXY", True),
+        ("XYWH", True),
+        ("CXCYWH", True),
+        ("XYXYXYXY", False),
+        ("XYWHR", False),
+        ("CXCYWHR", False),
+        (tv_tensors.BoundingBoxFormat.XYXY, True),
+        (tv_tensors.BoundingBoxFormat.XYWH, True),
+        (tv_tensors.BoundingBoxFormat.CXCYWH, True),
+        (tv_tensors.BoundingBoxFormat.XYXYXYXY, False),
+        (tv_tensors.BoundingBoxFormat.XYWHR, False),
+        (tv_tensors.BoundingBoxFormat.CXCYWHR, False),
+    ],
+)
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
+def test_bbox_format_dtype(format, support_integer_dtype, input_dtype):
+    tensor = torch.randint(0, 32, size=(5, 2), dtype=input_dtype)
+    if not input_dtype.is_floating_point and not support_integer_dtype:
+        with pytest.raises(ValueError, match="Rotated bounding boxes should be floating point tensors"):
+            tv_tensors.BoundingBoxes(tensor, format=format, canvas_size=(32, 32))
+    else:
+        tv_tensors.BoundingBoxes(tensor, format=format, canvas_size=(32, 32))


 def test_bbox_dim_error():
     data_3d = [[[1, 2, 3, 4]]]
     with pytest.raises(ValueError, match="Expected a 1D or 2D tensor, got 3D"):
@@ -406,3 +430,16 @@ def test_return_type_input():
         tv_tensors.set_return_type("typo")

     tv_tensors.set_return_type("tensor")
+
+
+def test_box_clamping_mode_default_and_error():
+    assert (
+        tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft"
+    )
+    assert (
+        tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0, 0.0], format="XYWHR", canvas_size=(100, 100)).clamping_mode
+        == "soft"
+    )
+
+    with pytest.raises(ValueError, match="clamping_mode must be"):
+        tv_tensors.BoundingBoxes([0, 0, 10, 10], format="XYXY", canvas_size=(100, 100), clamping_mode="bad")
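Note: the dtype rule pinned down by test_bbox_format_dtype, as a standalone sketch (the error text is the test's match pattern; box shapes are illustrative):

import torch
from torchvision import tv_tensors

# Axis-aligned formats accept integer tensors:
tv_tensors.BoundingBoxes(
    torch.randint(0, 32, size=(5, 4), dtype=torch.uint8), format="XYXY", canvas_size=(32, 32)
)

# Rotated formats are expected to reject them:
try:
    tv_tensors.BoundingBoxes(
        torch.randint(0, 32, size=(5, 5), dtype=torch.uint8), format="XYWHR", canvas_size=(32, 32)
    )
except ValueError as e:
    print(e)  # matches "Rotated bounding boxes should be floating point tensors"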
11 changes: 11 additions & 0 deletions test/test_utils.py
@@ -177,6 +177,17 @@ def test_draw_rotated_boxes():
     assert_equal(result, expected)


+@pytest.mark.skipif(PILLOW_VERSION < (10, 1), reason="The reference image is only valid for PIL >= 10.1")
+def test_draw_rotated_boxes_fill():
+    img = torch.full((3, 500, 500), 255, dtype=torch.uint8)
+    colors = ["blue", "yellow", (0, 255, 0), "black"]
+
+    result = utils.draw_bounding_boxes(img, rotated_boxes, colors=colors, fill=True)
+    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_rotated_boxes_fill.png")
+    expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1)
+    assert_equal(result, expected)
+
+
 @pytest.mark.parametrize("fill", [True, False])
 def test_draw_boxes_dtypes(fill):
     img_uint8 = torch.full((3, 100, 100), 255, dtype=torch.uint8)
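Note: rotated_boxes is defined elsewhere in test_utils.py and does not appear in this diff. A hedged sketch of the API the new test exercises, with made-up XYXYXYXY corners:

import torch
from torchvision import tv_tensors, utils

img = torch.full((3, 500, 500), 255, dtype=torch.uint8)
# Hypothetical corner coordinates; the test's actual rotated_boxes differ.
rotated_boxes = tv_tensors.BoundingBoxes(
    torch.tensor([[100.0, 100.0, 300.0, 150.0, 275.0, 250.0, 75.0, 200.0]]),
    format="XYXYXYXY",
    canvas_size=(500, 500),
)
out = utils.draw_bounding_boxes(img, rotated_boxes, colors=["blue"], fill=True)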
2 changes: 1 addition & 1 deletion torchvision/datasets/fakedata.py
@@ -11,7 +11,7 @@ class FakeData(VisionDataset):

     Args:
         size (int, optional): Size of the dataset. Default: 1000 images
-        image_size(tuple, optional): Size if the returned images. Default: (3, 224, 224)
+        image_size(tuple, optional): Size of the returned images. Default: (3, 224, 224)
         num_classes(int, optional): Number of classes in the dataset. Default: 10
         transform (callable, optional): A function/transform that takes in a PIL image
             and returns a transformed version. E.g, ``transforms.RandomCrop``
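Note: for reference, the corrected docstring corresponds to usage like this (all arguments shown at their documented defaults):

from torchvision.datasets import FakeData

ds = FakeData(size=1000, image_size=(3, 224, 224), num_classes=10)
img, target = ds[0]  # a 224x224 RGB PIL image and a class label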
2 changes: 1 addition & 1 deletion torchvision/transforms/v2/__init__.py
@@ -41,7 +41,7 @@
     ScaleJitter,
     TenCrop,
 )
-from ._meta import ClampBoundingBoxes, ClampKeyPoints, ConvertBoundingBoxFormat
+from ._meta import ClampBoundingBoxes, ClampKeyPoints, ConvertBoundingBoxFormat, SetClampingMode
 from ._misc import (
     ConvertImageDtype,
     GaussianBlur,
28 changes: 27 additions & 1 deletion torchvision/transforms/v2/_meta.py
@@ -2,6 +2,7 @@

 from torchvision import tv_tensors
 from torchvision.transforms.v2 import functional as F, Transform
+from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE


 class ConvertBoundingBoxFormat(Transform):
@@ -28,12 +29,19 @@ class ClampBoundingBoxes(Transform):

     The clamping is done according to the bounding boxes' ``canvas_size`` meta-data.

+    Args:
+        clamping_mode: TODOBB more docs. Default is "auto", which relies on the input box's ``clamping_mode`` attribute.
+
     """

+    def __init__(self, clamping_mode: Union[CLAMPING_MODE_TYPE, str] = "auto") -> None:
+        super().__init__()
+        self.clamping_mode = clamping_mode
+
     _transformed_types = (tv_tensors.BoundingBoxes,)

     def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes:
-        return F.clamp_bounding_boxes(inpt)  # type: ignore[return-value]
+        return F.clamp_bounding_boxes(inpt, clamping_mode=self.clamping_mode)  # type: ignore[return-value]


 class ClampKeyPoints(Transform):
@@ -46,3 +54,21 @@ class ClampKeyPoints(Transform):

     def transform(self, inpt: tv_tensors.KeyPoints, params: dict[str, Any]) -> tv_tensors.KeyPoints:
         return F.clamp_keypoints(inpt)  # type: ignore[return-value]
+
+
+class SetClampingMode(Transform):
+    """TODOBB"""
+
+    def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None:
+        super().__init__()
+        self.clamping_mode = clamping_mode
+
+        if self.clamping_mode not in (None, "soft", "hard"):
+            raise ValueError(f"clamping_mode must be soft, hard or None, got {clamping_mode}")
+
+    _transformed_types = (tv_tensors.BoundingBoxes,)
+
+    def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes:
+        out: tv_tensors.BoundingBoxes = inpt.clone()  # type: ignore[assignment]
+        out.clamping_mode = self.clamping_mode
+        return out
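Note: a hedged end-to-end sketch of the two transforms touched here, assuming the default "auto" mode makes F.clamp_bounding_boxes defer to the input's own clamping_mode attribute:

import torch
from torchvision import tv_tensors
from torchvision.transforms import v2

boxes = tv_tensors.BoundingBoxes(
    torch.tensor([[-5.0, -5.0, 120.0, 120.0]]), format="XYXY", canvas_size=(100, 100)
)

clamped = v2.ClampBoundingBoxes()(boxes)       # "auto": use boxes.clamping_mode
relabeled = v2.SetClampingMode("hard")(boxes)  # clones; only rewrites the metadata
assert relabeled.clamping_mode == "hard"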