3 changes: 2 additions & 1 deletion setup.py
@@ -111,7 +111,8 @@ def get_dist(pkgname):
 ]

 # Excluding 8.3.* because of https://github.com/pytorch/vision/issues/4934
-pillow_ver = " >= 5.3.0, !=8.3.*"
+# TODO remove <11.3 bound and address corresponding deprecation warnings
+pillow_ver = " >= 5.3.0, !=8.3.*, <11.3"
 pillow_req = "pillow-simd" if get_dist("pillow-simd") is not None else "pillow"
 requirements.append(pillow_req + pillow_ver)

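Note: a minimal standalone sketch of how the pinned requirement string is assembled, assuming a hypothetical get_dist that mirrors setup.py's helper via importlib.metadata (returning None when the package is absent):

# Hypothetical stand-in for setup.py's get_dist helper.
from importlib.metadata import PackageNotFoundError, distribution

def get_dist(pkgname):
    try:
        return distribution(pkgname)
    except PackageNotFoundError:
        return None

pillow_ver = " >= 5.3.0, !=8.3.*, <11.3"
# Prefer pillow-simd when it is already installed, plain pillow otherwise.
pillow_req = "pillow-simd" if get_dist("pillow-simd") is not None else "pillow"
print(pillow_req + pillow_ver)  # e.g. "pillow >= 5.3.0, !=8.3.*, <11.3"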
Binary file added test/assets/fakedata/draw_rotated_boxes_fill.png
40 changes: 11 additions & 29 deletions test/common_utils.py
@@ -21,7 +21,7 @@
 from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair
 from torchvision import io, tv_tensors
 from torchvision.transforms._functional_tensor import _max_value as get_max_value
-from torchvision.transforms.v2.functional import clamp_bounding_boxes, to_image, to_pil_image
+from torchvision.transforms.v2.functional import to_image, to_pil_image


 IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"])
@@ -410,6 +410,7 @@ def make_bounding_boxes(
     canvas_size=DEFAULT_SIZE,
     *,
     format=tv_tensors.BoundingBoxFormat.XYXY,
+    clamping_mode="soft",
     num_boxes=1,
     dtype=None,
     device="cpu",
@@ -423,13 +424,6 @@ def sample_position(values, max_value):
     format = tv_tensors.BoundingBoxFormat[format]

     dtype = dtype or torch.float32
-    int_dtype = dtype in (
-        torch.uint8,
-        torch.int8,
-        torch.int16,
-        torch.int32,
-        torch.int64,
-    )

     h, w = (torch.randint(1, s, (num_boxes,)) for s in canvas_size)
     y = sample_position(h, canvas_size[0])
@@ -456,31 +450,19 @@ def sample_position(values, max_value):
     elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
         r_rad = r * torch.pi / 180.0
         cos, sin = torch.cos(r_rad), torch.sin(r_rad)
-        x1 = torch.round(x) if int_dtype else x
-        y1 = torch.round(y) if int_dtype else y
-        x2 = torch.round(x1 + w * cos) if int_dtype else x1 + w * cos
-        y2 = torch.round(y1 - w * sin) if int_dtype else y1 - w * sin
-        x3 = torch.round(x2 + h * sin) if int_dtype else x2 + h * sin
-        y3 = torch.round(y2 + h * cos) if int_dtype else y2 + h * cos
-        x4 = torch.round(x1 + h * sin) if int_dtype else x1 + h * sin
-        y4 = torch.round(y1 + h * cos) if int_dtype else y1 + h * cos
+        x1 = x
+        y1 = y
+        x2 = x1 + w * cos
+        y2 = y1 - w * sin
+        x3 = x2 + h * sin
+        y3 = y2 + h * cos
+        x4 = x1 + h * sin
+        y4 = y1 + h * cos
         parts = (x1, y1, x2, y2, x3, y3, x4, y4)
     else:
         raise ValueError(f"Format {format} is not supported")
     out_boxes = torch.stack(parts, dim=-1).to(dtype=dtype, device=device)
-    if tv_tensors.is_rotated_bounding_format(format):
-        # The rotated bounding boxes are not guaranteed to be within the canvas by design,
-        # so we apply clamping. We also add a 2 buffer to the canvas size to avoid
-        # numerical issues during the testing
-        buffer = 4
-        out_boxes = clamp_bounding_boxes(
-            out_boxes, format=format, canvas_size=(canvas_size[0] - buffer, canvas_size[1] - buffer)
-        )
-        if format is tv_tensors.BoundingBoxFormat.XYWHR or format is tv_tensors.BoundingBoxFormat.CXCYWHR:
-            out_boxes[:, :2] += buffer // 2
-        elif format is tv_tensors.BoundingBoxFormat.XYXYXYXY:
-            out_boxes[:, :] += buffer // 2
-    return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size)
+    return tv_tensors.BoundingBoxes(out_boxes, format=format, canvas_size=canvas_size, clamping_mode=clamping_mode)


 def make_detection_masks(size=DEFAULT_SIZE, *, num_masks=1, dtype=None, device="cpu"):
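Note: a hedged usage sketch of the updated helper. Rotated boxes are no longer clamped inside make_bounding_boxes; the new clamping_mode keyword is simply forwarded to tv_tensors.BoundingBoxes. The canvas size, format, and box count below are illustrative:

import torch
from common_utils import make_bounding_boxes  # the test helper patched above

boxes = make_bounding_boxes(
    (32, 32),              # canvas_size
    format="XYWHR",        # rotated boxes are no longer pre-clamped by the helper
    clamping_mode="soft",  # new keyword, stored on the returned tv_tensor
    num_boxes=4,
)
assert boxes.clamping_mode == "soft"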
228 changes: 181 additions & 47 deletions test/test_transforms_v2.py

Large diffs are not rendered by default.

43 changes: 40 additions & 3 deletions test/test_tv_tensors.py
@@ -69,15 +69,39 @@ def test_bbox_instance(data, format):
 )
 @pytest.mark.parametrize("scripted", (False, True))
 def test_bbox_format(format, is_rotated_expected, scripted):
     if isinstance(format, str):
         format = tv_tensors.BoundingBoxFormat[(format.upper())]

     fn = tv_tensors.is_rotated_bounding_format
     if scripted:
         fn = torch.jit.script(fn)
     assert fn(format) == is_rotated_expected


+@pytest.mark.parametrize(
+    "format, support_integer_dtype",
+    [
+        ("XYXY", True),
+        ("XYWH", True),
+        ("CXCYWH", True),
+        ("XYXYXYXY", False),
+        ("XYWHR", False),
+        ("CXCYWHR", False),
+        (tv_tensors.BoundingBoxFormat.XYXY, True),
+        (tv_tensors.BoundingBoxFormat.XYWH, True),
+        (tv_tensors.BoundingBoxFormat.CXCYWH, True),
+        (tv_tensors.BoundingBoxFormat.XYXYXYXY, False),
+        (tv_tensors.BoundingBoxFormat.XYWHR, False),
+        (tv_tensors.BoundingBoxFormat.CXCYWHR, False),
+    ],
+)
+@pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8])
+def test_bbox_format_dtype(format, support_integer_dtype, input_dtype):
+    tensor = torch.randint(0, 32, size=(5, 2), dtype=input_dtype)
+    if not input_dtype.is_floating_point and not support_integer_dtype:
+        with pytest.raises(ValueError, match="Rotated bounding boxes should be floating point tensors"):
+            tv_tensors.BoundingBoxes(tensor, format=format, canvas_size=(32, 32))
+    else:
+        tv_tensors.BoundingBoxes(tensor, format=format, canvas_size=(32, 32))


 def test_bbox_dim_error():
     data_3d = [[[1, 2, 3, 4]]]
     with pytest.raises(ValueError, match="Expected a 1D or 2D tensor, got 3D"):
@@ -406,3 +430,16 @@ def test_return_type_input():
         tv_tensors.set_return_type("typo")

     tv_tensors.set_return_type("tensor")
+
+
+def test_box_clamping_mode_default_and_error():
+    assert (
+        tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0], format="XYXY", canvas_size=(100, 100)).clamping_mode == "soft"
+    )
+    assert (
+        tv_tensors.BoundingBoxes([0.0, 0.0, 10.0, 10.0, 0.0], format="XYWHR", canvas_size=(100, 100)).clamping_mode
+        == "soft"
+    )
+
+    with pytest.raises(ValueError, match="clamping_mode must be"):
+        tv_tensors.BoundingBoxes([0, 0, 10, 10], format="XYXY", canvas_size=(100, 100), clamping_mode="bad")
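Note: the dtype rule pinned down by test_bbox_format_dtype, as a standalone sketch (the error text is the test's match pattern; box shapes are illustrative):

import torch
from torchvision import tv_tensors

# Axis-aligned formats accept integer tensors:
tv_tensors.BoundingBoxes(
    torch.randint(0, 32, size=(5, 4), dtype=torch.uint8), format="XYXY", canvas_size=(32, 32)
)

# Rotated formats are expected to reject them:
try:
    tv_tensors.BoundingBoxes(
        torch.randint(0, 32, size=(5, 5), dtype=torch.uint8), format="XYWHR", canvas_size=(32, 32)
    )
except ValueError as e:
    print(e)  # matches "Rotated bounding boxes should be floating point tensors"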
11 changes: 11 additions & 0 deletions test/test_utils.py
@@ -177,6 +177,17 @@ def test_draw_rotated_boxes():
     assert_equal(result, expected)


+@pytest.mark.skipif(PILLOW_VERSION < (10, 1), reason="The reference image is only valid for PIL >= 10.1")
+def test_draw_rotated_boxes_fill():
+    img = torch.full((3, 500, 500), 255, dtype=torch.uint8)
+    colors = ["blue", "yellow", (0, 255, 0), "black"]
+
+    result = utils.draw_bounding_boxes(img, rotated_boxes, colors=colors, fill=True)
+    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "fakedata", "draw_rotated_boxes_fill.png")
+    expected = torch.as_tensor(np.array(Image.open(path))).permute(2, 0, 1)
+    assert_equal(result, expected)
+
+
 @pytest.mark.parametrize("fill", [True, False])
 def test_draw_boxes_dtypes(fill):
     img_uint8 = torch.full((3, 100, 100), 255, dtype=torch.uint8)
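Note: rotated_boxes is defined elsewhere in test_utils.py and does not appear in this diff. A hedged sketch of the API the new test exercises, with made-up XYXYXYXY corners:

import torch
from torchvision import tv_tensors, utils

img = torch.full((3, 500, 500), 255, dtype=torch.uint8)
# Hypothetical corner coordinates; the test's actual rotated_boxes differ.
rotated_boxes = tv_tensors.BoundingBoxes(
    torch.tensor([[100.0, 100.0, 300.0, 150.0, 275.0, 250.0, 75.0, 200.0]]),
    format="XYXYXYXY",
    canvas_size=(500, 500),
)
out = utils.draw_bounding_boxes(img, rotated_boxes, colors=["blue"], fill=True)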
2 changes: 1 addition & 1 deletion torchvision/datasets/fakedata.py
@@ -11,7 +11,7 @@ class FakeData(VisionDataset):

     Args:
         size (int, optional): Size of the dataset. Default: 1000 images
-        image_size(tuple, optional): Size if the returned images. Default: (3, 224, 224)
+        image_size(tuple, optional): Size of the returned images. Default: (3, 224, 224)
         num_classes(int, optional): Number of classes in the dataset. Default: 10
         transform (callable, optional): A function/transform that takes in a PIL image
             and returns a transformed version. E.g, ``transforms.RandomCrop``
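Note: for reference, the corrected docstring corresponds to usage like this (all arguments shown at their documented defaults):

from torchvision.datasets import FakeData

ds = FakeData(size=1000, image_size=(3, 224, 224), num_classes=10)
img, target = ds[0]  # a 224x224 RGB PIL image and a class label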
2 changes: 1 addition & 1 deletion torchvision/transforms/v2/__init__.py
@@ -41,7 +41,7 @@
     ScaleJitter,
     TenCrop,
 )
-from ._meta import ClampBoundingBoxes, ClampKeyPoints, ConvertBoundingBoxFormat
+from ._meta import ClampBoundingBoxes, ClampKeyPoints, ConvertBoundingBoxFormat, SetClampingMode
 from ._misc import (
     ConvertImageDtype,
     GaussianBlur,
28 changes: 27 additions & 1 deletion torchvision/transforms/v2/_meta.py
@@ -2,6 +2,7 @@

 from torchvision import tv_tensors
 from torchvision.transforms.v2 import functional as F, Transform
+from torchvision.tv_tensors._bounding_boxes import CLAMPING_MODE_TYPE


 class ConvertBoundingBoxFormat(Transform):
@@ -28,12 +29,19 @@ class ClampBoundingBoxes(Transform):

     The clamping is done according to the bounding boxes' ``canvas_size`` meta-data.

+    Args:
+        clamping_mode: TODOBB more docs. Default is "auto", which relies on the input box's ``clamping_mode`` attribute.
+
     """

+    def __init__(self, clamping_mode: Union[CLAMPING_MODE_TYPE, str] = "auto") -> None:
+        super().__init__()
+        self.clamping_mode = clamping_mode
+
     _transformed_types = (tv_tensors.BoundingBoxes,)

     def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes:
-        return F.clamp_bounding_boxes(inpt)  # type: ignore[return-value]
+        return F.clamp_bounding_boxes(inpt, clamping_mode=self.clamping_mode)  # type: ignore[return-value]


 class ClampKeyPoints(Transform):
@@ -46,3 +54,21 @@ class ClampKeyPoints(Transform):

     def transform(self, inpt: tv_tensors.KeyPoints, params: dict[str, Any]) -> tv_tensors.KeyPoints:
         return F.clamp_keypoints(inpt)  # type: ignore[return-value]
+
+
+class SetClampingMode(Transform):
+    """TODOBB"""
+
+    def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None:
+        super().__init__()
+        self.clamping_mode = clamping_mode
+
+        if self.clamping_mode not in (None, "soft", "hard"):
+            raise ValueError(f"clamping_mode must be soft, hard or None, got {clamping_mode}")
+
+    _transformed_types = (tv_tensors.BoundingBoxes,)
+
+    def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> tv_tensors.BoundingBoxes:
+        out: tv_tensors.BoundingBoxes = inpt.clone()  # type: ignore[assignment]
+        out.clamping_mode = self.clamping_mode
+        return out
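Note: a hedged end-to-end sketch of the two transforms touched here, assuming the default "auto" mode makes F.clamp_bounding_boxes defer to the input's own clamping_mode attribute:

import torch
from torchvision import tv_tensors
from torchvision.transforms import v2

boxes = tv_tensors.BoundingBoxes(
    torch.tensor([[-5.0, -5.0, 120.0, 120.0]]), format="XYXY", canvas_size=(100, 100)
)

clamped = v2.ClampBoundingBoxes()(boxes)       # "auto": use boxes.clamping_mode
relabeled = v2.SetClampingMode("hard")(boxes)  # clones; only rewrites the metadata
assert relabeled.clamping_mode == "hard"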