1 change: 1 addition & 0 deletions docs/source/conf.py
@@ -87,6 +87,7 @@ def __init__(self, src_dir):
"plot_transforms_illustrations.py",
"plot_transforms_e2e.py",
"plot_cutmix_mixup.py",
"plot_rotated_box_transforms.py",
"plot_custom_transforms.py",
"plot_tv_tensors.py",
"plot_custom_tv_tensors.py",
22 changes: 14 additions & 8 deletions docs/source/transforms.rst
@@ -1,14 +1,20 @@
.. _transforms:

Transforming and augmenting images
==================================
Transforming images, videos, boxes and more
===========================================

.. currentmodule:: torchvision.transforms

Torchvision supports common computer vision transformations in the
``torchvision.transforms`` and ``torchvision.transforms.v2`` modules. Transforms
can be used to transform or augment data for training or inference of different
tasks (image classification, detection, segmentation, video classification).
``torchvision.transforms.v2`` module. Transforms can be used to transform and
augment data, for both training and inference. The following objects are
supported:

- Images as pure tensors, :class:`~torchvision.tv_tensors.Image`, or PIL images
- Videos as :class:`~torchvision.tv_tensors.Video`
- Axis-aligned and rotated bounding boxes as :class:`~torchvision.tv_tensors.BoundingBoxes`
- Segmentation and detection masks as :class:`~torchvision.tv_tensors.Mask`
- KeyPoints as :class:`~torchvision.tv_tensors.KeyPoints`

.. code:: python

@@ -111,9 +117,9 @@ In Torchvision 0.15 (March 2023), we released a new set of transforms available
in the ``torchvision.transforms.v2`` namespace. These transforms have a lot of
advantages compared to the v1 ones (in ``torchvision.transforms``):

- They can transform images **but also** bounding boxes, masks, or videos. This
provides support for tasks beyond image classification: detection, segmentation,
video classification, etc. See
- They can transform images **and also** bounding boxes, masks, videos and
keypoints. This provides support for tasks beyond image classification:
detection, segmentation, video classification, pose estimation, etc. See
:ref:`sphx_glr_auto_examples_transforms_plot_transforms_getting_started.py`
and :ref:`sphx_glr_auto_examples_transforms_plot_transforms_e2e.py`.
- They support more transforms like :class:`~torchvision.transforms.v2.CutMix`
Binary file added gallery/assets/leaning_tower.jpg
10 changes: 8 additions & 2 deletions gallery/transforms/helpers.py
@@ -2,10 +2,11 @@
import torch
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
from torchvision import tv_tensors
from torchvision.transforms import v2
from torchvision.transforms.v2 import functional as F


def plot(imgs, row_title=None, **imshow_kwargs):
def plot(imgs, row_title=None, bbox_width=3, **imshow_kwargs):
if not isinstance(imgs[0], list):
# Make a 2d grid even if there's just 1 row
imgs = [imgs]
@@ -24,6 +25,11 @@ def plot(imgs, row_title=None, **imshow_kwargs):
masks = target.get("masks")
elif isinstance(target, tv_tensors.BoundingBoxes):
boxes = target

# Conversion necessary because draw_bounding_boxes() only
# works with this specific format.
if tv_tensors.is_rotated_bounding_format(boxes.format):
boxes = v2.ConvertBoundingBoxFormat("xyxyxyxy")(boxes)
else:
raise ValueError(f"Unexpected target type: {type(target)}")
img = F.to_image(img)
@@ -35,7 +41,7 @@ def plot(imgs, row_title=None, **imshow_kwargs):

img = F.to_dtype(img, torch.uint8, scale=True)
if boxes is not None:
img = draw_bounding_boxes(img, boxes, colors="yellow", width=3)
img = draw_bounding_boxes(img, boxes, colors="yellow", width=bbox_width)
if masks is not None:
img = draw_segmentation_masks(img, masks.to(torch.bool), colors=["green"] * masks.shape[0], alpha=.65)

195 changes: 195 additions & 0 deletions gallery/transforms/plot_rotated_box_transforms.py
@@ -0,0 +1,195 @@
"""
===============================================================
Transforms on Rotated Bounding Boxes
===============================================================

This example illustrates how to define and use rotated bounding boxes.

.. note::
Support for rotated bounding boxes was released in TorchVision 0.23 and is
currently a BETA feature. We don't expect the API to change, but there may
be some rare edge-cases. If you find any issues, please report them on
our bug tracker: https://github.com/pytorch/vision/issues?q=is:open+is:issue

First, a bit of setup code:
"""

# %%
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt


import torch
from torchvision.tv_tensors import BoundingBoxes
from torchvision.transforms import v2
from helpers import plot

plt.rcParams["figure.figsize"] = [10, 5]
plt.rcParams["savefig.bbox"] = "tight"

# if you change the seed, make sure that the randomly-applied transforms
# properly show that the image can be both transformed and *not* transformed!
torch.manual_seed(0)

# If you're trying to run this example on Colab, you can download the assets and the
# helpers from https://github.com/pytorch/vision/tree/main/gallery/
orig_img = Image.open(Path('../assets') / 'leaning_tower.jpg')

# %%
# Creating a Rotated Bounding Box
# -------------------------------
# Rotated bounding boxes are created by instantiating the
# :class:`~torchvision.tv_tensors.BoundingBoxes` class. It's the ``format``
# parameter of the constructor that determines if a bounding box is rotated or
# not. In this instance, we use the CXCYWHR
# :attr:`~torchvision.tv_tensors.BoundingBoxFormat`. The first two values are
# the X and Y coordinates of the center of the bounding box. The next two
# values are the width and height of the bounding box, and the last value is the
# rotation of the bounding box, in degrees.


orig_box = BoundingBoxes(
[
[860.0, 1100, 570, 1840, -7],
],
format="CXCYWHR",
canvas_size=(orig_img.size[1], orig_img.size[0]),
)

plot([(orig_img, orig_box)], bbox_width=10)
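
# %%
# To double-check the definition above, we can convert the box to the
# ``XYXYXYXY`` format, which lists the four corners explicitly. This is just a
# quick sanity-check sketch; the printed values depend on the box defined
# above.
print(v2.ConvertBoundingBoxFormat("XYXYXYXY")(orig_box))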

# %%
# Transforms illustrations
# ------------------------
#
# Using :class:`~torchvision.transforms.RandomRotation`:
rotater = v2.RandomRotation(degrees=(0, 180), expand=True)
rotated_imgs = [rotater((orig_img, orig_box)) for _ in range(4)]
plot([(orig_img, orig_box)] + rotated_imgs, bbox_width=10)
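
# %%
# Note that with ``expand=True`` the canvas grows to fit the rotated image,
# and the boxes' ``canvas_size`` metadata is updated accordingly. A quick
# check (a sketch; the exact values depend on the randomly sampled angles):
_, rotated_box = rotated_imgs[0]
print(rotated_box.canvas_size)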

# %%
# Using :class:`~torchvision.transforms.Pad`:
padded_imgs_and_boxes = [
v2.Pad(padding=padding)(orig_img, orig_box)
for padding in (30, 50, 100, 200)
]
plot([(orig_img, orig_box)] + padded_imgs_and_boxes, bbox_width=10)
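
# %%
# Padding shifts the box along with the image content. In the ``CXCYWHR``
# format the first two values are the center coordinates, so they move by the
# padding amount. A sanity-check sketch, using the smallest padding (30) from
# above:
_, padded_box = padded_imgs_and_boxes[0]
print(orig_box[0, :2], "->", padded_box[0, :2])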

# %%
# Using :class:`~torchvision.transforms.Resize`:
resized_imgs = [
v2.Resize(size=size)(orig_img, orig_box)
for size in (30, 50, 100, orig_img.size)
]
plot([(orig_img, orig_box)] + resized_imgs, bbox_width=5)

# %%
# Note that the bounding box looking bigger in the images with fewer pixels is
# an artifact, not reality. It is merely the rasterized representation of the
# bounding box's boundaries appearing bigger because we specify a fixed width
# of that rasterized line. When the image is, say, only 30 pixels wide, a
# line that is 3 pixels wide is relatively large.
#
# .. _clamping_mode_tuto:
#
# Clamping Mode, and its effect on transforms
# -------------------------------------------
#
# Some transforms, such as :class:`~torchvision.transforms.CenterCrop`, may
# result in the transformed bounding box lying partially outside of the
# transformed (cropped) image. In general, this can happen with most of the
# :ref:`geometric transforms <v2_api_ref>`.
#
# In such cases, the bounding box is clamped to the transformed image size based
# on its ``clamping_mode`` attribute. There are three values for
# ``clamping_mode``, which determines how the box is clamped after a
# transformation:
#
# - ``None``: No clamping is applied, and the bounding box may be partially
# outside of the image.
# - `"hard"`: The box is clamped to the image size, such that all its corners
# are within the image canvas. This potentially results in a loss of
# information, and it can lead to unintuitive resuts. But may be necessary
# for some applications e.g. if the model doesn't support boxes outside of
# their image.
# - `"soft"`: . This is an intermediate mode between ``None`` and "hard": the
# box is clamped, but not as strictly as in "hard" mode. Some box dimensions
# may still be outside of the image. This is the default when constucting
# :class:`~torchvision.tv_tensors.BoundingBoxes`.
#
# .. note::
#
# For axis-aligned bounding boxes, the `"soft"` and `"hard"` modes behave
# the same, as the bounding box is always clamped to the image size.
#
# Let's illustrate the clamping modes with the
# :class:`~torchvision.transforms.CenterCrop` transform:

assert orig_box.clamping_mode == "soft"

box_hard_clamping = BoundingBoxes(orig_box, format=orig_box.format, canvas_size=orig_box.canvas_size, clamping_mode="hard")

box_no_clamping = BoundingBoxes(orig_box, format=orig_box.format, canvas_size=orig_box.canvas_size, clamping_mode=None)

crop_sizes = (800, 1200, 2000, orig_img.size)
soft_center_crops_and_boxes = [
v2.CenterCrop(size=size)(orig_img, orig_box)
for size in crop_sizes
]

hard_center_crops_and_boxes = [
v2.CenterCrop(size=size)(orig_img, box_hard_clamping)
for size in crop_sizes
]

no_clamping_center_crops_and_boxes = [
v2.CenterCrop(size=size)(orig_img, box_no_clamping)
for size in crop_sizes
]

plot([[(orig_img, box_hard_clamping)] + hard_center_crops_and_boxes,
[(orig_img, orig_box)] + soft_center_crops_and_boxes,
[(orig_img, box_no_clamping)] + no_clamping_center_crops_and_boxes],
bbox_width=10)

# %%
# The plot above shows the "hard" clamping mode, "soft" and ``None``, in this
# order. While "soft" and ``None`` result in similar plots, they do not lead to
# the exact same clamped boxes: the non-clamped boxes have coordinates that
# extend further outside of the image canvas:
print("boxes with soft clamping:")
print(soft_center_crops_and_boxes)
print()
print("boxes with no clamping:")
print(no_clamping_center_crops_and_boxes)

# %%
#
# Setting the clamping mode
# --------------------------
#
# The ``clamping_mode`` attribute, which determines the clamping strategy that
# is applied to a box, can be set in different ways:
#
# - When constructing the bounding box with its
# :class:`~torchvision.tv_tensors.BoundingBoxes` constructor, as done in the example above.
# - By directly setting the attribute on an existing instance, e.g. ``boxes.clamping_mode = "hard"``.
# - By calling the :class:`~torchvision.transforms.v2.SetClampingMode` transform.
#
# Also, remember that you can always clamp the bounding box manually by
# calling the :class:`~torchvision.transforms.v2.ClampBoundingBoxes` transform!
# Here's an example illustrating all of these options:

t = v2.Compose([
v2.CenterCrop(size=(800,)), # clamps according to the current clamping_mode
# attribute, in this case set by the constructor
v2.SetClampingMode(None), # sets the clamping_mode attribute for future transforms
v2.Pad(padding=3), # clamps according to the current clamping_mode
# i.e. ``None``
v2.ClampBoundingBoxes(clamping_mode="soft"), # clamps with "soft" mode.
])

out_img, out_box = t(orig_img, orig_box)
plot([(orig_img, orig_box), (out_img, out_box)], bbox_width=10)
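
# %%
# Clamping can also be applied directly, outside of a ``Compose`` pipeline. A
# minimal sketch, applying "hard" clamping to the original box; the output
# only differs from the input if the box extends beyond the canvas:
print(v2.ClampBoundingBoxes(clamping_mode="hard")(orig_box))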

# %%
14 changes: 8 additions & 6 deletions gallery/transforms/plot_transforms_getting_started.py
@@ -79,12 +79,13 @@
# very easy: the v2 transforms are fully compatible with the v1 API, so you
# only need to change the import!
#
# Detection, Segmentation, Videos
# Videos, boxes, masks, keypoints
# -------------------------------
#
# The new Torchvision transforms in the ``torchvision.transforms.v2`` namespace
# support tasks beyond image classification: they can also transform bounding
# boxes, segmentation / detection masks, or videos.
# The Torchvision transforms in the ``torchvision.transforms.v2`` namespace
# support tasks beyond image classification: they can also transform rotated or
# axis-aligned bounding boxes, segmentation / detection masks, videos, and
# keypoints.
#
# Let's briefly look at a detection example with bounding boxes.

@@ -129,8 +130,9 @@
# TVTensors are :class:`torch.Tensor` subclasses. The available TVTensors are
# :class:`~torchvision.tv_tensors.Image`,
# :class:`~torchvision.tv_tensors.BoundingBoxes`,
# :class:`~torchvision.tv_tensors.Mask`, and
# :class:`~torchvision.tv_tensors.Video`.
# :class:`~torchvision.tv_tensors.Mask`,
# :class:`~torchvision.tv_tensors.Video`, and
# :class:`~torchvision.tv_tensors.KeyPoints`.
#
# TVTensors look and feel just like regular tensors - they **are** tensors.
# Everything that is supported on a plain :class:`torch.Tensor` like ``.sum()``
17 changes: 12 additions & 5 deletions torchvision/transforms/v2/_meta.py
@@ -27,11 +27,10 @@ def transform(self, inpt: tv_tensors.BoundingBoxes, params: dict[str, Any]) -> t
class ClampBoundingBoxes(Transform):
"""Clamp bounding boxes to their corresponding image dimensions.

The clamping is done according to the bounding boxes' ``canvas_size`` meta-data.

Args:
clamping_mode: TODOBB more docs. Default is None which relies on the input box' clamping_mode attribute.

clamping_mode: Default is "auto", which relies on the input box's
``clamping_mode`` attribute. See :ref:`clamping_mode_tuto` for more
details on how to use this transform.
"""

def __init__(self, clamping_mode: Union[CLAMPING_MODE_TYPE, str] = "auto") -> None:
@@ -57,7 +56,15 @@ def transform(self, inpt: tv_tensors.KeyPoints, params: dict[str, Any]) -> tv_te


class SetClampingMode(Transform):
"""TODOBB"""
"""Sets the ``clamping_mode`` attribute of the bounding boxes for future transforms.



Args:
clamping_mode: The clamping mode to set. Possible values are: "soft",
"hard", or ``None``. Read more in :ref:`clamping_mode_tuto` for more
details on how to use this transform.
"""

def __init__(self, clamping_mode: CLAMPING_MODE_TYPE) -> None:
super().__init__()
33 changes: 21 additions & 12 deletions torchvision/tv_tensors/_bounding_boxes.py
@@ -16,17 +16,20 @@ class BoundingBoxFormat(Enum):

Available formats are:

* ``XYXY``
* ``XYWH``
* ``CXCYWH``
* ``XYWHR``: rotated boxes represented via corner, width and height, x1, y1
being top left, w, h being width and height. r is rotation angle in
* ``XYXY``: bounding box represented via corners; x1, y1 being top left;
x2, y2 being bottom right.
* ``XYWH``: bounding box represented via corner, width and height; x1, y1
being top left; w, h being width and height.
* ``CXCYWH``: bounding box represented via center, width and height; cx,
cy being center of box; w, h being width and height.
* ``XYWHR``: rotated boxes represented via corner, width and height; x1, y1
being top left; w, h being width and height. r is rotation angle in
degrees.
* ``CXCYWHR``: rotated boxes represented via centre, width and height, cx,
cy being center of box, w, h being width and height. r is rotation angle
* ``CXCYWHR``: rotated boxes represented via center, width and height; cx,
cy being center of box; w, h being width and height. r is rotation angle
in degrees.
* ``XYXYXYXY``: rotated boxes represented via corners, x1, y1 being top
left, x2, y2 being top right, x3, y3 being bottom right, x4, y4 being
* ``XYXYXYXY``: rotated boxes represented via corners; x1, y1 being top
left; x2, y2 being top right; x3, y3 being bottom right; x4, y4 being
bottom left.
"""

@@ -56,12 +59,17 @@ def is_rotated_bounding_format(format: BoundingBoxFormat | str) -> bool:
# This should ideally be a Literal, but torchscript fails.
CLAMPING_MODE_TYPE = Optional[str]

# TODOBB All docs. Add any new API to rst files, add tutorial[s].


class BoundingBoxes(TVTensor):
""":class:`torch.Tensor` subclass for bounding boxes with shape ``[N, K]``.

.. note::
Support for rotated bounding boxes was released in TorchVision 0.23 and
is currently a BETA feature. We don't expect the API to change, but
there may be some rare edge-cases. If you find any issues, please report
them on our bug tracker:
https://github.com/pytorch/vision/issues?q=is:open+is:issue

Where ``N`` is the number of bounding boxes
and ``K`` is 4 for unrotated boxes, and 5 or 8 for rotated boxes.

@@ -75,7 +83,8 @@ class BoundingBoxes(TVTensor):
data: Any data that can be turned into a tensor with :func:`torch.as_tensor`.
format (BoundingBoxFormat, str): Format of the bounding box.
canvas_size (two-tuple of ints): Height and width of the corresponding image or video.
clamping_mode: TODOBB
clamping_mode: The clamping mode to use when applying transforms that may result in bounding boxes
partially outside of the image. Possible values are: "soft", "hard", or ``None``. Read more in :ref:`clamping_mode_tuto`.
dtype (torch.dtype, optional): Desired data type of the bounding box. If omitted, will be inferred from
``data``.
device (torch.device, optional): Desired device of the bounding box. If omitted and ``data`` is a