Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def __init__(self, src_dir):
"plot_transforms_illustrations.py",
"plot_transforms_e2e.py",
"plot_cutmix_mixup.py",
"plot_rotated_box_transforms.py",
"plot_custom_transforms.py",
"plot_tv_tensors.py",
"plot_custom_tv_tensors.py",
Expand Down
Binary file added gallery/assets/leaning_tower.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 8 additions & 2 deletions gallery/transforms/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import torch
from torchvision.utils import draw_bounding_boxes, draw_segmentation_masks
from torchvision import tv_tensors
from torchvision.transforms import v2
from torchvision.transforms.v2 import functional as F


def plot(imgs, row_title=None, **imshow_kwargs):
def plot(imgs, row_title=None, bbox_width=3, **imshow_kwargs):
if not isinstance(imgs[0], list):
# Make a 2d grid even if there's just 1 row
imgs = [imgs]
Expand All @@ -24,6 +25,11 @@ def plot(imgs, row_title=None, **imshow_kwargs):
masks = target.get("masks")
elif isinstance(target, tv_tensors.BoundingBoxes):
boxes = target

# Conversion necessary because draw_bounding_boxes() only
# works with this specific format.
if tv_tensors.is_rotated_bounding_format(boxes.format):
boxes = v2.ConvertBoundingBoxFormat("xyxyxyxy")(boxes)
else:
raise ValueError(f"Unexpected target type: {type(target)}")
img = F.to_image(img)
Expand All @@ -35,7 +41,7 @@ def plot(imgs, row_title=None, **imshow_kwargs):

img = F.to_dtype(img, torch.uint8, scale=True)
if boxes is not None:
img = draw_bounding_boxes(img, boxes, colors="yellow", width=3)
img = draw_bounding_boxes(img, boxes, colors="yellow", width=bbox_width)
if masks is not None:
img = draw_segmentation_masks(img, masks.to(torch.bool), colors=["green"] * masks.shape[0], alpha=.65)

Expand Down
156 changes: 156 additions & 0 deletions gallery/transforms/plot_rotated_box_transforms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""
===============================================================
Transforms on Rotated Bounding Boxes
===============================================================

This example illustrates how to define and use rotated bounding boxes. We'll
cover how to define them, demonstrate their usage with some of the existing
transforms, and finally some of their unique behavior in comparison to
standard bounding boxes.

First, a bit of setup code:
"""

# %%
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt


import torch
from torchvision import tv_tensors
from torchvision.transforms import v2
from helpers import plot

# Figure defaults shared by every plot in this example.
plt.rcParams.update({
    "figure.figsize": [10, 5],
    "savefig.bbox": "tight",
})

# if you change the seed, make sure that the randomly-applied transforms
# properly show that the image can be both transformed and *not* transformed!
torch.manual_seed(0)

# If you're trying to run that on Colab, you can download the assets and the
# helpers from https://github.com/pytorch/vision/tree/main/gallery/
orig_img = Image.open(Path('../assets', 'leaning_tower.jpg'))

# %%
# Creating a Rotated Bounding Box
# -------------------------------
# Rotated bounding boxes are created by instantiating the
# :class:`~torchvision.tv_tensors.BoundingBoxes` class. It's the `format`
# parameter of the constructor that determines if a bounding box is rotated or
# not. In this instance, we use the
# :attr:`~torchvision.tv_tensors.BoundingBoxFormat` kind `CXCYWHR`. The first
# two values are the `x` and `y` coordinates of the center of the bounding box.
# The next two values are the `width` and `height` of the bounding box, and the
# last value is the `rotation` of the bounding box.


# A single box in CXCYWHR layout: center x, center y, width, height, rotation.
orig_box = tv_tensors.BoundingBoxes(
    [[860.0, 1100, 570, 1840, -7]],
    format="CXCYWHR",
    # PIL reports (width, height); canvas_size is passed as (height, width).
    canvas_size=(orig_img.height, orig_img.width),
)

plot([(orig_img, orig_box)], bbox_width=10)

# %%
# Rotation
# --------
# Rotated bounding boxes maintain their rotation with respect to the image even
# when the image itself is rotated through the
# :class:`~torchvision.transforms.v2.RandomRotation` transform.
rotater = v2.RandomRotation(degrees=(0, 180), expand=True)
# Apply the same transform four times; image and box are passed as two
# positional inputs and come back together as an (image, box) pair.
rotated_imgs = [rotater(orig_img, orig_box) for _ in range(4)]
plot([(orig_img, orig_box), *rotated_imgs], bbox_width=10)

# %%
# Padding
# -------
# Rotated bounding boxes also maintain their properties when the image is padded using
# :class:`~torchvision.transforms.v2.Pad`.
# One padded (image, box) pair per padding amount, in increasing order.
padded_imgs_and_boxes = [
    v2.Pad(padding=amount)(orig_img, orig_box)
    for amount in (30, 50, 100, 200)
]
plot([(orig_img, orig_box), *padded_imgs_and_boxes], bbox_width=10)

# %%
# Resizing
# --------
# Rotated bounding boxes are also resized along with an image in the
# :class:`~torchvision.transforms.v2.Resize` transform.
#
# Note that the bounding box looking bigger in the images with fewer pixels is
# an artifact, not reality. That is merely the rasterized representation of the
# bounding box's boundaries appearing bigger because we specify a fixed width of
# that rasterized line. When the image is, say, only 30 pixels wide, a
# line that is 5 pixels wide is relatively large.
# The last entry is meant to reproduce the original resolution. PIL's
# ``Image.size`` is (width, height) whereas ``v2.Resize`` interprets a sequence
# as (height, width), so the tuple is reversed; passing it as-is would
# transpose the target dimensions and distort the image.
resized_imgs = [
    v2.Resize(size=size)(orig_img, orig_box)
    for size in (30, 50, 100, orig_img.size[::-1])
]
plot([(orig_img, orig_box)] + resized_imgs, bbox_width=5)

# %%
# Perspective
# -----------
# The rotated bounding box is also transformed along with the image when the
# perspective is transformed with :class:`~torchvision.transforms.v2.RandomPerspective`.
# p=1.0 makes the transform fire on every call.
perspective_transformer = v2.RandomPerspective(distortion_scale=0.6, p=1.0)
perspective_imgs = [
    perspective_transformer(orig_img, orig_box)
    for _ in range(4)
]
plot([(orig_img, orig_box), *perspective_imgs], bbox_width=10)

# %%
# Elastic Transform
# -----------------
# The rotated bounding box is appropriately unchanged when going through the
# :class:`~torchvision.transforms.v2.ElasticTransform`.
# One (image, box) pair per ``alpha`` value, in increasing order.
elastic_imgs = [
    v2.ElasticTransform(alpha=strength)(orig_img, orig_box)
    for strength in (100.0, 500.0, 1000.0, 2000.0)
]
plot([(orig_img, orig_box), *elastic_imgs], bbox_width=10)

# %%
# Crop & Clamping Modes
# ---------------------
# The :class:`~torchvision.transforms.v2.CenterCrop` transform crops the image
# around its center. The behavior of the rotated bounding box
# depends on its `clamping_mode`. We can set the `clamping_mode` in the
# :class:`~torchvision.tv_tensors.BoundingBoxes` constructor, or by directly
# setting it after construction as we do in the example below.
#
# There are two values for `clamping_mode`:
#
# - `"soft"`: The default when constructing
# :class:`~torchvision.tv_tensors.BoundingBoxes`. <Insert semantic
# description for soft mode.>
# - `"hard"`: <Insert semantic description for hard mode.>
#
# For standard bounding boxes, both modes behave the same. We also need to
# document:
#
# - `clamping_mode` for individual kernels.
# - `clamping_mode` in :class:`~torchvision.transforms.v2.ClampBoundingBoxes`.
# - the new :class:`~torchvision.transforms.v2.SetClampingMode` transform.
#
# Sanity check for the example: the box built above uses the default mode.
assert orig_box.clamping_mode == "soft"
# Same box, but clamped with the "hard" mode for a side-by-side comparison.
hard_box = orig_box.clone()
hard_box.clamping_mode = "hard"

# The last entry is meant to be a crop at the original resolution. PIL's
# ``Image.size`` is (width, height) whereas ``v2.CenterCrop`` interprets a
# sequence as (height, width), so the tuple is reversed; passing it as-is
# would crop/pad to transposed dimensions.
crop_sizes = (800, 1200, 2000, orig_img.size[::-1])

soft_center_crops_and_boxes = [
    v2.CenterCrop(size=size)(orig_img, orig_box)
    for size in crop_sizes
]

hard_center_crops_and_boxes = [
    v2.CenterCrop(size=size)(orig_img, hard_box)
    for size in crop_sizes
]

# First row: "soft" clamping; second row: "hard" clamping.
plot(
    [
        [(orig_img, orig_box)] + soft_center_crops_and_boxes,
        [(orig_img, hard_box)] + hard_center_crops_and_boxes,
    ],
    bbox_width=10,
)
Loading