From 71da0d586898a04e8d9ff75cb5b7210237e736dc Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:09:16 +0200 Subject: [PATCH 01/20] Change padding for cityscapes --- yoeo/utils/transforms.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/yoeo/utils/transforms.py b/yoeo/utils/transforms.py index 800c457..391db9c 100644 --- a/yoeo/utils/transforms.py +++ b/yoeo/utils/transforms.py @@ -94,6 +94,13 @@ def __init__(self, ): ]) +class ResizeToSquare(ImgAug): + def __init__(self, ): + self.augmentations = iaa.Sequential([ + iaa.Resize(416).to_deterministic() # TODO dynamic resolution + ]) + + class ToTensor(object): def __init__(self, ): pass @@ -102,7 +109,7 @@ def __call__(self, data): img, boxes, seg = data # Extract image as PyTorch tensor img = transforms.ToTensor()(img) - seg = transforms.ToTensor()(seg) * 255 # Because troch maps this to 0-1 instead of 0-255 + seg = transforms.ToTensor()(seg) * 255 # Because torch maps this to 0-1 instead of 0-255 bb_targets = torch.zeros((len(boxes), 6)) bb_targets[:, 1:] = transforms.ToTensor()(boxes) @@ -123,7 +130,7 @@ def __call__(self, data): DEFAULT_TRANSFORMS = transforms.Compose([ AbsoluteLabels(), - PadSquare(), + ResizeToSquare(), RelativeLabels(), ToTensor(), ]) From 63561daecc81b658ce774e8f147df3e2a11e9007 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:31:21 +0200 Subject: [PATCH 02/20] Test image discovery --- yoeo/utils/datasets.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 4e3634d..81c8beb 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -6,6 +6,7 @@ import os import warnings import numpy as np +from pathlib import Path from PIL import Image from PIL import ImageFile @@ -57,9 +58,12 @@ def __len__(self): class ListDataset(Dataset): - def __init__(self, list_path, img_size=416, multiscale=True, transform=None): - with open(list_path, "r") as file: - self.img_files = file.readlines() + def __init__(self, data_path, img_size=416, multiscale=True, transform=None): + + # Get all color images for e.g. the test set + result = list(Path(data_path).rglob("*.png")) + + print(result) self.label_files = [] for path in self.img_files: From c06e93a7a2154d42079dc0d31f678ffa5dbcde4c Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:43:18 +0200 Subject: [PATCH 03/20] Add first mask loader --- yoeo/utils/datasets.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 81c8beb..9ae8f18 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -61,30 +61,31 @@ class ListDataset(Dataset): def __init__(self, data_path, img_size=416, multiscale=True, transform=None): # Get all color images for e.g. the test set - result = list(Path(data_path).rglob("*.png")) - - print(result) + self.img_files = list(Path(data_path).rglob("*.png")) + """ self.label_files = [] for path in self.img_files: image_dir = os.path.dirname(path) - label_dir = "labels".join(image_dir.rsplit("images", 1)) - assert label_dir != image_dir, \ - f"Image path must contain a folder named 'images'! 
\n'{image_dir}'" + label_dir = image_dir.replace("leftImg8bit", "gtFine") + # TODO bbox stuff label_file = os.path.join(label_dir, os.path.basename(path)) label_file = os.path.splitext(label_file)[0] + '.txt' self.label_files.append(label_file) + """ - self.mask_files = [] + self.label_files = [] for path in self.img_files: image_dir = os.path.dirname(path) - mask_dir = "segmentations".join(image_dir.rsplit("images", 1)) - assert mask_dir != image_dir, \ - f"Image path must contain a folder named 'images'! \n'{image_dir}'" + mask_dir = image_dir.replace("leftImg8bit", "gtFine") mask_file = os.path.join(mask_dir, os.path.basename(path)) - mask_file = os.path.splitext(mask_file)[0] + '.png' + mask_file = os.path.splitext(mask_file)[0] + '_labelIds.png' self.mask_files.append(mask_file) + print(self.mask_files) + + exit(0) + self.img_size = img_size self.max_objects = 100 self.multiscale = multiscale From b293ef9613c8bd350cdedd361dd4a9f85a30fb35 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:44:06 +0200 Subject: [PATCH 04/20] Fix copy paste error --- yoeo/utils/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 9ae8f18..36fd373 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -74,7 +74,7 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): self.label_files.append(label_file) """ - self.label_files = [] + self.mask_files = [] for path in self.img_files: image_dir = os.path.dirname(path) mask_dir = image_dir.replace("leftImg8bit", "gtFine") From ab59e5b9b5dd8b1b1a13fca433c23d90c522cc22 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:46:15 +0200 Subject: [PATCH 05/20] Fix stuff --- yoeo/utils/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 36fd373..6a7dc33 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -76,8 +76,8 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): self.mask_files = [] for path in self.img_files: - image_dir = os.path.dirname(path) - mask_dir = image_dir.replace("leftImg8bit", "gtFine") + path = path.replace("leftImg8bit", "gtFine") + mask_dir = os.path.dirname(path) mask_file = os.path.join(mask_dir, os.path.basename(path)) mask_file = os.path.splitext(mask_file)[0] + '_labelIds.png' self.mask_files.append(mask_file) From 2333640e4ba9fd9e72b8e61da3506829bb47d3a8 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:47:06 +0200 Subject: [PATCH 06/20] Fix string conversion --- yoeo/utils/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 6a7dc33..d8be999 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -76,7 +76,7 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): self.mask_files = [] for path in self.img_files: - path = path.replace("leftImg8bit", "gtFine") + path = str(path).replace("leftImg8bit", "gtFine") mask_dir = os.path.dirname(path) mask_file = os.path.join(mask_dir, os.path.basename(path)) mask_file = os.path.splitext(mask_file)[0] + '_labelIds.png' From f941f3187709f018a02e41696748447537ac7cf4 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 19:15:18 +0200 Subject: [PATCH 07/20] Demo segmentation load --- yoeo/utils/datasets.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff 
--git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index d8be999..8cfd242 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -82,10 +82,6 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): mask_file = os.path.splitext(mask_file)[0] + '_labelIds.png' self.mask_files.append(mask_file) - print(self.mask_files) - - exit(0) - self.img_size = img_size self.max_objects = 100 self.multiscale = multiscale @@ -107,6 +103,8 @@ def __getitem__(self, index): print(f"Could not read image '{img_path}'.") return + """ + # --------- # Label # --------- @@ -120,6 +118,7 @@ def __getitem__(self, index): except Exception: print(f"Could not read label '{label_path}'.") return + """ # --------- # Segmentation Mask @@ -127,11 +126,14 @@ def __getitem__(self, index): try: mask_path = self.mask_files[index % len(self.img_files)].rstrip() # Load segmentation mask as numpy array - mask = np.array(Image.open(mask_path).convert('RGB')) // 127 + mask = np.array(Image.open(mask_path).convert('RGB')) + print(np.unique(mask, return_counts=True)) except FileNotFoundError as e: - print(f"Could not load mask '{mask_path}'.") + print(f"Could not load mask '{mask_path}' {e}.") return + exit(0) + # ----------- # Transform # ----------- From 0a6fb562dd8b86c1db209ffcc163c0bb35989a78 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 19:22:07 +0200 Subject: [PATCH 08/20] Add string conversion --- yoeo/utils/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 8cfd242..b53d6ba 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -61,7 +61,7 @@ class ListDataset(Dataset): def __init__(self, data_path, img_size=416, multiscale=True, transform=None): # Get all color images for e.g. the test set - self.img_files = list(Path(data_path).rglob("*.png")) + self.img_files = [str(path) in Path(data_path).rglob("*.png")] """ self.label_files = [] From 7f90aa700d7cdab30de2f1fdea762c591d53df05 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 19:22:18 +0200 Subject: [PATCH 09/20] Add string conversion --- yoeo/utils/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index b53d6ba..848469e 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -61,7 +61,7 @@ class ListDataset(Dataset): def __init__(self, data_path, img_size=416, multiscale=True, transform=None): # Get all color images for e.g. 
the test set - self.img_files = [str(path) in Path(data_path).rglob("*.png")] + self.img_files = [str(path) for path in Path(data_path).rglob("*.png")] """ self.label_files = [] From 7b534a74a37765e66b4984dc9cd3d40fa132ca21 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 19:25:10 +0200 Subject: [PATCH 10/20] Add grouping --- yoeo/utils/datasets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 848469e..b21a6f1 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -127,9 +127,12 @@ def __getitem__(self, index): mask_path = self.mask_files[index % len(self.img_files)].rstrip() # Load segmentation mask as numpy array mask = np.array(Image.open(mask_path).convert('RGB')) + # Group classes together + mask[1 < mask <= 5] = 1 + mask[5 < mask <= 9] = 2 print(np.unique(mask, return_counts=True)) except FileNotFoundError as e: - print(f"Could not load mask '{mask_path}' {e}.") + print(f"Could not load mask '{mask_path}'.") return exit(0) From 77dca3c76a3daac91242d2abc765da0dfe788d17 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sun, 25 Jul 2021 12:40:39 +0200 Subject: [PATCH 11/20] Better indexing --- yoeo/utils/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index b21a6f1..61cc0c4 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -128,8 +128,8 @@ def __getitem__(self, index): # Load segmentation mask as numpy array mask = np.array(Image.open(mask_path).convert('RGB')) # Group classes together - mask[1 < mask <= 5] = 1 - mask[5 < mask <= 9] = 2 + mask[np.logical_and(mask <= 5, mask > 0)] = 1 + mask[np.logical_and(mask <= 10, mask > 5)] = 2 print(np.unique(mask, return_counts=True)) except FileNotFoundError as e: print(f"Could not load mask '{mask_path}'.") From 3c5587e1701c3018c9b22a7d87087e16fea92b8a Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sun, 25 Jul 2021 12:41:16 +0200 Subject: [PATCH 12/20] Remove exit stop --- yoeo/utils/datasets.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 61cc0c4..bdc6661 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -135,8 +135,6 @@ def __getitem__(self, index): print(f"Could not load mask '{mask_path}'.") return - exit(0) - # ----------- # Transform # ----------- From df30d74be5d6140528f62660fb9007b0416e92be Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Sun, 25 Jul 2021 18:07:41 +0200 Subject: [PATCH 13/20] Adapt model for cityscapes classes --- config/yoeo-rev-7.cfg | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/config/yoeo-rev-7.cfg b/config/yoeo-rev-7.cfg index 78e32d8..756a265 100644 --- a/config/yoeo-rev-7.cfg +++ b/config/yoeo-rev-7.cfg @@ -213,15 +213,13 @@ activation=leaky size=1 stride=1 pad=1 -filters=24 +filters=39 activation=linear - - [yolo] mask = 3,4,5 anchors = 17, 32, 13,171, 37, 67, 30,224, 69,112, 116,212 -classes=3 +classes=8 num=6 jitter=.3 ignore_thresh = .7 @@ -257,13 +255,13 @@ activation=leaky size=1 stride=1 pad=1 -filters=24 +filters=39 activation=linear [yolo] mask = 0,1,2 anchors = 17, 32, 13,171, 37, 67, 30,224, 69,112, 116,212 -classes=3 +classes=8 num=6 jitter=.3 ignore_thresh = .7 @@ -328,11 +326,11 @@ activation=leaky [convolutional] batch_normalize=1 -filters=3 +filters=6 size=1 stride=1 pad=1 activation=leaky [seg] -classes=2 +classes=5 From 
e54513dc92641254f41686f2022f7f53f202b279 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Sun, 25 Jul 2021 18:08:41 +0200 Subject: [PATCH 14/20] make detect output more consistent --- yoeo/detect.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/yoeo/detect.py b/yoeo/detect.py index 98ff357..fec1ad6 100755 --- a/yoeo/detect.py +++ b/yoeo/detect.py @@ -205,19 +205,14 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_pa ax.imshow( - SegmentationMapsOnImage( - seg[ - int(pad_y) : int(img_size - pad_y), - int(pad_x) : int(img_size - pad_x), - ], shape=img.shape).draw_on_image(img)[0]) + SegmentationMapsOnImage(seg, shape=img.shape).draw_on_image(img, alpha=0.5)[0]) # Rescale boxes to original image detections = rescale_boxes(detections, img_size, img.shape[:2]) unique_labels = detections[:, -1].cpu().unique() n_cls_preds = len(unique_labels) # Bounding-box colors cmap = plt.get_cmap("tab20b") - colors = [cmap(i) for i in np.linspace(0, 1, n_cls_preds)] - bbox_colors = random.sample(colors, n_cls_preds) + colors = [cmap(i) for i in np.linspace(0, 1, len(classes))] for x1, y1, x2, y2, conf, cls_pred in detections: print(f"\t+ Label: {classes[int(cls_pred)]} | Confidence: {conf.item():0.4f}") @@ -225,9 +220,8 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_pa box_w = x2 - x1 box_h = y2 - y1 - color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])] # Create a Rectangle patch - bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none") + bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=1, edgecolor=colors[int(cls_pred)], facecolor="none") # Add the bbox to the plot ax.add_patch(bbox) # Add label @@ -237,7 +231,7 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_pa s=classes[int(cls_pred)], color="white", verticalalignment="top", - bbox={"color": color, "pad": 0}) + bbox={"color": colors[int(cls_pred)], "pad": 0}) # Save generated image with detections plt.axis("off") From c945da7f1be5dbb5ba237f8d9eb8f18f3b5529d2 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Sun, 25 Jul 2021 18:09:32 +0200 Subject: [PATCH 15/20] Load and cluster cityscapes dataset --- yoeo/utils/datasets.py | 59 ++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index bdc6661..5241001 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -6,6 +6,9 @@ import os import warnings import numpy as np +import json +from tqdm import tqdm +from collections import defaultdict from pathlib import Path from PIL import Image from PIL import ImageFile @@ -63,16 +66,20 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): # Get all color images for e.g. 
the test set self.img_files = [str(path) for path in Path(data_path).rglob("*.png")] - """ - self.label_files = [] - for path in self.img_files: - image_dir = os.path.dirname(path) - label_dir = image_dir.replace("leftImg8bit", "gtFine") - # TODO bbox stuff - label_file = os.path.join(label_dir, os.path.basename(path)) - label_file = os.path.splitext(label_file)[0] + '.txt' - self.label_files.append(label_file) - """ + self.annotations = defaultdict(list) + for dset in ['train', 'val']: + with open(os.path.abspath(os.path.join(data_path, "../../", f"annotations/instancesonly_filtered_gtFine_{dset}.json")), "r") as f: + annotation_file = json.load(f) + for annotation in tqdm(annotation_file["annotations"]): + img_id = annotation["image_id"] + category_id = annotation["category_id"] + bbox = annotation["bbox"] + self.annotations[ + os.path.basename( + list(filter( + lambda x:x["id"]==img_id, + annotation_file["images"]))[0]["file_name"]) + ].append((img_id, category_id, bbox)) self.mask_files = [] for path in self.img_files: @@ -103,22 +110,26 @@ def __getitem__(self, index): print(f"Could not read image '{img_path}'.") return - """ - # --------- # Label # --------- try: - label_path = self.label_files[index % len(self.img_files)].rstrip() - - # Ignore warning if file is empty - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - boxes = np.loadtxt(label_path).reshape(-1, 5) + labels = self.annotations[os.path.basename(img_path)] + + boxes = np.zeros((len(labels), 5)) + + for idx, label in enumerate(labels): + # label_idx x_center y_center width height + boxes[idx] = np.array([ + label[1] - 1, + label[2][0] / img.shape[1] + label[2][2] / img.shape[1] / 2, + label[2][1] / img.shape[0] + label[2][3] / img.shape[0] / 2, + label[2][2] / img.shape[1], + label[2][3] / img.shape[0] + ]) except Exception: print(f"Could not read label '{label_path}'.") return - """ # --------- # Segmentation Mask @@ -128,9 +139,13 @@ def __getitem__(self, index): # Load segmentation mask as numpy array mask = np.array(Image.open(mask_path).convert('RGB')) # Group classes together - mask[np.logical_and(mask <= 5, mask > 0)] = 1 - mask[np.logical_and(mask <= 10, mask > 5)] = 2 - print(np.unique(mask, return_counts=True)) + mask[mask <= 6] = 0 + mask[np.logical_and(mask > 6, mask <= 10)] = 1 + mask[np.logical_and(mask > 10, mask <= 16)] = 2 + mask[np.logical_and(mask > 16, mask <= 20)] = 3 + mask[np.logical_and(mask > 20, mask <= 22)] = 4 + mask[mask == 23] = 5 + mask[mask > 23] = 0 except FileNotFoundError as e: print(f"Could not load mask '{mask_path}'.") return From 49b265820a539fc7e9d3649d4ed49095b44b2ff5 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Sun, 25 Jul 2021 18:11:15 +0200 Subject: [PATCH 16/20] Fix stuff because of the new resizing --- yoeo/utils/utils.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/yoeo/utils/utils.py b/yoeo/utils/utils.py index d178d21..e684a63 100644 --- a/yoeo/utils/utils.py +++ b/yoeo/utils/utils.py @@ -65,19 +65,11 @@ def rescale_boxes(boxes, current_dim, original_shape): """ orig_h, orig_w = original_shape - # The amount of padding that was added - pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) - pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) - - # Image height and width after padding is removed - unpad_h = current_dim - pad_y - unpad_w = current_dim - pad_x - # Rescale bounding boxes to dimension of original image - boxes[:, 0] = ((boxes[:, 0] - 
pad_x // 2) / unpad_w) * orig_w - boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h - boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w - boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h + boxes[:, 0] = boxes[:, 0] * (orig_w / current_dim) + boxes[:, 1] = boxes[:, 1] * (orig_h / current_dim) + boxes[:, 2] = boxes[:, 2] * (orig_w / current_dim) + boxes[:, 3] = boxes[:, 3] * (orig_h / current_dim) return boxes From 9ef15c542f082efb1f4c7fdaf0afce05b602ffa0 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Tue, 3 Aug 2021 15:10:30 +0200 Subject: [PATCH 17/20] Tweak and cleanup loss function --- yoeo/utils/loss.py | 201 ++++++++++++++++----------------------------- 1 file changed, 69 insertions(+), 132 deletions(-) diff --git a/yoeo/utils/loss.py b/yoeo/utils/loss.py index c240b8f..b2b9689 100644 --- a/yoeo/utils/loss.py +++ b/yoeo/utils/loss.py @@ -55,143 +55,78 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= return iou # IoU -def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 - # return positive, negative label smoothing BCE targets - return 1.0 - 0.5 * eps, 0.5 * eps - - -class BCEBlurWithLogitsLoss(nn.Module): - # BCEwithLogitLoss() with reduced missing label effects. - def __init__(self, alpha=0.05): - super(BCEBlurWithLogitsLoss, self).__init__() - self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() - self.alpha = alpha - - def forward(self, pred, true): - loss = self.loss_fcn(pred, true) - pred = torch.sigmoid(pred) # prob from logits - dx = pred - true # reduce only missing label effects - # dx = (pred - true).abs() # reduce missing label and false label effects - alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) - loss *= alpha_factor - return loss.mean() - - -class FocalLoss(nn.Module): - # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) - def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): - super(FocalLoss, self).__init__() - self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() - self.gamma = gamma - self.alpha = alpha - self.reduction = loss_fcn.reduction - self.loss_fcn.reduction = 'none' # required to apply FL to each element - - def forward(self, pred, true): - loss = self.loss_fcn(pred, true) - # p_t = torch.exp(-loss) - # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability - - # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py - pred_prob = torch.sigmoid(pred) # prob from logits - p_t = true * pred_prob + (1 - true) * (1 - pred_prob) - alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) - modulating_factor = (1.0 - p_t) ** self.gamma - loss *= alpha_factor * modulating_factor - - if self.reduction == 'mean': - return loss.mean() - elif self.reduction == 'sum': - return loss.sum() - else: # 'none' - return loss - - -class QFocalLoss(nn.Module): - # Wraps Quality focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
-    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
-        super(QFocalLoss, self).__init__()
-        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
-        self.gamma = gamma
-        self.alpha = alpha
-        self.reduction = loss_fcn.reduction
-        self.loss_fcn.reduction = 'none'  # required to apply FL to each element
-
-    def forward(self, pred, true):
-        loss = self.loss_fcn(pred, true)
-
-        pred_prob = torch.sigmoid(pred)  # prob from logits
-        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
-        modulating_factor = torch.abs(true - pred_prob) ** self.gamma
-        loss *= alpha_factor * modulating_factor
-
-        if self.reduction == 'mean':
-            return loss.mean()
-        elif self.reduction == 'sum':
-            return loss.sum()
-        else:  # 'none'
-            return loss
-
-
 def compute_loss(combined_predictions, combined_targets, model):
+    # Split seg and yolo stuff
     yolo_targets, seg_targets = combined_targets
     yolo_predictions, seg_predictions = combined_predictions
 
+    # Check which device was used
+    device = yolo_targets.device
+
     # Segmentation loss
     seg_loss = nn.CrossEntropyLoss()(seg_predictions[0], seg_targets).unsqueeze(0)
 
-    device = yolo_targets.device
+    # Add placeholder variables for the different losses
     lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
+
+    # Build yolo targets
     tcls, tbox, indices, anchors = build_targets(yolo_predictions, yolo_targets, model)  # targets
 
-    # Define criteria
+    # Define the different loss functions for classification and objectness
    BCEcls = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))
    BCEobj = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))
 
-    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
-    cp, cn = smooth_BCE(eps=0.0)
-
-    # Focal loss
-    gamma = 0  # focal loss gamma
-    if gamma > 0:
-        BCEcls, BCEobj = FocalLoss(BCEcls, gamma), FocalLoss(BCEobj, gamma)
-
-    # Losses
-    # layer index, layer predictions
+    # Calculate losses for each yolo layer
     for layer_index, layer_predictions in enumerate(yolo_predictions):
-        # image, anchor, gridy, gridx
+        # Get image ids, anchors, and grid indices i and j for each target in the current yolo layer
         b, anchor, grid_j, grid_i = indices[layer_index]
+        # Build an empty object target tensor with the same shape as the object prediction
         tobj = torch.zeros_like(layer_predictions[..., 0], device=device)  # target obj
-
-        num_targets = b.shape[0]  # number of targets
+        # Get the number of targets for this layer.
+        # Each target is a label box with some scaling and the association of an anchor box.
+        # Label boxes may be associated with 0 or multiple anchors, so they can appear multiple times or not at all in the targets.
+        num_targets = b.shape[0]
+        # Check if there are targets for this batch
         if num_targets:
-            # prediction subset corresponding to targets
+            # Load the corresponding values from the predictions for each of the targets
             ps = layer_predictions[b, anchor, grid_j, grid_i]
 
-            # Regression
+            # Regression of the box
+            # Apply sigmoid to the xy offset predictions in each cell that has a target
             pxy = ps[:, :2].sigmoid()
+            # Apply the exponent to the wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
             pwh = torch.exp(ps[:, 2:4]) * anchors[layer_index]
-            pbox = torch.cat((pxy, pwh), 1)  # predicted box
-            # iou(prediction, target)
+            # Build a box out of xy and wh
+            pbox = torch.cat((pxy, pwh), 1)
+            # Calculate CIoU or GIoU for each target with the predicted box for its cell + anchor
             iou = bbox_iou(pbox.T, tbox[layer_index], x1y1x2y2=False, CIoU=True)
+            # We want to minimize our loss, and the best possible IoU is 1, so we take 1 - IoU and reduce it with a mean
             lbox += (1.0 - iou).mean()  # iou loss
 
-            # Objectness
+            # Classification of the objectness
+            # Fill our empty object target tensor with the IoU we just calculated for each target at the targets position
             tobj[b, anchor, grid_j, grid_i] = iou.detach().clamp(0).type(tobj.dtype)  # Use cells with iou > 0 as object targets
 
-            # Classification
+            # Classification of the class
+            # Check if we need to do a classification (number of classes > 1)
             if ps.size(1) - 5 > 1:
-                t = torch.full_like(ps[:, 5:], cn, device=device)  # targets
-                t[range(num_targets), tcls[layer_index]] = cp
+                # One-hot class encoding
+                t = torch.zeros_like(ps[:, 5:], device=device)  # targets
+                t[range(num_targets), tcls[layer_index]] = 1
+                # Use the tensor to calculate the BCE loss
                 lcls += BCEcls(ps[:, 5:], t)  # BCE
 
+        # Classification of the objectness, the sequel
+        # Calculate the BCE loss between the on-the-fly generated target and the network prediction
         lobj += BCEobj(layer_predictions[..., 4], tobj)  # obj loss
 
+    # Scaling of losses
     lbox *= 0.2
     lobj *= 1.0
     lcls *= 0.05
 
+    # Merge losses
     loss = lbox + lobj + lcls + seg_loss
 
     return loss, to_cpu(torch.cat((lbox, lobj, lcls, seg_loss, loss)))
 
@@ -202,51 +137,53 @@ def build_targets(p, targets, model):
     na, nt = 3, targets.shape[0]  # number of anchors, targets #TODO
     tcls, tbox, indices, anch = [], [], [], []
     gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
-    ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
-    # append anchor indices
+    # Make a tensor that iterates 0-2 for 3 anchors and repeat that as many times as we have target boxes
+    ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)
+    # Copy the target boxes anchor-count times and append an anchor index to each copy; the anchor index is also expressed by the new first dimension
     targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)
 
-    g = 0.5  # bias
-    off = torch.tensor([[0, 0]], device=targets.device).float() * g  # offsets
-
     for i, yolo_layer in enumerate(model.yolo_layers):
-        anchors = yolo_layer.anchors / yolo_layer.stride
+        # Scale the anchors by the yolo grid cell size so that an anchor with the size of a cell would result in 1
+        anchors = yolo_layer.anchors / yolo_layer.stride
+        # Add the number of yolo cells in this layer to the gain tensor
+        # The gain tensor matches the columns of our targets (img id, class, x, y, w, h, anchor id)
         gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
 
-        # Match targets to anchors
+        # Scale the targets by the number of yolo cells in this layer, so they are in the yolo cell coordinate system
         t = targets * gain
+        # Check if we have targets
         if nt:
-            # Matches
-            r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
+            # Calculate the ratio between anchor and target box for both width and height
+            r = t[:, :, 4:6] / anchors[:, None]
+            # Select the ratios that have the highest divergence in any axis and check if the ratio is less than 4
             j = torch.max(r, 1. / r).max(2)[0] < 4  # compare #TODO
-            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
-            t = t[j]  # filter
-
-            # Offsets
-            gxy = t[:, 2:4]  # grid xy
-            gxi = gain[[2, 3]] - gxy  # inverse
-            j, k = ((gxy % 1. < g) & (gxy > 1.)).T
-            l, m = ((gxi % 1. < g) & (gxi > 1.)).T
-            j = torch.stack((torch.ones_like(j),))
-            t = t.repeat((off.shape[0], 1, 1))[j]
-            offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
-
+            # Only use targets that have the correct ratios for their anchors
+            # That means we only keep the ones that have a matching anchor, and we lose the anchor dimension
+            # The anchor id is still saved in the 7th value of each target
+            t = t[j]
         else:
             t = targets[0]
-            offsets = 0
 
-        # Define
-        b, c = t[:, :2].long().T  # image, class
-        gxy = t[:, 2:4]  # grid xy
+        # Extract the image id in the batch and the class id
+        b, c = t[:, :2].long().T
+        # We isolate the target cell associations.
+        # x, y, w, h are already in the cell coordinate system, meaning an x = 1.2 would be 1.2 times the cell width
+        gxy = t[:, 2:4]
         gwh = t[:, 4:6]  # grid wh
-        gij = (gxy - offsets).long()
+        # Cast to int to get a cell index, e.g. 1.2 gets associated to cell 1
+        gij = gxy.long()
+        # Isolate the x and y index dimensions
         gi, gj = gij.T  # grid xy indices
 
-        # Append
-        a = t[:, 6].long()  # anchor indices
-        # image, anchor, grid indices
+        # Convert the anchor indexes to int
+        a = t[:, 6].long()
+        # Add the target tensors for this yolo layer to the output lists
+        # Add to the index list and limit the index range to prevent out-of-bounds indices
         indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))
+        # Add to the target box list and convert the box coordinates from global grid coordinates to local offsets in the grid cell
         tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
-        anch.append(anchors[a])  # anchors
-        tcls.append(c)  # class
+        # Add the correct anchor for each target to the list
+        anch.append(anchors[a])
+        # Add the class for each target to the list
+        tcls.append(c)
 
     return tcls, tbox, indices, anch
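
Aside (not part of any commit): the rewritten build_targets above packs a lot into tensor ops, so here is a minimal standalone PyTorch sketch of its cell-assignment arithmetic. The box, grid size, and anchor values are invented for illustration; only the ratio threshold of 4 mirrors the patch.

import torch

# One label box in normalized image coordinates: (img id, class, x_center, y_center, w, h)
targets = torch.tensor([[0.0, 2.0, 0.5, 0.62, 0.1, 0.3]])
grid_size = 13                        # cells per side in one yolo layer
anchor = torch.tensor([2.0, 5.0])     # one anchor, already scaled to cell units

# Scale the normalized box into grid-cell coordinates (the "gain" step)
t = targets.clone()
t[:, 2:6] *= grid_size

# Anchor matching: keep the box only if neither w nor h diverges from the anchor by a factor of 4 or more
r = t[:, 4:6] / anchor
keep = torch.max(r, 1.0 / r).max(1)[0] < 4

# The integer part of xy selects the cell, the fractional part is the in-cell offset the sigmoid(xy) head must regress
gxy = t[keep, 2:4]
gij = gxy.long()       # x = 6.5 -> cell index 6
offsets = gxy - gij    # -> 0.5
print(gij, offsets)
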
From 9992bc86051ad37aaa8c5b0689c2fd1bca1d0043 Mon Sep 17 00:00:00 2001
From: Florian Vahl <7vahl@informatik.uni-hamburg.de>
Date: Wed, 4 Aug 2021 12:37:42 +0200
Subject: [PATCH 18/20] Use fix obj loss value

---
 yoeo/utils/loss.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yoeo/utils/loss.py b/yoeo/utils/loss.py
index 63765f5..6f7aa72 100644
--- a/yoeo/utils/loss.py
+++ b/yoeo/utils/loss.py
@@ -106,7 +106,7 @@ def compute_loss(combined_predictions, combined_targets, model):
 
             # Classification of the objectness
             # Fill our empty object target tensor with the IoU we just calculated for each target at the targets position
-            tobj[b, anchor, grid_j, grid_i] = iou.detach().clamp(0).type(tobj.dtype)  # Use cells with iou > 0 as object targets
+            tobj[b, anchor, grid_j, grid_i] = 1#iou.detach().clamp(0).type(tobj.dtype)  # Use cells with iou > 0 as object targets
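
The one-line change above swaps the soft, IoU-graded objectness target for a hard 1 on every matched cell. A toy contrast of the two variants (IoU values invented, purely illustrative):

import torch

iou = torch.tensor([0.31, 0.74, 0.05])  # invented IoUs of matched predictions
soft_tobj = iou.detach().clamp(0)       # before: objectness graded by box quality
hard_tobj = torch.ones_like(iou)        # after: every matched cell is a full positive
print(soft_tobj, hard_tobj)
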
From fae4780ff3e52a37328227706c9bad57f4ccc469 Mon Sep 17 00:00:00 2001
From: Florian Vahl <7vahl@informatik.uni-hamburg.de>
Date: Wed, 4 Aug 2021 12:38:46 +0200
Subject: [PATCH 19/20] Add rev 7 with custom anchors

---
 config/yoeo-rev-7-anchor.cfg | 337 +++++++++++++++++++++++++++++++++++
 1 file changed, 337 insertions(+)
 create mode 100644 config/yoeo-rev-7-anchor.cfg

diff --git a/config/yoeo-rev-7-anchor.cfg b/config/yoeo-rev-7-anchor.cfg
new file mode 100644
index 0000000..d7697cf
--- /dev/null
+++ b/config/yoeo-rev-7-anchor.cfg
@@ -0,0 +1,337 @@
+[net]
+# Testing
+#batch=1
+#subdivisions=1
+# Training
+batch=64
+subdivisions=8
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=100
+max_batches = 4000
+policy=steps
+steps=50000,60000
+scales=.1,.1
+
+####
+# Like YOEO rev 2 but with deeper skip connections
+####
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-1
+groups=2
+group_id=1
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1,-2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -6,-1
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-1
+groups=2
+group_id=1
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1,-2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -6,-1
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-1
+groups=2
+group_id=1
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1,-2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -6,-1
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=39
+activation=linear
+
+[yolo]
+mask = 3,4,5
+anchors = 7, 11, 15,24, 25, 52, 48,65, 90,132, 88,237
+classes=8
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+[route]
+layers = -4
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[upsample]
+stride=2
+
+[route]
+layers = -1, 24
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=39
+activation=linear
+
+[yolo]
+mask = 0,1,2
+anchors = 7, 11, 15,24, 25, 52, 48,65, 90,132, 88,237
+classes=8
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1 + +[route] +layers = 18 + +[upsample] +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, 10 + +[upsample] +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, 2 + +[upsample] +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, 0 + +[upsample] +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=6 +size=1 +stride=1 +pad=1 +activation=leaky + +[seg] +classes=5 From 0a1d21eb723675994f92044f4bf43192ca64bd81 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Mon, 9 Aug 2021 13:52:12 +0200 Subject: [PATCH 20/20] Cityscapes using aug --- config/yoeo-rev-7.cfg | 4 ++-- yoeo/detect.py | 2 +- yoeo/train.py | 2 +- yoeo/utils/augmentations.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/config/yoeo-rev-7.cfg b/config/yoeo-rev-7.cfg index 756a265..699d18c 100644 --- a/config/yoeo-rev-7.cfg +++ b/config/yoeo-rev-7.cfg @@ -19,8 +19,8 @@ learning_rate=0.001 burn_in=100 max_batches = 4000 policy=steps -steps=50000,60000 -scales=.1,.1 +steps=15000,30000 +scales=.2,.1 #### # Like YOEO rev 2 but with deeper skip connections diff --git a/yoeo/detect.py b/yoeo/detect.py index fec1ad6..d6fcb29 100755 --- a/yoeo/detect.py +++ b/yoeo/detect.py @@ -239,7 +239,7 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_pa plt.gca().yaxis.set_major_locator(NullLocator()) filename = os.path.basename(image_path).split(".")[0] output_path_1 = os.path.join(output_path, f"{filename}.png") - plt.savefig(output_path_1, bbox_inches="tight", pad_inches=0.0) + plt.savefig(output_path_1, bbox_inches="tight", pad_inches=0.5) plt.close() diff --git a/yoeo/train.py b/yoeo/train.py index 665d4e5..6301293 100755 --- a/yoeo/train.py +++ b/yoeo/train.py @@ -48,7 +48,7 @@ def _create_data_loader(img_path, batch_size, img_size, n_cpu, multiscale_traini img_path, img_size=img_size, multiscale=multiscale_training, - transform=DEFAULT_TRANSFORMS) + transform=AUGMENTATION_TRANSFORMS) dataloader = DataLoader( dataset, batch_size=batch_size, diff --git a/yoeo/utils/augmentations.py b/yoeo/utils/augmentations.py index c29df04..000e84a 100644 --- a/yoeo/utils/augmentations.py +++ b/yoeo/utils/augmentations.py @@ -1,6 +1,6 @@ import imgaug.augmenters as iaa from torchvision import transforms -from yoeo.utils.transforms import ToTensor, PadSquare, RelativeLabels, AbsoluteLabels, ImgAug +from yoeo.utils.transforms import ToTensor, PadSquare, RelativeLabels, AbsoluteLabels, ImgAug, ResizeToSquare class DefaultAug(ImgAug): @@ -29,7 +29,7 @@ def __init__(self, ): AUGMENTATION_TRANSFORMS = transforms.Compose([ AbsoluteLabels(), DefaultAug(), - PadSquare(), + ResizeToSquare(), RelativeLabels(), ToTensor(), ])
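
Closing note on the preprocessing these patches converge on: ResizeToSquare replaces PadSquare and is applied to image and segmentation mask alike. A standalone imgaug sketch (not repo code; shapes and class values are arbitrary) of the joint, deterministic resize that keeps the two aligned, mirroring the transform's to_deterministic() call:

import numpy as np
import imgaug.augmenters as iaa
from imgaug.augmentables.segmaps import SegmentationMapsOnImage

img = np.zeros((1024, 2048, 3), dtype=np.uint8)   # Cityscapes-like 2:1 frame
mask = np.zeros((1024, 2048, 1), dtype=np.int32)
mask[400:600, 900:1200] = 3                       # a made-up class region

# One deterministic resize applied to both, mirroring ResizeToSquare from patch 1
aug = iaa.Resize(416).to_deterministic()
segmap = SegmentationMapsOnImage(mask, shape=img.shape)
img_aug, segmap_aug = aug(image=img, segmentation_maps=segmap)

print(img_aug.shape, segmap_aug.get_arr().shape)  # both now 416x416
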