From 71da0d586898a04e8d9ff75cb5b7210237e736dc Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:09:16 +0200 Subject: [PATCH 01/20] Change padding for cityscapes --- yoeo/utils/transforms.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/yoeo/utils/transforms.py b/yoeo/utils/transforms.py index 800c457..391db9c 100644 --- a/yoeo/utils/transforms.py +++ b/yoeo/utils/transforms.py @@ -94,6 +94,13 @@ def __init__(self, ): ]) +class ResizeToSquare(ImgAug): + def __init__(self, ): + self.augmentations = iaa.Sequential([ + iaa.Resize(416).to_deterministic() # TODO dynamic resolution + ]) + + class ToTensor(object): def __init__(self, ): pass @@ -102,7 +109,7 @@ def __call__(self, data): img, boxes, seg = data # Extract image as PyTorch tensor img = transforms.ToTensor()(img) - seg = transforms.ToTensor()(seg) * 255 # Because troch maps this to 0-1 instead of 0-255 + seg = transforms.ToTensor()(seg) * 255 # Because torch maps this to 0-1 instead of 0-255 bb_targets = torch.zeros((len(boxes), 6)) bb_targets[:, 1:] = transforms.ToTensor()(boxes) @@ -123,7 +130,7 @@ def __call__(self, data): DEFAULT_TRANSFORMS = transforms.Compose([ AbsoluteLabels(), - PadSquare(), + ResizeToSquare(), RelativeLabels(), ToTensor(), ]) From 63561daecc81b658ce774e8f147df3e2a11e9007 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:31:21 +0200 Subject: [PATCH 02/20] Test image discovery --- yoeo/utils/datasets.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 4e3634d..81c8beb 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -6,6 +6,7 @@ import os import warnings import numpy as np +from pathlib import Path from PIL import Image from PIL import ImageFile @@ -57,9 +58,12 @@ def __len__(self): class ListDataset(Dataset): - def __init__(self, list_path, img_size=416, multiscale=True, transform=None): - with open(list_path, "r") as file: - self.img_files = file.readlines() + def __init__(self, data_path, img_size=416, multiscale=True, transform=None): + + # Get all color images for e.g. the test set + result = list(Path(data_path).rglob("*.png")) + + print(result) self.label_files = [] for path in self.img_files: From c06e93a7a2154d42079dc0d31f678ffa5dbcde4c Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:43:18 +0200 Subject: [PATCH 03/20] Add first mask loader --- yoeo/utils/datasets.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 81c8beb..9ae8f18 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -61,30 +61,31 @@ class ListDataset(Dataset): def __init__(self, data_path, img_size=416, multiscale=True, transform=None): # Get all color images for e.g. the test set - result = list(Path(data_path).rglob("*.png")) - - print(result) + self.img_files = list(Path(data_path).rglob("*.png")) + """ self.label_files = [] for path in self.img_files: image_dir = os.path.dirname(path) - label_dir = "labels".join(image_dir.rsplit("images", 1)) - assert label_dir != image_dir, \ - f"Image path must contain a folder named 'images'! 
\n'{image_dir}'" + label_dir = image_dir.replace("leftImg8bit", "gtFine") + # TODO bbox stuff label_file = os.path.join(label_dir, os.path.basename(path)) label_file = os.path.splitext(label_file)[0] + '.txt' self.label_files.append(label_file) + """ - self.mask_files = [] + self.label_files = [] for path in self.img_files: image_dir = os.path.dirname(path) - mask_dir = "segmentations".join(image_dir.rsplit("images", 1)) - assert mask_dir != image_dir, \ - f"Image path must contain a folder named 'images'! \n'{image_dir}'" + mask_dir = image_dir.replace("leftImg8bit", "gtFine") mask_file = os.path.join(mask_dir, os.path.basename(path)) - mask_file = os.path.splitext(mask_file)[0] + '.png' + mask_file = os.path.splitext(mask_file)[0] + '_labelIds.png' self.mask_files.append(mask_file) + print(self.mask_files) + + exit(0) + self.img_size = img_size self.max_objects = 100 self.multiscale = multiscale From b293ef9613c8bd350cdedd361dd4a9f85a30fb35 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:44:06 +0200 Subject: [PATCH 04/20] Fix copy paste error --- yoeo/utils/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 9ae8f18..36fd373 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -74,7 +74,7 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): self.label_files.append(label_file) """ - self.label_files = [] + self.mask_files = [] for path in self.img_files: image_dir = os.path.dirname(path) mask_dir = image_dir.replace("leftImg8bit", "gtFine") From ab59e5b9b5dd8b1b1a13fca433c23d90c522cc22 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:46:15 +0200 Subject: [PATCH 05/20] Fix stuff --- yoeo/utils/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 36fd373..6a7dc33 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -76,8 +76,8 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): self.mask_files = [] for path in self.img_files: - image_dir = os.path.dirname(path) - mask_dir = image_dir.replace("leftImg8bit", "gtFine") + path = path.replace("leftImg8bit", "gtFine") + mask_dir = os.path.dirname(path) mask_file = os.path.join(mask_dir, os.path.basename(path)) mask_file = os.path.splitext(mask_file)[0] + '_labelIds.png' self.mask_files.append(mask_file) From 2333640e4ba9fd9e72b8e61da3506829bb47d3a8 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 13:47:06 +0200 Subject: [PATCH 06/20] Fix string conversion --- yoeo/utils/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 6a7dc33..d8be999 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -76,7 +76,7 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): self.mask_files = [] for path in self.img_files: - path = path.replace("leftImg8bit", "gtFine") + path = str(path).replace("leftImg8bit", "gtFine") mask_dir = os.path.dirname(path) mask_file = os.path.join(mask_dir, os.path.basename(path)) mask_file = os.path.splitext(mask_file)[0] + '_labelIds.png' From f941f3187709f018a02e41696748447537ac7cf4 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 19:15:18 +0200 Subject: [PATCH 07/20] Demo segmentation load --- yoeo/utils/datasets.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff 
--git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index d8be999..8cfd242 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -82,10 +82,6 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): mask_file = os.path.splitext(mask_file)[0] + '_labelIds.png' self.mask_files.append(mask_file) - print(self.mask_files) - - exit(0) - self.img_size = img_size self.max_objects = 100 self.multiscale = multiscale @@ -107,6 +103,8 @@ def __getitem__(self, index): print(f"Could not read image '{img_path}'.") return + """ + # --------- # Label # --------- @@ -120,6 +118,7 @@ def __getitem__(self, index): except Exception: print(f"Could not read label '{label_path}'.") return + """ # --------- # Segmentation Mask @@ -127,11 +126,14 @@ def __getitem__(self, index): try: mask_path = self.mask_files[index % len(self.img_files)].rstrip() # Load segmentation mask as numpy array - mask = np.array(Image.open(mask_path).convert('RGB')) // 127 + mask = np.array(Image.open(mask_path).convert('RGB')) + print(np.unique(mask, return_counts=True)) except FileNotFoundError as e: - print(f"Could not load mask '{mask_path}'.") + print(f"Could not load mask '{mask_path}' {e}.") return + exit(0) + # ----------- # Transform # ----------- From 0a6fb562dd8b86c1db209ffcc163c0bb35989a78 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 19:22:07 +0200 Subject: [PATCH 08/20] Add string conversion --- yoeo/utils/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 8cfd242..b53d6ba 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -61,7 +61,7 @@ class ListDataset(Dataset): def __init__(self, data_path, img_size=416, multiscale=True, transform=None): # Get all color images for e.g. the test set - self.img_files = list(Path(data_path).rglob("*.png")) + self.img_files = [str(path) in Path(data_path).rglob("*.png")] """ self.label_files = [] From 7f90aa700d7cdab30de2f1fdea762c591d53df05 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 19:22:18 +0200 Subject: [PATCH 09/20] Add string conversion --- yoeo/utils/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index b53d6ba..848469e 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -61,7 +61,7 @@ class ListDataset(Dataset): def __init__(self, data_path, img_size=416, multiscale=True, transform=None): # Get all color images for e.g. 
the test set - self.img_files = [str(path) in Path(data_path).rglob("*.png")] + self.img_files = [str(path) for path in Path(data_path).rglob("*.png")] """ self.label_files = [] From 7b534a74a37765e66b4984dc9cd3d40fa132ca21 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sat, 24 Jul 2021 19:25:10 +0200 Subject: [PATCH 10/20] Add grouping --- yoeo/utils/datasets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 848469e..b21a6f1 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -127,9 +127,12 @@ def __getitem__(self, index): mask_path = self.mask_files[index % len(self.img_files)].rstrip() # Load segmentation mask as numpy array mask = np.array(Image.open(mask_path).convert('RGB')) + # Group classes together + mask[1 < mask <= 5] = 1 + mask[5 < mask <= 9] = 2 print(np.unique(mask, return_counts=True)) except FileNotFoundError as e: - print(f"Could not load mask '{mask_path}' {e}.") + print(f"Could not load mask '{mask_path}'.") return exit(0) From 77dca3c76a3daac91242d2abc765da0dfe788d17 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sun, 25 Jul 2021 12:40:39 +0200 Subject: [PATCH 11/20] Better indexing --- yoeo/utils/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index b21a6f1..61cc0c4 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -128,8 +128,8 @@ def __getitem__(self, index): # Load segmentation mask as numpy array mask = np.array(Image.open(mask_path).convert('RGB')) # Group classes together - mask[1 < mask <= 5] = 1 - mask[5 < mask <= 9] = 2 + mask[np.logical_and(mask <= 5, mask > 0)] = 1 + mask[np.logical_and(mask <= 10, mask > 5)] = 2 print(np.unique(mask, return_counts=True)) except FileNotFoundError as e: print(f"Could not load mask '{mask_path}'.") From 3c5587e1701c3018c9b22a7d87087e16fea92b8a Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Sun, 25 Jul 2021 12:41:16 +0200 Subject: [PATCH 12/20] Remove exit stop --- yoeo/utils/datasets.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index 61cc0c4..bdc6661 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -135,8 +135,6 @@ def __getitem__(self, index): print(f"Could not load mask '{mask_path}'.") return - exit(0) - # ----------- # Transform # ----------- From df30d74be5d6140528f62660fb9007b0416e92be Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Sun, 25 Jul 2021 18:07:41 +0200 Subject: [PATCH 13/20] Adapt model for cityscapes classes --- config/yoeo-rev-7.cfg | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/config/yoeo-rev-7.cfg b/config/yoeo-rev-7.cfg index 78e32d8..756a265 100644 --- a/config/yoeo-rev-7.cfg +++ b/config/yoeo-rev-7.cfg @@ -213,15 +213,13 @@ activation=leaky size=1 stride=1 pad=1 -filters=24 +filters=39 activation=linear - - [yolo] mask = 3,4,5 anchors = 17, 32, 13,171, 37, 67, 30,224, 69,112, 116,212 -classes=3 +classes=8 num=6 jitter=.3 ignore_thresh = .7 @@ -257,13 +255,13 @@ activation=leaky size=1 stride=1 pad=1 -filters=24 +filters=39 activation=linear [yolo] mask = 0,1,2 anchors = 17, 32, 13,171, 37, 67, 30,224, 69,112, 116,212 -classes=3 +classes=8 num=6 jitter=.3 ignore_thresh = .7 @@ -328,11 +326,11 @@ activation=leaky [convolutional] batch_normalize=1 -filters=3 +filters=6 size=1 stride=1 pad=1 activation=leaky [seg] -classes=2 +classes=5 From 
e54513dc92641254f41686f2022f7f53f202b279 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Sun, 25 Jul 2021 18:08:41 +0200 Subject: [PATCH 14/20] make detect output more consistent --- yoeo/detect.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/yoeo/detect.py b/yoeo/detect.py index 98ff357..fec1ad6 100755 --- a/yoeo/detect.py +++ b/yoeo/detect.py @@ -205,19 +205,14 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_pa ax.imshow( - SegmentationMapsOnImage( - seg[ - int(pad_y) : int(img_size - pad_y), - int(pad_x) : int(img_size - pad_x), - ], shape=img.shape).draw_on_image(img)[0]) + SegmentationMapsOnImage(seg, shape=img.shape).draw_on_image(img, alpha=0.5)[0]) # Rescale boxes to original image detections = rescale_boxes(detections, img_size, img.shape[:2]) unique_labels = detections[:, -1].cpu().unique() n_cls_preds = len(unique_labels) # Bounding-box colors cmap = plt.get_cmap("tab20b") - colors = [cmap(i) for i in np.linspace(0, 1, n_cls_preds)] - bbox_colors = random.sample(colors, n_cls_preds) + colors = [cmap(i) for i in np.linspace(0, 1, len(classes))] for x1, y1, x2, y2, conf, cls_pred in detections: print(f"\t+ Label: {classes[int(cls_pred)]} | Confidence: {conf.item():0.4f}") @@ -225,9 +220,8 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_pa box_w = x2 - x1 box_h = y2 - y1 - color = bbox_colors[int(np.where(unique_labels == int(cls_pred))[0])] # Create a Rectangle patch - bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=2, edgecolor=color, facecolor="none") + bbox = patches.Rectangle((x1, y1), box_w, box_h, linewidth=1, edgecolor=colors[int(cls_pred)], facecolor="none") # Add the bbox to the plot ax.add_patch(bbox) # Add label @@ -237,7 +231,7 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_pa s=classes[int(cls_pred)], color="white", verticalalignment="top", - bbox={"color": color, "pad": 0}) + bbox={"color": colors[int(cls_pred)], "pad": 0}) # Save generated image with detections plt.axis("off") From c945da7f1be5dbb5ba237f8d9eb8f18f3b5529d2 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Sun, 25 Jul 2021 18:09:32 +0200 Subject: [PATCH 15/20] Load and cluster cityscapes dataset --- yoeo/utils/datasets.py | 59 ++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/yoeo/utils/datasets.py b/yoeo/utils/datasets.py index bdc6661..5241001 100644 --- a/yoeo/utils/datasets.py +++ b/yoeo/utils/datasets.py @@ -6,6 +6,9 @@ import os import warnings import numpy as np +import json +from tqdm import tqdm +from collections import defaultdict from pathlib import Path from PIL import Image from PIL import ImageFile @@ -63,16 +66,20 @@ def __init__(self, data_path, img_size=416, multiscale=True, transform=None): # Get all color images for e.g. 
the test set self.img_files = [str(path) for path in Path(data_path).rglob("*.png")] - """ - self.label_files = [] - for path in self.img_files: - image_dir = os.path.dirname(path) - label_dir = image_dir.replace("leftImg8bit", "gtFine") - # TODO bbox stuff - label_file = os.path.join(label_dir, os.path.basename(path)) - label_file = os.path.splitext(label_file)[0] + '.txt' - self.label_files.append(label_file) - """ + self.annotations = defaultdict(list) + for dset in ['train', 'val']: + with open(os.path.abspath(os.path.join(data_path, "../../", f"annotations/instancesonly_filtered_gtFine_{dset}.json")), "r") as f: + annotation_file = json.load(f) + for annotation in tqdm(annotation_file["annotations"]): + img_id = annotation["image_id"] + category_id = annotation["category_id"] + bbox = annotation["bbox"] + self.annotations[ + os.path.basename( + list(filter( + lambda x:x["id"]==img_id, + annotation_file["images"]))[0]["file_name"]) + ].append((img_id, category_id, bbox)) self.mask_files = [] for path in self.img_files: @@ -103,22 +110,26 @@ def __getitem__(self, index): print(f"Could not read image '{img_path}'.") return - """ - # --------- # Label # --------- try: - label_path = self.label_files[index % len(self.img_files)].rstrip() - - # Ignore warning if file is empty - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - boxes = np.loadtxt(label_path).reshape(-1, 5) + labels = self.annotations[os.path.basename(img_path)] + + boxes = np.zeros((len(labels), 5)) + + for idx, label in enumerate(labels): + # label_idx x_center y_center width height + boxes[idx] = np.array([ + label[1] - 1, + label[2][0] / img.shape[1] + label[2][2] / img.shape[1] / 2, + label[2][1] / img.shape[0] + label[2][3] / img.shape[0] / 2, + label[2][2] / img.shape[1], + label[2][3] / img.shape[0] + ]) except Exception: print(f"Could not read label '{label_path}'.") return - """ # --------- # Segmentation Mask @@ -128,9 +139,13 @@ def __getitem__(self, index): # Load segmentation mask as numpy array mask = np.array(Image.open(mask_path).convert('RGB')) # Group classes together - mask[np.logical_and(mask <= 5, mask > 0)] = 1 - mask[np.logical_and(mask <= 10, mask > 5)] = 2 - print(np.unique(mask, return_counts=True)) + mask[mask <= 6] = 0 + mask[np.logical_and(mask > 6, mask <= 10)] = 1 + mask[np.logical_and(mask > 10, mask <= 16)] = 2 + mask[np.logical_and(mask > 16, mask <= 20)] = 3 + mask[np.logical_and(mask > 20, mask <= 22)] = 4 + mask[mask == 23] = 5 + mask[mask > 23] = 0 except FileNotFoundError as e: print(f"Could not load mask '{mask_path}'.") return From 49b265820a539fc7e9d3649d4ed49095b44b2ff5 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Sun, 25 Jul 2021 18:11:15 +0200 Subject: [PATCH 16/20] Fix stuff because of the new resizing --- yoeo/utils/utils.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/yoeo/utils/utils.py b/yoeo/utils/utils.py index d178d21..e684a63 100644 --- a/yoeo/utils/utils.py +++ b/yoeo/utils/utils.py @@ -65,19 +65,11 @@ def rescale_boxes(boxes, current_dim, original_shape): """ orig_h, orig_w = original_shape - # The amount of padding that was added - pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape)) - pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape)) - - # Image height and width after padding is removed - unpad_h = current_dim - pad_y - unpad_w = current_dim - pad_x - # Rescale bounding boxes to dimension of original image - boxes[:, 0] = ((boxes[:, 0] - 
pad_x // 2) / unpad_w) * orig_w - boxes[:, 1] = ((boxes[:, 1] - pad_y // 2) / unpad_h) * orig_h - boxes[:, 2] = ((boxes[:, 2] - pad_x // 2) / unpad_w) * orig_w - boxes[:, 3] = ((boxes[:, 3] - pad_y // 2) / unpad_h) * orig_h + boxes[:, 0] = boxes[:, 0] * (orig_w / current_dim) + boxes[:, 1] = boxes[:, 1] * (orig_h / current_dim) + boxes[:, 2] = boxes[:, 2] * (orig_w / current_dim) + boxes[:, 3] = boxes[:, 3] * (orig_h / current_dim) return boxes From 9ef15c542f082efb1f4c7fdaf0afce05b602ffa0 Mon Sep 17 00:00:00 2001 From: Florian Vahl Date: Tue, 3 Aug 2021 15:10:30 +0200 Subject: [PATCH 17/20] Tweak and cleanup loss function --- yoeo/utils/loss.py | 201 ++++++++++++++++----------------------------- 1 file changed, 69 insertions(+), 132 deletions(-) diff --git a/yoeo/utils/loss.py b/yoeo/utils/loss.py index c240b8f..b2b9689 100644 --- a/yoeo/utils/loss.py +++ b/yoeo/utils/loss.py @@ -55,143 +55,78 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= return iou # IoU -def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441 - # return positive, negative label smoothing BCE targets - return 1.0 - 0.5 * eps, 0.5 * eps - - -class BCEBlurWithLogitsLoss(nn.Module): - # BCEwithLogitLoss() with reduced missing label effects. - def __init__(self, alpha=0.05): - super(BCEBlurWithLogitsLoss, self).__init__() - self.loss_fcn = nn.BCEWithLogitsLoss(reduction='none') # must be nn.BCEWithLogitsLoss() - self.alpha = alpha - - def forward(self, pred, true): - loss = self.loss_fcn(pred, true) - pred = torch.sigmoid(pred) # prob from logits - dx = pred - true # reduce only missing label effects - # dx = (pred - true).abs() # reduce missing label and false label effects - alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4)) - loss *= alpha_factor - return loss.mean() - - -class FocalLoss(nn.Module): - # Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5) - def __init__(self, loss_fcn, gamma=1.5, alpha=0.25): - super(FocalLoss, self).__init__() - self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss() - self.gamma = gamma - self.alpha = alpha - self.reduction = loss_fcn.reduction - self.loss_fcn.reduction = 'none' # required to apply FL to each element - - def forward(self, pred, true): - loss = self.loss_fcn(pred, true) - # p_t = torch.exp(-loss) - # loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability - - # TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py - pred_prob = torch.sigmoid(pred) # prob from logits - p_t = true * pred_prob + (1 - true) * (1 - pred_prob) - alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha) - modulating_factor = (1.0 - p_t) ** self.gamma - loss *= alpha_factor * modulating_factor - - if self.reduction == 'mean': - return loss.mean() - elif self.reduction == 'sum': - return loss.sum() - else: # 'none' - return loss - - -class QFocalLoss(nn.Module): - # Wraps Quality focal loss around existing loss_fcn(), i.e. 
criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
-    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
-        super(QFocalLoss, self).__init__()
-        self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
-        self.gamma = gamma
-        self.alpha = alpha
-        self.reduction = loss_fcn.reduction
-        self.loss_fcn.reduction = 'none'  # required to apply FL to each element
-
-    def forward(self, pred, true):
-        loss = self.loss_fcn(pred, true)
-
-        pred_prob = torch.sigmoid(pred)  # prob from logits
-        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
-        modulating_factor = torch.abs(true - pred_prob) ** self.gamma
-        loss *= alpha_factor * modulating_factor
-
-        if self.reduction == 'mean':
-            return loss.mean()
-        elif self.reduction == 'sum':
-            return loss.sum()
-        else:  # 'none'
-            return loss
-
-
 def compute_loss(combined_predictions, combined_targets, model):
+    # Split seg and yolo stuff
     yolo_targets, seg_targets = combined_targets
     yolo_predictions, seg_predictions = combined_predictions
 
+    # Check which device was used
+    device = yolo_targets.device
+
     # Segmentation loss
     seg_loss = nn.CrossEntropyLoss()(seg_predictions[0], seg_targets).unsqueeze(0)
 
-    device = yolo_targets.device
+    # Add placeholder variables for the different losses
     lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
+
+    # Build yolo targets
     tcls, tbox, indices, anchors = build_targets(yolo_predictions, yolo_targets, model)  # targets
 
-    # Define criteria
+    # Define the different loss functions for classification and objectness
    BCEcls = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))
    BCEobj = nn.BCEWithLogitsLoss(
        pos_weight=torch.tensor([1.0], device=device))
 
-    # Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
-    cp, cn = smooth_BCE(eps=0.0)
-
-    # Focal loss
-    gamma = 0  # focal loss gamma
-    if gamma > 0:
-        BCEcls, BCEobj = FocalLoss(BCEcls, gamma), FocalLoss(BCEobj, gamma)
-
-    # Losses
-    # layer index, layer predictions
+    # Calculate losses for each yolo layer
     for layer_index, layer_predictions in enumerate(yolo_predictions):
-        # image, anchor, gridy, gridx
+        # Get image ids, anchors, and grid indices i and j for each target in the current yolo layer
         b, anchor, grid_j, grid_i = indices[layer_index]
+        # Build an empty object target tensor with the same shape as the object prediction
         tobj = torch.zeros_like(layer_predictions[..., 0], device=device)  # target obj
-
-        num_targets = b.shape[0]  # number of targets
+        # Get the number of targets for this layer.
+        # Each target is a label box with some scaling and the association of an anchor box.
+        # Label boxes may be associated with 0 or multiple anchors, so they can appear multiple times or not at all in the targets.
+        num_targets = b.shape[0]
+        # Check if there are targets for this batch
         if num_targets:
-            # prediction subset corresponding to targets
+            # Load the corresponding values from the predictions for each of the targets
             ps = layer_predictions[b, anchor, grid_j, grid_i]
 
-            # Regression
+            # Regression of the box
+            # Apply sigmoid to the xy offset predictions in each cell that has a target
             pxy = ps[:, :2].sigmoid()
+            # Apply the exponent to the wh predictions and multiply with the anchor box that matched best with the label for each cell that has a target
             pwh = torch.exp(ps[:, 2:4]) * anchors[layer_index]
-            pbox = torch.cat((pxy, pwh), 1)  # predicted box
-            # iou(prediction, target)
+            # Build a box out of xy and wh
+            pbox = torch.cat((pxy, pwh), 1)
+            # Calculate CIoU or GIoU for each target with the predicted box for its cell + anchor
             iou = bbox_iou(pbox.T, tbox[layer_index], x1y1x2y2=False, CIoU=True)
+            # We want to minimize our loss, and the best possible IoU is 1, so we take 1 - IoU and reduce it with a mean
             lbox += (1.0 - iou).mean()  # iou loss
 
-            # Objectness
+            # Classification of the objectness
+            # Fill our empty object target tensor with the IoU we just calculated for each target at the targets position
             tobj[b, anchor, grid_j, grid_i] = iou.detach().clamp(0).type(tobj.dtype)  # Use cells with iou > 0 as object targets
 
-            # Classification
+            # Classification of the class
+            # Check if we need to do a classification (number of classes > 1)
             if ps.size(1) - 5 > 1:
-                t = torch.full_like(ps[:, 5:], cn, device=device)  # targets
-                t[range(num_targets), tcls[layer_index]] = cp
+                # One-hot class encoding
+                t = torch.zeros_like(ps[:, 5:], device=device)  # targets
+                t[range(num_targets), tcls[layer_index]] = 1
+                # Use the tensor to calculate the BCE loss
                 lcls += BCEcls(ps[:, 5:], t)  # BCE
 
+        # Classification of the objectness, the sequel
+        # Calculate the BCE loss between the on-the-fly generated target and the network prediction
         lobj += BCEobj(layer_predictions[..., 4], tobj)  # obj loss
 
+    # Scaling of losses
     lbox *= 0.2
     lobj *= 1.0
     lcls *= 0.05
 
+    # Merge losses
     loss = lbox + lobj + lcls + seg_loss
 
     return loss, to_cpu(torch.cat((lbox, lobj, lcls, seg_loss, loss)))
 
@@ -202,51 +137,53 @@ def build_targets(p, targets, model):
     na, nt = 3, targets.shape[0]  # number of anchors, targets #TODO
     tcls, tbox, indices, anch = [], [], [], []
     gain = torch.ones(7, device=targets.device)  # normalized to gridspace gain
-    ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
-    # append anchor indices
+    # Make a tensor that iterates 0-2 for 3 anchors and repeat that as many times as we have target boxes
+    ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)
+    # Copy the target boxes anchor-count times and append an anchor index to each copy; the anchor index is also expressed by the new first dimension
     targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)
 
-    g = 0.5  # bias
-    off = torch.tensor([[0, 0]], device=targets.device).float() * g  # offsets
-
     for i, yolo_layer in enumerate(model.yolo_layers):
-        anchors = yolo_layer.anchors / yolo_layer.stride
+        # Scale the anchors by the yolo grid cell size so that an anchor with the size of a cell would result in 1
+        anchors = yolo_layer.anchors / yolo_layer.stride
+        # Add the number of yolo cells in this layer to the gain tensor
+        # The gain tensor matches the columns of our targets (img id, class, x, y, w, h, anchor id)
         gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain
 
-        # Match targets to anchors
+        # Scale the targets by the number of yolo cells in this layer, so they are in the yolo cell coordinate system
         t = targets * gain
+        # Check if we have targets
         if nt:
-            # Matches
-            r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
+            # Calculate the ratio between anchor and target box for both width and height
+            r = t[:, :, 4:6] / anchors[:, None]
+            # Select the ratios that have the highest divergence in any axis and check if the ratio is less than 4
             j = torch.max(r, 1. / r).max(2)[0] < 4  # compare #TODO
-            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
-            t = t[j]  # filter
-
-            # Offsets
-            gxy = t[:, 2:4]  # grid xy
-            gxi = gain[[2, 3]] - gxy  # inverse
-            j, k = ((gxy % 1. < g) & (gxy > 1.)).T
-            l, m = ((gxi % 1. < g) & (gxi > 1.)).T
-            j = torch.stack((torch.ones_like(j),))
-            t = t.repeat((off.shape[0], 1, 1))[j]
-            offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
-
+            # Only use targets that have the correct ratios for their anchors
+            # That means we only keep the ones that have a matching anchor, and we lose the anchor dimension
+            # The anchor id is still saved in the 7th value of each target
+            t = t[j]
         else:
             t = targets[0]
-            offsets = 0
 
-        # Define
-        b, c = t[:, :2].long().T  # image, class
-        gxy = t[:, 2:4]  # grid xy
+        # Extract the image id in the batch and the class id
+        b, c = t[:, :2].long().T
+        # We isolate the target cell associations.
+        # x, y, w, h are already in the cell coordinate system, meaning an x = 1.2 would be 1.2 times the cell width
+        gxy = t[:, 2:4]
         gwh = t[:, 4:6]  # grid wh
-        gij = (gxy - offsets).long()
+        # Cast to int to get a cell index, e.g. 1.2 gets associated to cell 1
+        gij = gxy.long()
+        # Isolate the x and y index dimensions
         gi, gj = gij.T  # grid xy indices
 
-        # Append
-        a = t[:, 6].long()  # anchor indices
-        # image, anchor, grid indices
+        # Convert the anchor indexes to int
+        a = t[:, 6].long()
+        # Add the target tensors for this yolo layer to the output lists
+        # Add to the index list and limit the index range to prevent out-of-bounds indices
         indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))
+        # Add to the target box list and convert the box coordinates from global grid coordinates to local offsets in the grid cell
         tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
-        anch.append(anchors[a])  # anchors
-        tcls.append(c)  # class
+        # Add the correct anchor for each target to the list
+        anch.append(anchors[a])
+        # Add the class for each target to the list
+        tcls.append(c)
 
     return tcls, tbox, indices, anch
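
Aside (not part of any commit): the rewritten build_targets above packs a lot into tensor ops, so here is a minimal standalone PyTorch sketch of its cell-assignment arithmetic. The box, grid size, and anchor values are invented for illustration; only the ratio threshold of 4 mirrors the patch.

import torch

# One label box in normalized image coordinates: (img id, class, x_center, y_center, w, h)
targets = torch.tensor([[0.0, 2.0, 0.5, 0.62, 0.1, 0.3]])
grid_size = 13                        # cells per side in one yolo layer
anchor = torch.tensor([2.0, 5.0])     # one anchor, already scaled to cell units

# Scale the normalized box into grid-cell coordinates (the "gain" step)
t = targets.clone()
t[:, 2:6] *= grid_size

# Anchor matching: keep the box only if neither w nor h diverges from the anchor by a factor of 4 or more
r = t[:, 4:6] / anchor
keep = torch.max(r, 1.0 / r).max(1)[0] < 4

# The integer part of xy selects the cell, the fractional part is the in-cell offset the sigmoid(xy) head must regress
gxy = t[keep, 2:4]
gij = gxy.long()       # x = 6.5 -> cell index 6
offsets = gxy - gij    # -> 0.5
print(gij, offsets)
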
From 9992bc86051ad37aaa8c5b0689c2fd1bca1d0043 Mon Sep 17 00:00:00 2001
From: Florian Vahl <7vahl@informatik.uni-hamburg.de>
Date: Wed, 4 Aug 2021 12:37:42 +0200
Subject: [PATCH 18/20] Use fix obj loss value

---
 yoeo/utils/loss.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/yoeo/utils/loss.py b/yoeo/utils/loss.py
index 63765f5..6f7aa72 100644
--- a/yoeo/utils/loss.py
+++ b/yoeo/utils/loss.py
@@ -106,7 +106,7 @@ def compute_loss(combined_predictions, combined_targets, model):
 
             # Classification of the objectness
             # Fill our empty object target tensor with the IoU we just calculated for each target at the targets position
-            tobj[b, anchor, grid_j, grid_i] = iou.detach().clamp(0).type(tobj.dtype)  # Use cells with iou > 0 as object targets
+            tobj[b, anchor, grid_j, grid_i] = 1#iou.detach().clamp(0).type(tobj.dtype)  # Use cells with iou > 0 as object targets
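
The one-line change above swaps the soft, IoU-graded objectness target for a hard 1 on every matched cell. A toy contrast of the two variants (IoU values invented, purely illustrative):

import torch

iou = torch.tensor([0.31, 0.74, 0.05])  # invented IoUs of matched predictions
soft_tobj = iou.detach().clamp(0)       # before: objectness graded by box quality
hard_tobj = torch.ones_like(iou)        # after: every matched cell is a full positive
print(soft_tobj, hard_tobj)
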
From fae4780ff3e52a37328227706c9bad57f4ccc469 Mon Sep 17 00:00:00 2001
From: Florian Vahl <7vahl@informatik.uni-hamburg.de>
Date: Wed, 4 Aug 2021 12:38:46 +0200
Subject: [PATCH 19/20] Add rev 7 with custom anchors

---
 config/yoeo-rev-7-anchor.cfg | 337 +++++++++++++++++++++++++++++++++++
 1 file changed, 337 insertions(+)
 create mode 100644 config/yoeo-rev-7-anchor.cfg

diff --git a/config/yoeo-rev-7-anchor.cfg b/config/yoeo-rev-7-anchor.cfg
new file mode 100644
index 0000000..d7697cf
--- /dev/null
+++ b/config/yoeo-rev-7-anchor.cfg
@@ -0,0 +1,337 @@
+[net]
+# Testing
+#batch=1
+#subdivisions=1
+# Training
+batch=64
+subdivisions=8
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=100
+max_batches = 4000
+policy=steps
+steps=50000,60000
+scales=.1,.1
+
+####
+# Like YOEO rev 2 but with deeper skip connections
+####
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-1
+groups=2
+group_id=1
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1,-2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -6,-1
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-1
+groups=2
+group_id=1
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1,-2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -6,-1
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers=-1
+groups=2
+group_id=1
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -1,-2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[route]
+layers = -6,-1
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=39
+activation=linear
+
+[yolo]
+mask = 3,4,5
+anchors = 7, 11, 15,24, 25, 52, 48,65, 90,132, 88,237
+classes=8
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+[route]
+layers = -4
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[upsample]
+stride=2
+
+[route]
+layers = -1, 24
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=39
+activation=linear
+
+[yolo]
+mask = 0,1,2
+anchors = 7, 11, 15,24, 25, 52, 48,65, 90,132, 88,237
+classes=8
+num=6
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1 + +[route] +layers = 18 + +[upsample] +stride=2 + +[convolutional] +batch_normalize=1 +filters=128 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, 10 + +[upsample] +stride=2 + +[convolutional] +batch_normalize=1 +filters=64 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, 2 + +[upsample] +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[route] +layers = -1, 0 + +[upsample] +stride=2 + +[convolutional] +batch_normalize=1 +filters=32 +size=3 +stride=1 +pad=1 +activation=leaky + +[convolutional] +batch_normalize=1 +filters=6 +size=1 +stride=1 +pad=1 +activation=leaky + +[seg] +classes=5 From 0a1d21eb723675994f92044f4bf43192ca64bd81 Mon Sep 17 00:00:00 2001 From: Florian Vahl <7vahl@informatik.uni-hamburg.de> Date: Mon, 9 Aug 2021 13:52:12 +0200 Subject: [PATCH 20/20] Cityscapes using aug --- config/yoeo-rev-7.cfg | 4 ++-- yoeo/detect.py | 2 +- yoeo/train.py | 2 +- yoeo/utils/augmentations.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/config/yoeo-rev-7.cfg b/config/yoeo-rev-7.cfg index 756a265..699d18c 100644 --- a/config/yoeo-rev-7.cfg +++ b/config/yoeo-rev-7.cfg @@ -19,8 +19,8 @@ learning_rate=0.001 burn_in=100 max_batches = 4000 policy=steps -steps=50000,60000 -scales=.1,.1 +steps=15000,30000 +scales=.2,.1 #### # Like YOEO rev 2 but with deeper skip connections diff --git a/yoeo/detect.py b/yoeo/detect.py index fec1ad6..d6fcb29 100755 --- a/yoeo/detect.py +++ b/yoeo/detect.py @@ -239,7 +239,7 @@ def _draw_and_save_output_image(image_path, detections, seg, img_size, output_pa plt.gca().yaxis.set_major_locator(NullLocator()) filename = os.path.basename(image_path).split(".")[0] output_path_1 = os.path.join(output_path, f"{filename}.png") - plt.savefig(output_path_1, bbox_inches="tight", pad_inches=0.0) + plt.savefig(output_path_1, bbox_inches="tight", pad_inches=0.5) plt.close() diff --git a/yoeo/train.py b/yoeo/train.py index 665d4e5..6301293 100755 --- a/yoeo/train.py +++ b/yoeo/train.py @@ -48,7 +48,7 @@ def _create_data_loader(img_path, batch_size, img_size, n_cpu, multiscale_traini img_path, img_size=img_size, multiscale=multiscale_training, - transform=DEFAULT_TRANSFORMS) + transform=AUGMENTATION_TRANSFORMS) dataloader = DataLoader( dataset, batch_size=batch_size, diff --git a/yoeo/utils/augmentations.py b/yoeo/utils/augmentations.py index c29df04..000e84a 100644 --- a/yoeo/utils/augmentations.py +++ b/yoeo/utils/augmentations.py @@ -1,6 +1,6 @@ import imgaug.augmenters as iaa from torchvision import transforms -from yoeo.utils.transforms import ToTensor, PadSquare, RelativeLabels, AbsoluteLabels, ImgAug +from yoeo.utils.transforms import ToTensor, PadSquare, RelativeLabels, AbsoluteLabels, ImgAug, ResizeToSquare class DefaultAug(ImgAug): @@ -29,7 +29,7 @@ def __init__(self, ): AUGMENTATION_TRANSFORMS = transforms.Compose([ AbsoluteLabels(), DefaultAug(), - PadSquare(), + ResizeToSquare(), RelativeLabels(), ToTensor(), ])
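
Closing note on the preprocessing these patches converge on: ResizeToSquare replaces PadSquare and is applied to image and segmentation mask alike. A standalone imgaug sketch (not repo code; shapes and class values are arbitrary) of the joint, deterministic resize that keeps the two aligned, mirroring the transform's to_deterministic() call:

import numpy as np
import imgaug.augmenters as iaa
from imgaug.augmentables.segmaps import SegmentationMapsOnImage

img = np.zeros((1024, 2048, 3), dtype=np.uint8)   # Cityscapes-like 2:1 frame
mask = np.zeros((1024, 2048, 1), dtype=np.int32)
mask[400:600, 900:1200] = 3                       # a made-up class region

# One deterministic resize applied to both, mirroring ResizeToSquare from patch 1
aug = iaa.Resize(416).to_deterministic()
segmap = SegmentationMapsOnImage(mask, shape=img.shape)
img_aug, segmap_aug = aug(image=img, segmentation_maps=segmap)

print(img_aug.shape, segmap_aug.get_arr().shape)  # both now 416x416
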