
Commit 076fb64

Merge pull request #51 from swiss-territorial-data-lab/gs/factorize_ex_dqry

Factorize detection merge

2 parents 794620b + 70fe2e1

15 files changed: +260 additions, −305 deletions

README.md (1 addition, 1 deletion)

````diff
@@ -236,7 +236,7 @@ train_model.py:
   model_zoo_checkpoint_url: <zoo model to start training from, e.g. "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml">
   init_model_weights: <True or False; if True, the model weights will be initialized to 0 (optional, defaults to False)>
   resume_training: <True or False; if True, the training is resumed from the final weights saved in the log folder. Defaults to False>
-  data_augmentation: <True or False; if True, apply random adjustment of brightness, contrast, saturation, lightning, and size, plus flip the image horizontally. Defaults to False>
+  data_augmentation: <True or False; if True, apply random adjustment of brightness, contrast, saturation, lighting, size, and flip the image horizontally. Defaults to False>
 ```
 
 Detectron2's configuration files are provided in the example folders mentioned here-below. We warn the end-user about the fact that, **for the time being, no hyperparameters tuning is automatically performed**.
````

examples/anthropogenic-activities/README.md (2 additions, 2 deletions)

````diff
@@ -7,10 +7,10 @@ It consists of the following elements:
 * input data
 * scripts for data preparation and the first step of post-processing
 
-The full project is available is its [own repository](https://github.com/swiss-territorial-data-lab/proj-sda).
+The full project is available in its [own repository](https://github.com/swiss-territorial-data-lab/proj-sda).
 
 
-The **installation** can be carried out by following the instructions [here](../../README.md). When using Docker, the container must be launched from this repository root folder before running the workflow:
+The **installation** can be carried out by following the instructions [here](../../README.md). If used, the Docker container must be launched from the root folder of this repository before running the workflow:
 
 ```bash
 $ sudo chown -R 65534:65534 examples
````

examples/anthropogenic-activities/config_trne.yaml (6 additions, 6 deletions)

```diff
@@ -1,11 +1,11 @@
 # Produce tile geometries based on the AoI extent and zoom level
 prepare_data.py:
   datasets:
-    shapefile: data/sda_ground_truth_250410.gpkg # GT labels
+    shapefile: data/sda_ground_truth.gpkg # GT labels
     fp_shapefile: data/FP_labels.gpkg # FP labels
     # empty_tiles_aoi: data/AoI/<AOI_SHPFILE> # AOI in which additional empty tiles can be selected. Only one 'empty_tiles' option can be selected
     # empty_tiles_year: 2023 # If "empty_tiles_aoi" selected then provide a year. Choice: (1) numeric (i.e. 2020), (2) [year1, year2] (random selection of a year within a given year range)
-    empty_tiles_shp: data/20250924_emtpy_tiles.gpkg # Provided shapefile of selected empty tiles. Only one 'empty_tiles' option can be selected
+    empty_tiles_shp: data/empty_tiles.gpkg # Provided shapefile of selected empty tiles. Only one 'empty_tiles' option can be selected
     category_field: Classe
   output_folder: output/trne/
   zoom_level: 16
@@ -18,10 +18,10 @@ generate_tilesets.py:
   working_directory: .
   output_folder: output/trne/
   datasets:
-    aoi_tiles: output/trne/tiles.geojson
-    ground_truth_labels: output/trne/labels.geojson
+    aoi_tiles: output/trne/tiles.gpkg
+    ground_truth_labels: output/trne/labels.gpkg
   add_fp_labels: # Uncomment if FP shapefile exists in prepare_data.py
-    fp_labels: output/trne/FP.geojson
+    fp_labels: output/trne/FP_labels.gpkg
     frac_trn: 0.7 # fraction of fp tiles to add to the trn dataset, then the remaining tiles will be split in 2 and added to tst and val datasets
   image_source:
     type: XYZ # supported values: 1. MIL = Map Image Layer 2. WMS 3. XYZ 4. FOLDER
@@ -82,7 +82,7 @@ assess_detections.py:
   working_directory: output/trne
   output_folder: assessment
   datasets:
-    ground_truth_labels: labels.geojson
+    ground_truth_labels: labels.gpkg
     split_aoi_tiles: split_aoi_tiles.geojson # aoi = Area of Interest
     categories: category_ids.json
   detections:
```

examples/anthropogenic-activities/merge_detections.py (3 additions, 73 deletions)

```diff
@@ -9,7 +9,7 @@
 import pandas as pd
 
 sys.path.insert(1, '../..')
-from helpers.functions_for_examples import get_categories
+from helpers.functions_for_examples import get_categories, merge_adjacent_detections, read_dets_and_aoi
 import helpers.misc as misc
 
 from loguru import logger
@@ -62,87 +62,17 @@
     logger.success(f"Done! All files already exist in folder {OUTPUT_DIR}. Exiting.")
     sys.exit(0)
 
-logger.info("Loading split AoI tiles as a GeoPandas DataFrame...")
-tiles_gdf = gpd.read_file('split_aoi_tiles.geojson')
-tiles_gdf = tiles_gdf.to_crs(2056)
-if 'year_tile' in tiles_gdf.keys():
-    tiles_gdf['year_tile'] = tiles_gdf.year_tile.astype(int)
-logger.success(f"Done! {len(tiles_gdf)} features were found.")
-
-logger.info("Loading detections as a GeoPandas DataFrame...")
-
-detections_gdf = gpd.GeoDataFrame()
-
-for dataset, dets_file in DETECTION_FILES.items():
-    detections_ds_gdf = gpd.read_file(dets_file)
-    detections_ds_gdf[f'dataset'] = dataset
-    detections_gdf = pd.concat([detections_gdf, detections_ds_gdf], axis=0, ignore_index=True)
-detections_gdf = detections_gdf.to_crs(2056)
-detections_gdf['area'] = detections_gdf.area
-detections_gdf['det_id'] = detections_gdf.index
-if 'year_det' in detections_gdf.keys():
-    detections_gdf['year_det'] = detections_gdf.year_det.astype(int)
-logger.success(f"Done! {len(detections_gdf)} features were found.")
+tiles_gdf, detections_gdf = read_dets_and_aoi(DETECTION_FILES)
 
 # Merge features
 logger.info(f"Merge adjacent polygons overlapping tiles with a buffer of {DISTANCE} m...")
 detections_all_years_gdf = gpd.GeoDataFrame()
 
 # Process detection by year
 for year in detections_gdf.year_det.unique():
-    detections_by_year_gdf = detections_gdf[detections_gdf['year_det']==year]
-
-    detections_buffer_gdf = detections_by_year_gdf.copy()
-    detections_buffer_gdf['geometry'] = detections_by_year_gdf.geometry.buffer(DISTANCE, resolution=2)
-
-    # Saves the id of polygons contained entirely within the tile (no merging with adjacent tiles), to avoid merging them if they are at a distance of less than thd
-    detections_tiles_join_gdf = gpd.sjoin(tiles_gdf, detections_buffer_gdf, how='left', predicate='contains')
-    remove_det_list = detections_tiles_join_gdf.det_id.unique().tolist()
-
-    detections_within_tiles_gdf = detections_by_year_gdf[detections_by_year_gdf.det_id.isin(remove_det_list)].drop_duplicates(subset=['det_id'], ignore_index=True)
-    detections_overlap_tiles_gdf = detections_by_year_gdf[~detections_by_year_gdf.det_id.isin(remove_det_list)].drop_duplicates(subset=['det_id'], ignore_index=True)
-
-    # Merge polygons within the thd distance
-    detections_overlap_tiles_gdf.loc[:, 'geometry'] = detections_overlap_tiles_gdf.buffer(DISTANCE, resolution=2)
-    detections_dissolve_gdf = detections_overlap_tiles_gdf[['det_id', 'geometry']].dissolve(as_index=False)
-    detections_merge_gdf = detections_dissolve_gdf.explode(ignore_index=True)
-    del detections_dissolve_gdf, detections_overlap_tiles_gdf
-
-    if detections_merge_gdf.isnull().values.any():
-        detections_merge_gdf = gpd.GeoDataFrame()
-    else:
-        detections_merge_gdf.geometry = detections_merge_gdf.buffer(-DISTANCE, resolution=2)
-
-    # Spatially join merged detection with raw ones to retrieve relevant information (score, area,...)
-    detections_merge_gdf['index_merge'] = detections_merge_gdf.index
-    detections_join_gdf = gpd.sjoin(detections_merge_gdf, detections_by_year_gdf, how='inner', predicate='intersects')
-
-    det_class_all = []
-    det_score_all = []
-
-    for id in detections_merge_gdf.index_merge.unique():
-        detections_by_year_gdf = detections_join_gdf.copy()
-        detections_by_year_gdf = detections_by_year_gdf[(detections_by_year_gdf['index_merge']==id)]
-        detections_by_year_gdf.rename(columns={'score_left': 'score'}, inplace=True)
-        det_score_all.append(detections_by_year_gdf['score'].mean())
-        detections_by_year_gdf = detections_by_year_gdf.dissolve(by='det_class', aggfunc='sum', as_index=False)
-        # Keep class of largest det
-        if len(detections_by_year_gdf) > 0:
-            detections_by_year_gdf['det_class'] = detections_by_year_gdf.loc[detections_by_year_gdf['area'] == detections_by_year_gdf['area'].max(),
-                                                                             'det_class'].iloc[0]
-            det_class = detections_by_year_gdf['det_class'].drop_duplicates().tolist()
-        else:
-            det_class = [0]
-        det_class_all.append(det_class[0])
-
-    detections_merge_gdf['det_class'] = det_class_all
-    detections_merge_gdf['score'] = det_score_all
-
-    complete_merge_dets_gdf = pd.merge(detections_merge_gdf, detections_join_gdf[['index_merge', 'year_det'] + ([] if 'dataset' in detections_merge_gdf.columns else ['dataset'])], on='index_merge')
+    complete_merge_dets_gdf, detections_within_tiles_gdf = merge_adjacent_detections(detections_gdf, tiles_gdf, year, DISTANCE)
     detections_all_years_gdf = pd.concat([detections_all_years_gdf, complete_merge_dets_gdf, detections_within_tiles_gdf], ignore_index=True)
 
-    del complete_merge_dets_gdf, detections_merge_gdf, detections_by_year_gdf, detections_within_tiles_gdf, detections_join_gdf
-
 # get classe ids
 CATEGORIES = os.path.join('category_ids.json')
 categories_info_df, _ = get_categories(CATEGORIES)
```
examples/anthropogenic-activities/prepare_data.py (5 additions, 24 deletions)

```diff
@@ -59,32 +59,13 @@
 if not os.path.exists(OUTPUT_DIR):
     os.makedirs(OUTPUT_DIR)
 
-written_files = []
-
-gt_labels_4326_gdf = ffe.prepare_labels(SHPFILE, CATEGORY, supercategory=SUPERCATEGORY)
-
-label_filepath = os.path.join(OUTPUT_DIR, 'labels.geojson')
-gt_labels_4326_gdf.to_file(label_filepath, driver='GeoJSON')
-written_files.append(label_filepath)
-logger.success(f"Done! A file was written: {label_filepath}")
+gt_labels_4326_gdf, written_files = ffe.prepare_labels(SHPFILE, CATEGORY, supercategory=SUPERCATEGORY, output_dir=OUTPUT_DIR)
 
-tiles_4326_all_gdf, tmp_written_files = ffe.format_all_tiles(
-    FP_SHPFILE, os.path.join(OUTPUT_DIR, 'FP.geojson'), EPT_SHPFILE, ept_data_type=EPT, ept_year=EPT_YEAR, labels_4326_gdf=gt_labels_4326_gdf,
-    category=CATEGORY, supercategory=SUPERCATEGORY, zoom_level=ZOOM_LEVEL
+_, tmp_written_files = ffe.format_all_tiles(
+    FP_SHPFILE, EPT_SHPFILE, ept_data_type=EPT, ept_year=EPT_YEAR, labels_4326_gdf=gt_labels_4326_gdf,
+    category=CATEGORY, supercategory=SUPERCATEGORY, zoom_level=ZOOM_LEVEL, output_dir=OUTPUT_DIR
 )
-
-# Save tile shapefile
-tile_filepath = os.path.join(OUTPUT_DIR, 'tiles.geojson')
-if tiles_4326_all_gdf.empty:
-    logger.warning('No tile generated for the designated area.')
-    tile_filepath = os.path.join(OUTPUT_DIR, 'area_without_tiles.gpkg')
-    gt_labels_4326_gdf.to_file(tile_filepath)
-    written_files.append(tile_filepath)
-else:
-    logger.info("Export tiles to GeoJSON (EPSG:4326)...")
-    tiles_4326_all_gdf.to_file(tile_filepath, driver='GeoJSON')
-    written_files.append(tile_filepath)
-    logger.success(f"Done! A file was written: {tile_filepath}")
+written_files.extend(tmp_written_files)
 
 print()
 logger.info("The following files were written. Let's check them out!")
```
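The pattern applied in this hunk is that helpers now write their own output files, take an `output_dir`, and return the list of written paths for the caller to accumulate. A stdlib-only sketch of the calling convention (the helper names and file names below are stand-ins, not the repository's real `ffe` functions):

```python
# Sketch of the "helpers write and report their own files" refactoring pattern.
import os
import tempfile

def prepare_labels(output_dir):
    path = os.path.join(output_dir, "labels.gpkg")
    open(path, "w").close()          # stand-in for gdf.to_file(path)
    return "labels-gdf", [path]      # data plus the list of written files

def format_all_tiles(output_dir):
    path = os.path.join(output_dir, "tiles.gpkg")
    open(path, "w").close()
    return "tiles-gdf", [path]

out_dir = tempfile.mkdtemp()
_, written_files = prepare_labels(out_dir)
_, tmp_written_files = format_all_tiles(out_dir)
written_files.extend(tmp_written_files)
print([os.path.basename(p) for p in written_files])  # ['labels.gpkg', 'tiles.gpkg']
```

Centralizing the writes in the helpers keeps the example scripts down to orchestration, which is what lets this commit delete most of their body.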
examples/mineral-extract-sites-detection/config_det.yaml (1 addition, 2 deletions)

```diff
@@ -16,7 +16,7 @@ generate_tilesets.py:
   nb_tiles_max: 5000
   working_directory: ./output/
   datasets:
-    aoi_tiles: det/tiles.geojson
+    aoi_tiles: det/tiles.gpkg
   image_source:
     type: XYZ # supported values: 1. MIL = Map Image Layer 2. WMS 3. XYZ 4. FOLDER
     year: 2020 # supported values: 1. multi-year (tiles of different year), 2. <year> (i.e. 2020)
@@ -57,7 +57,6 @@ make_detections.py:
 # Assess the final results
 merge_detections.py:
   working_directory: ./output/det/
-  labels: labels.geojson
   detections:
     oth: oth_detections_at_0dot3_threshold.gpkg
   distance: 10 # m, distance use as a buffer to merge close polygons (likely to belong to the same object) together
```

examples/mineral-extract-sites-detection/config_trne.yaml (3 additions, 17 deletions)

```diff
@@ -6,12 +6,6 @@ prepare_data.py:
   srs: EPSG:2056
   datasets:
     shapefile: ./data/labels/mes_swisstlm3d_swissimage2020.shp
-    # fp_shapefile: ./data/FP/[FP_SHPFILE] # FP labels. Optional: can contain a 'year' column
-    # empty_tiles:
-    #   type: shp # supported values: 1. aoi (area in which tiles will be selected randomly) 2. shp (provided empty tiles). Adapt the following keys accordingly
-    #   shapefile: ./data/AoI/[EPT_SHPFILE] # shapefile in which additional empty tiles can be selected.
-    #   year: 2020 # if type = aoi selected, then provide a year, otherwise comment line. Supported value: (1) numeric (i.e. 2020), (2) [year1, year2] (random selection of a year within a given year range)
-    #   category: [CLASS_COL_NAME] # if it exists, indicate the attribute column name of the label class
   output_folder: ./output/trne/
   zoom_level: 16 # z, keep between 15 and 18
@@ -22,19 +16,12 @@ generate_tilesets.py:
   nb_tiles_max: 2000
   working_directory: ./output/
   datasets:
-    aoi_tiles: trne/tiles.geojson
-    ground_truth_labels: trne/labels.geojson
-    # fp_labels:
-    #   fp_shp: trne/FP.geojson
-    #   frac_trn: 0.7 # fraction of fp tiles to add to the trn dataset, then the remaining tiles will be split in 2 and added to tst and val datasets
+    aoi_tiles: trne/tiles.gpkg
+    ground_truth_labels: trne/labels.gpkg
   image_source:
     type: XYZ # supported values: 1. MIL = Map Image Layer 2. WMS 3. XYZ 4. FOLDER
     year: 2020 # supported values: 1. multi-year (tiles of different year), 2. <year> (i.e. 2020)
     location: https://wmts.geo.admin.ch/1.0.0/ch.swisstopo.swissimage-product/default/{year}/3857/{z}/{x}/{y}.jpeg
-  # empty_tiles: # add empty tiles to datasets
-  #   tiles_frac: 0.5 # fraction (relative to the number of tiles intersecting labels) of empty tiles to add
-  #   frac_trn: 0.7 # fraction of empty tiles to add to the trn dataset, then the remaining tiles will be split in 2 and added to tst and val datasets
-  #   keep_oth_tiles: False # keep tiles in oth dataset not intersecting oth labels
   output_folder: trne/
   tile_size: 256 # per side, in pixels
   overwrite: True
@@ -86,7 +73,7 @@ make_detections.py:
 assess_detections.py:
   working_directory: ./output/trne/
   datasets:
-    ground_truth_labels: labels.geojson
+    ground_truth_labels: labels.gpkg
     image_metadata_json: img_metadata.json
     split_aoi_tiles: split_aoi_tiles.geojson # aoi = Area of Interest
     categories: category_ids.json
@@ -102,7 +89,6 @@ assess_detections.py:
 
 # Assess the final results
 merge_detections.py:
   working_directory: ./output/trne/
-  labels: labels.geojson
   detections:
     trn: trn_detections_at_0dot05_threshold.gpkg
     val: val_detections_at_0dot05_threshold.gpkg
```
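The detection file names in this config (e.g. `trn_detections_at_0dot05_threshold.gpkg`) encode the confidence threshold at which detections were kept. A pure-Python sketch of that thresholding step (illustrative only, not the repository's code):

```python
# Sketch of the score thresholding implied by the '..._at_0dot05_threshold'
# file names: keep only detections whose confidence reaches the threshold.
detections = [
    {"det_id": 1, "score": 0.92},
    {"det_id": 2, "score": 0.04},
    {"det_id": 3, "score": 0.31},
]

def filter_by_score(dets, threshold):
    """Keep detections scoring at or above the confidence threshold."""
    return [d for d in dets if d["score"] >= threshold]

print([d["det_id"] for d in filter_by_score(detections, 0.05)])  # [1, 3]
```

A low threshold like 0.05 keeps most candidates for assessment; the `oth` dataset in config_det.yaml uses a stricter 0.3 cut.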
