From 2b49526d073faa7d70484d79206c2df85a66c93e Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Fri, 19 Dec 2025 14:44:06 +0100 Subject: [PATCH 01/70] Simplified way of implementing fields --- src/verification/__init__.py | 6 +++++- workflow/scripts/verif_single_init.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/verification/__init__.py b/src/verification/__init__.py index 5f7c7a00..f0df171f 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -203,7 +203,11 @@ def verify( scores = _merge_metrics(scores) statistics = _merge_metrics(statistics) - out = xr.merge([scores, statistics], join="outer", compat="no_conflicts") + out = xr.merge( + [scores, statistics, fcst_aligned - obs_aligned], + join="outer", + compat="no_conflicts", + ) LOG.info("Computed metrics in %.2f seconds", time.time() - start) LOG.info("Metrics dataset: \n%s", out) return out diff --git a/workflow/scripts/verif_single_init.py b/workflow/scripts/verif_single_init.py index 57b1e2c4..52d9e08f 100644 --- a/workflow/scripts/verif_single_init.py +++ b/workflow/scripts/verif_single_init.py @@ -114,6 +114,7 @@ def main(args: ScriptConfig): # compute metrics and statistics results = verify(fcst, analysis, args.label, args.analysis_label, args.regions) + LOG.info("Verification results:\n%s", results) # save results to NetCDF args.output.parent.mkdir(parents=True, exist_ok=True) From a456b1d5e3806f58178f83c74f9263a05589ca9e Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Fri, 19 Dec 2025 15:56:16 +0100 Subject: [PATCH 02/70] Exclude spatial data from being plotted and included in dashboard --- workflow/scripts/report_experiment_dashboard.py | 2 +- workflow/scripts/verif_plot_metrics.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/report_experiment_dashboard.py b/workflow/scripts/report_experiment_dashboard.py index 327547b0..a2abe766 100644 --- a/workflow/scripts/report_experiment_dashboard.py +++ 
b/workflow/scripts/report_experiment_dashboard.py @@ -39,7 +39,7 @@ def main(args): LOG.info("Loaded verification netcdf: \n%s", ds) # extract only non-spatial variables to pd.DataFrame - nonspatial_vars = [d for d in ds.data_vars if "spatial" not in d] + nonspatial_vars = [d for d in ds.data_vars if "x" not in ds[d].dims] df = ds[nonspatial_vars].to_array("stack").to_dataframe(name="value").reset_index() df[["param", "metric"]] = df["stack"].str.split(".", n=1, expand=True) df.drop(columns=["stack"], inplace=True) diff --git a/workflow/scripts/verif_plot_metrics.py b/workflow/scripts/verif_plot_metrics.py index a5a50bba..ae6d9d66 100644 --- a/workflow/scripts/verif_plot_metrics.py +++ b/workflow/scripts/verif_plot_metrics.py @@ -84,7 +84,7 @@ def main(args: Namespace) -> None: ds = xr.concat(dfs, dim="source", join="outer") # extract only non-spatial variables to pd.DataFrame - nonspatial_vars = [d for d in ds.data_vars if "spatial" not in d] + nonspatial_vars = [d for d in ds.data_vars if "x" not in ds[d].dims] all_df = ( ds[nonspatial_vars].to_array("stack").to_dataframe(name="value").reset_index() ) From 0ec286f4b24fe81f226bbd5ac9c56273ec6bceb8 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 7 Jan 2026 11:28:55 +0100 Subject: [PATCH 03/70] delete intermediate verification files --- workflow/rules/verif.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk index cba4a307..872f43eb 100644 --- a/workflow/rules/verif.smk +++ b/workflow/rules/verif.smk @@ -27,7 +27,7 @@ rule verif_metrics_baseline: analysis_label=config["analysis"].get("label"), regions=REGION_TXT, output: - OUT_ROOT / "data/baselines/{baseline_id}/{init_time}/verif.nc", + temp(OUT_ROOT / "data/baselines/{baseline_id}/{init_time}/verif.nc"), log: OUT_ROOT / "logs/verif_metrics_baseline/{baseline_id}-{init_time}.log", resources: cpus_per_task=24, mem_mb=50_000, @@ -76,7 +76,7 @@ rule verif_metrics: Path(OUT_ROOT) / 
f"data/runs/{wc.run_id}/{wc.init_time}/grib" ).resolve(), log: - OUT_ROOT / "logs/verif_metrics/{run_id}-{init_time}.log", + temp(OUT_ROOT / "logs/verif_metrics/{run_id}-{init_time}.log"), resources: cpus_per_task=24, mem_mb=50_000, From 28692d6c6adf890ab88554f6832d9ba9ded457c0 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 7 Jan 2026 12:02:58 +0100 Subject: [PATCH 04/70] Fix typo --- workflow/rules/verif.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk index 872f43eb..0566ccf2 100644 --- a/workflow/rules/verif.smk +++ b/workflow/rules/verif.smk @@ -63,7 +63,7 @@ rule verif_metrics: inference_okfile=rules.execute_inference.output.okfile, analysis_zarr=config["analysis"].get("analysis_zarr"), output: - OUT_ROOT / "data/runs/{run_id}/{init_time}/verif.nc", + temp(OUT_ROOT / "data/runs/{run_id}/{init_time}/verif.nc"), # wildcard_constraints: # run_id="^" # to avoid ambiguitiy with run_baseline_verif # TODO: implement logic to use experiment name instead of run_id as wildcard @@ -76,7 +76,7 @@ rule verif_metrics: Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib" ).resolve(), log: - temp(OUT_ROOT / "logs/verif_metrics/{run_id}-{init_time}.log"), + OUT_ROOT / "logs/verif_metrics/{run_id}-{init_time}.log", resources: cpus_per_task=24, mem_mb=50_000, From f3dcf0db6b5b53f6a2190590182a380deca0ab14 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 7 Jan 2026 12:04:04 +0100 Subject: [PATCH 05/70] include score components for maps --- src/verification/__init__.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/verification/__init__.py b/src/verification/__init__.py index f0df171f..51e483a4 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -198,13 +198,24 @@ def verify( score = xr.concat(score, dim="region") fcst_statistics = xr.concat(fcst_statistics, dim="region") obs_statistics = xr.concat(obs_statistics, 
dim="region") + score_spatial = _compute_scores( + fcst_aligned[param], + obs_aligned[param], + prefix=param + ".", + suffix=".spatial", + dim=[], + ) statistics.append(xr.concat([fcst_statistics, obs_statistics], dim="source")) - scores.append(score) + scores.append( + xr.merge([score, score_spatial], join="outer", compat="no_conflicts") + ) scores = _merge_metrics(scores) statistics = _merge_metrics(statistics) + LOG.info("Computed scores dataset: \n%s", scores) + LOG.info("Computed statistics dataset: \n%s", statistics) out = xr.merge( - [scores, statistics, fcst_aligned - obs_aligned], + [scores, statistics], join="outer", compat="no_conflicts", ) From 99dac523cdb71355c670088d4d15f72a1f8074bb Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 7 Jan 2026 17:09:53 +0100 Subject: [PATCH 06/70] Revert "Exclude spatial data from being plotted and included in dashboard" This reverts commit cdefa16a7f0cc8d9721b7598e49a88800ab61d23. --- workflow/scripts/report_experiment_dashboard.py | 2 +- workflow/scripts/verif_plot_metrics.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/report_experiment_dashboard.py b/workflow/scripts/report_experiment_dashboard.py index a2abe766..327547b0 100644 --- a/workflow/scripts/report_experiment_dashboard.py +++ b/workflow/scripts/report_experiment_dashboard.py @@ -39,7 +39,7 @@ def main(args): LOG.info("Loaded verification netcdf: \n%s", ds) # extract only non-spatial variables to pd.DataFrame - nonspatial_vars = [d for d in ds.data_vars if "x" not in ds[d].dims] + nonspatial_vars = [d for d in ds.data_vars if "spatial" not in d] df = ds[nonspatial_vars].to_array("stack").to_dataframe(name="value").reset_index() df[["param", "metric"]] = df["stack"].str.split(".", n=1, expand=True) df.drop(columns=["stack"], inplace=True) diff --git a/workflow/scripts/verif_plot_metrics.py b/workflow/scripts/verif_plot_metrics.py index ae6d9d66..a5a50bba 100644 --- a/workflow/scripts/verif_plot_metrics.py +++ 
b/workflow/scripts/verif_plot_metrics.py @@ -84,7 +84,7 @@ def main(args: Namespace) -> None: ds = xr.concat(dfs, dim="source", join="outer") # extract only non-spatial variables to pd.DataFrame - nonspatial_vars = [d for d in ds.data_vars if "x" not in ds[d].dims] + nonspatial_vars = [d for d in ds.data_vars if "spatial" not in d] all_df = ( ds[nonspatial_vars].to_array("stack").to_dataframe(name="value").reset_index() ) From edcca5b6caff4cd470700c170ee3fe661e77fcaf Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Wed, 7 Jan 2026 20:17:29 +0100 Subject: [PATCH 07/70] remove source dimension from scores --- src/verification/__init__.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/verification/__init__.py b/src/verification/__init__.py index 51e483a4..e9951b8a 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -84,17 +84,26 @@ def _compute_scores( """ dim = ["x", "y"] if "x" in fcst.dims and "y" in fcst.dims else ["values"] error = fcst - obs - scores = xr.Dataset( - { - f"{prefix}BIAS{suffix}": error.mean(dim=dim, skipna=True), - f"{prefix}MSE{suffix}": (error**2).mean(dim=dim, skipna=True), - f"{prefix}MAE{suffix}": abs(error).mean(dim=dim, skipna=True), - f"{prefix}VAR{suffix}": error.var(dim=dim, skipna=True), - f"{prefix}CORR{suffix}": xr.corr(fcst, obs, dim=dim), - f"{prefix}R2{suffix}": xr.corr(fcst, obs, dim=dim) ** 2, - } - ) - scores = scores.expand_dims({"source": [source]}) + if dim == []: + scores = xr.Dataset( + { + f"{prefix}BIAS{suffix}": error, + f"{prefix}MSE{suffix}": (error**2), + f"{prefix}MAE{suffix}": abs(error), + } + ) + else: + scores = xr.Dataset( + { + f"{prefix}BIAS{suffix}": error.mean(dim=dim, skipna=True), + f"{prefix}MSE{suffix}": (error**2).mean(dim=dim, skipna=True), + f"{prefix}MAE{suffix}": abs(error).mean(dim=dim, skipna=True), + f"{prefix}VAR{suffix}": error.var(dim=dim, skipna=True), + f"{prefix}CORR{suffix}": xr.corr(fcst, obs, dim=dim), 
+ f"{prefix}R2{suffix}": xr.corr(fcst, obs, dim=dim) ** 2, + } + ) + # scores = scores.expand_dims({"source": [source]}) return scores @@ -212,8 +221,6 @@ def verify( scores = _merge_metrics(scores) statistics = _merge_metrics(statistics) - LOG.info("Computed scores dataset: \n%s", scores) - LOG.info("Computed statistics dataset: \n%s", statistics) out = xr.merge( [scores, statistics], join="outer", From 0ec8a0f926bd4b641c1436aa4793e1c8592da515 Mon Sep 17 00:00:00 2001 From: Jonas Bhend Date: Thu, 8 Jan 2026 08:40:39 +0100 Subject: [PATCH 08/70] clean up --- src/verification/__init__.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/verification/__init__.py b/src/verification/__init__.py index e9951b8a..8f05349c 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -221,11 +221,7 @@ def verify( scores = _merge_metrics(scores) statistics = _merge_metrics(statistics) - out = xr.merge( - [scores, statistics], - join="outer", - compat="no_conflicts", - ) + out = xr.merge([scores, statistics], join="outer", compat="no_conflicts") LOG.info("Computed metrics in %.2f seconds", time.time() - start) LOG.info("Metrics dataset: \n%s", out) return out From 51f22a6e44cf5f2660a7813cba1bbf73e7ab4614 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 12 Jan 2026 16:28:01 +0100 Subject: [PATCH 09/70] New rule and plotting file for plotting maps of summary statistics. (No changes to code yet.) 
--- workflow/rules/plot.smk | 32 +++ workflow/scripts/plot_summary_stat_maps.mo.py | 239 ++++++++++++++++++ 2 files changed, 271 insertions(+) create mode 100644 workflow/scripts/plot_summary_stat_maps.mo.py diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index d4def1ba..7fbd3320 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -67,3 +67,35 @@ rule make_forecast_animation: """ convert -delay {params.delay} -loop 0 {input} {output} """ + + +rule plot_summary_stat_maps: + input: + script="workflow/scripts/plot_forecast_frame.mo.py", + inference_okfile=rules.execute_inference.output.okfile, + output: + temp( + OUT_ROOT + / "showcases/{run_id}/{init_time}/frames/{init_time}_{leadtime}_{param}_{region}.png" + ), + wildcard_constraints: + leadtime=r"\d+", # only digits + resources: + slurm_partition="postproc", + cpus_per_task=1, + runtime="10m", + params: + grib_out_dir=lambda wc: ( + Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib" + ).resolve(), + shell: + """ + export ECCODES_DEFINITION_PATH=$(realpath .venv/share/eccodes-cosmo-resources/definitions) + python {input.script} \ + --input {params.grib_out_dir} --date {wildcards.init_time} --outfn {output[0]} \ + --param {wildcards.param} --leadtime {wildcards.leadtime} --region {wildcards.region} \ + # interactive editing (needs to set localrule: True and use only one core) + # marimo edit {input.script} -- \ + # --input {params.grib_out_dir} --date {wildcards.init_time} --outfn {output[0]}\ + # --param {wildcards.param} --leadtime {wildcards.leadtime} --region {wildcards.region}\ + """ \ No newline at end of file diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py new file mode 100644 index 00000000..b8592141 --- /dev/null +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -0,0 +1,239 @@ +import marimo + +__generated_with = "0.16.5" +app = marimo.App(width="medium") + + +@app.cell +def _(): + import logging + 
from argparse import ArgumentParser + from pathlib import Path + + import cartopy.crs as ccrs + import earthkit.plots as ekp + import numpy as np + + from plotting import DOMAINS + from plotting import StatePlotter + from plotting.colormap_defaults import CMAP_DEFAULTS + from plotting.compat import load_state_from_grib + + return ( + ArgumentParser, + CMAP_DEFAULTS, + Path, + StatePlotter, + ekp, + load_state_from_grib, + logging, + np, + DOMAINS, + ccrs, + ) + + +@app.cell +def _(logging): + LOG = logging.getLogger(__name__) + LOG_FMT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + logging.basicConfig(level=logging.INFO, format=LOG_FMT) + return (LOG,) + + +@app.cell +def _(ArgumentParser, Path): + parser = ArgumentParser() + + parser.add_argument( + "--input", type=str, default=None, help="Directory to grib data" + ) + parser.add_argument("--date", type=str, default=None, help="reference datetime") + parser.add_argument("--outfn", type=str, help="output filename") + parser.add_argument("--leadtime", type=str, help="leadtime") + parser.add_argument("--param", type=str, help="parameter") + parser.add_argument("--region", type=str, help="name of region") + + args = parser.parse_args() + grib_dir = Path(args.input) + init_time = args.date + outfn = Path(args.outfn) + lead_time = args.leadtime + param = args.param + region = args.region + return ( + args, + grib_dir, + init_time, + lead_time, + outfn, + param, + region, + ) + + +@app.cell +def _(grib_dir, init_time, lead_time, load_state_from_grib, param): + # load grib file + grib_file = grib_dir / f"{init_time}_{lead_time}.grib" + if param == "SP_10M": + paramlist = ["U_10M", "V_10M"] + elif param == "SP": + paramlist = ["U", "V"] + else: + paramlist = [param] + state = load_state_from_grib(grib_file, paramlist=paramlist) + return (state,) + + +@app.cell +def _(CMAP_DEFAULTS, ekp): + def get_style(param, units_override=None): + """Get style and colormap settings for the plot. 
+ Needed because cmap/norm does not work in Style(colors=cmap), + still needs to be passed as arguments to tripcolor()/tricontourf(). + """ + cfg = CMAP_DEFAULTS[param] + units = units_override if units_override is not None else cfg.get("units", "") + return { + "style": ekp.styles.Style( + levels=cfg.get("bounds", cfg.get("levels", None)), + extend="both", + units=units, + colors=cfg.get("colors", None), + ), + "norm": cfg.get("norm", None), + "cmap": cfg.get("cmap", None), + "levels": cfg.get("levels", None), + "vmin": cfg.get("vmin", None), + "vmax": cfg.get("vmax", None), + "colors": cfg.get("colors", None), + } + + return (get_style,) + + +@app.cell +def _(LOG, np): + """Preprocess fields with pint-based unit conversion and derived quantities.""" + try: + import pint # type: ignore + + _ureg = pint.UnitRegistry() + + def _k_to_c(arr): + # robust conversion with pint, fallback if dtype unsupported + try: + return (_ureg.Quantity(arr, _ureg.kelvin).to(_ureg.degC)).magnitude + except Exception: + return arr - 273.15 + + def _ms_to_knots(arr): + # robust conversion with pint, fallback if dtype unsupported + try: + return ( + _ureg.Quantity(arr, _ureg.meter / _ureg.second).to(_ureg.knot) + ).magnitude + except Exception: + return arr * 1.943844 + + def _m_to_mm(arr): + # robust conversion with pint, fallback if dtype unsupported + try: + return (_ureg.Quantity(arr, _ureg.meter).to(_ureg.millimeter)).magnitude + except Exception: + return arr * 1000 + + except Exception: + LOG.warning("pint not available; falling back hardcoded conversions") + + def _k_to_c(arr): + return arr - 273.15 + + def _ms_to_knots(arr): + return arr * 1.943844 + + def _m_to_mm(arr): + return arr * 1000 + + def preprocess_field(param: str, state: dict): + """ + - Temperatures: K -> °C + - Wind speed: sqrt(u^2 + v^2) + - Precipitation: m -> mm + Returns: (field_array, units_override or None) + """ + fields = state["fields"] + # temperature variables + if param in ("T_2M", "TD_2M", "T", "TD"): 
+ return _k_to_c(fields[param]), "°C" + # 10m wind speed (allow legacy 'uv' alias) + if param == "SP_10M": + u = fields["U_10M"] + v = fields["V_10M"] + return np.sqrt(u**2 + v**2), "m/s" + # wind speed from standard-level components + if param == "SP": + u = fields["U"] + v = fields["V"] + return np.sqrt(u**2 + v**2), "m/s" + if param == "TOT_PREC": + return _m_to_mm(fields[param]), "mm" + # default: passthrough + return fields[param], None + + return (preprocess_field,) + + +@app.cell +def _( + LOG, + StatePlotter, + args, + get_style, + outfn, + param, + preprocess_field, + region, + state, + DOMAINS, + ccrs, +): + # plot individual fields + plotter = StatePlotter( + state["longitudes"], + state["latitudes"], + outfn.parent, + ) + fig = plotter.init_geoaxes( + nrows=1, + ncols=1, + projection=DOMAINS[region]["projection"], + bbox=DOMAINS[region]["extent"], + name=region, + size=(6, 6), + ) + subplot = fig.add_map(row=0, column=0) + + # preprocess field (unit conversion, derived quantities) + field, units_override = preprocess_field(param, state) + + plotter.plot_field(subplot, field, **get_style(args.param, units_override)) + subplot.ax.add_geometries( + state["lam_envelope"], + edgecolor="black", + facecolor="none", + crs=ccrs.PlateCarree(), + ) + validtime = state["valid_time"].strftime("%Y%m%d%H%M") + # leadtime = int(state["lead_time"].total_seconds() // 3600) + + fig.title(f"{param}, time: {validtime}") + + fig.save(outfn, bbox_inches="tight", dpi=200) + LOG.info(f"saved: {outfn}") + return + + +if __name__ == "__main__": + app.run() From 366249d5563caa9dc1d795d25ee84f65b35fe952 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 12 Jan 2026 17:01:42 +0100 Subject: [PATCH 10/70] Obvious changes to the new plotting rule for maps of summary statistics. 
--- workflow/rules/plot.smk | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 7fbd3320..fee1dfe3 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -71,13 +71,10 @@ rule make_forecast_animation: rule plot_summary_stat_maps: input: - script="workflow/scripts/plot_forecast_frame.mo.py", + script="workflow/scripts/plot_summary_stat_maps.mo.py", inference_okfile=rules.execute_inference.output.okfile, output: - temp( - OUT_ROOT - / "showcases/{run_id}/{init_time}/frames/{init_time}_{leadtime}_{param}_{region}.png" - ), + OUT_ROOT / "results/summary_stats/maps/{run_id}/{leadtime}/{metric}_{param}_{region}.png", wildcard_constraints: leadtime=r"\d+", # only digits resources: @@ -85,9 +82,7 @@ rule plot_summary_stat_maps: cpus_per_task=1, runtime="10m", params: - grib_out_dir=lambda wc: ( - Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib" - ).resolve(), + # What do I need here? shell: """ export ECCODES_DEFINITION_PATH=$(realpath .venv/share/eccodes-cosmo-resources/definitions) From f53139519f83168321f6db96f19e228772958aec Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 12 Jan 2026 17:30:14 +0100 Subject: [PATCH 11/70] Some more changes (preliminary, to be continued). --- workflow/rules/plot.smk | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index fee1dfe3..8090a213 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -82,12 +82,14 @@ rule plot_summary_stat_maps: cpus_per_task=1, runtime="10m", params: - # What do I need here? + nc_out_dir=lambda wc: ( + Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib" # to be adjusted. 
+ ).resolve(), shell: """ export ECCODES_DEFINITION_PATH=$(realpath .venv/share/eccodes-cosmo-resources/definitions) python {input.script} \ - --input {params.grib_out_dir} --date {wildcards.init_time} --outfn {output[0]} \ + --input {params.nc_out_dir} --date {wildcards.init_time} --outfn {output[0]} \ --param {wildcards.param} --leadtime {wildcards.leadtime} --region {wildcards.region} \ # interactive editing (needs to set localrule: True and use only one core) # marimo edit {input.script} -- \ From 32c7ecf6ff676c9c978090f44208bfd1e53ab558 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 13 Jan 2026 18:02:48 +0100 Subject: [PATCH 12/70] Further changes to plotting scripts. --- workflow/rules/plot.smk | 4 +++- workflow/scripts/plot_summary_stat_maps.mo.py | 14 +++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 8090a213..2014e000 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -83,7 +83,9 @@ rule plot_summary_stat_maps: runtime="10m", params: nc_out_dir=lambda wc: ( - Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib" # to be adjusted. + Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib" + # not sure how to do this, because the baselines are in, e.g., output/data/baselines/COSMO-E/verif_aggregated.nc + # and the runs are in output/data/runs/runID/verif_aggregated.nc ).resolve(), shell: """ diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index b8592141..4b8eb38f 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -6,18 +6,30 @@ @app.cell def _(): + + # this sure stays the same. import logging from argparse import ArgumentParser from pathlib import Path + # this sure stays the same. import cartopy.crs as ccrs import earthkit.plots as ekp import numpy as np + # this stays the same as well. 
from plotting import DOMAINS + + # no changes to StatePlotter required according to ChatGPT. from plotting import StatePlotter + + # Probably need some new colour maps. + # at least one for biases (diverging), maybe different diverging ones for the different variables. + # from plotting.colormap_defaults import CMAP_DEFAULTS - from plotting.compat import load_state_from_grib + + # need to load nc files... + from plotting.compat import load_state_from_grib # TODO: load state from nc? return ( ArgumentParser, From c2ab6451d822271cdd4a9598c3dec91ae118134c Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 14 Jan 2026 15:12:43 +0100 Subject: [PATCH 13/70] First version of colour maps finished. For Bias, RMSE and MAE map plots. --- src/plotting/colormap_defaults.py | 45 ++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 52ff0502..4a85f67e 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -4,12 +4,23 @@ from matplotlib import pyplot as plt import warnings from .colormap_loader import load_ncl_colormap - +from matplotlib.colors import BoundaryNorm +import numpy as np def _fallback(): warnings.warn("No colormap found for this parameter, using fallback.", UserWarning) return {"cmap": plt.get_cmap("viridis"), "norm": None, "units": ""} +def symmetric_boundary_norm(nlevels): + """ + Returns a callable that creates a symmetric BoundaryNorm + around zero with `nlevels` discrete colors. Used for creating colormaps for bias. 
+ """ + def _norm(data): + vmax = np.nanmax(np.abs(data)) + boundaries = np.linspace(-vmax, vmax, nlevels + 1) + return BoundaryNorm(boundaries=boundaries, ncolors=nlevels) + return _norm _CMAP_DEFAULTS = { "SP": {"cmap": plt.get_cmap("coolwarm", 11), "vmin": 800 * 100, "vmax": 1100 * 100}, @@ -44,6 +55,38 @@ def _fallback(): "units": "mm", "levels": [0, 0.05, 0.1, 0.25, 0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 100], }, + + # hard-code this for the moment, can still make smarter later on: + # RMSE and MAE first (is all the same). Use Reds colormap to indicate 'error'. + # Use a limited number of levels so that absolute values of error can be read from the map. + # always start at 0 so that the saturation of the colour corresponds to the error magnitude. + + # RMSE: + "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "m/s"}, + "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "m/s"}, + "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "°C"}, + "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "°C"}, + "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "Pa"}, + "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "Pa"}, + "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "mm"}, + + # MAE: + "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "m/s"}, + "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "m/s"}, + "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "°C"}, + "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "°C"}, + "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "Pa"}, + "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "Pa"}, + "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | 
{"units": "mm"}, + + # Bias: + "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "m/s"}, + "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "m/s"}, + "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "°C"}, + "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "°C"}, + "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "Pa"}, + "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "Pa"}, + "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "mm"} } CMAP_DEFAULTS = defaultdict(_fallback, _CMAP_DEFAULTS) From 27b91e2d123af3a1b142f73e9cf65266db8ec91e Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 14 Jan 2026 17:55:36 +0100 Subject: [PATCH 14/70] Better comments in the colour map code. --- src/plotting/colormap_defaults.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 4a85f67e..5e22f72f 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -57,7 +57,8 @@ def _norm(data): }, # hard-code this for the moment, can still make smarter later on: - # RMSE and MAE first (is all the same). Use Reds colormap to indicate 'error'. + # RMSE and MAE first (is all the same). Sequential colour map to reflect the nature of the data (error, all positive). + # Red is suggestive of 'bad' (high error). # Use a limited number of levels so that absolute values of error can be read from the map. # always start at 0 so that the saturation of the colour corresponds to the error magnitude. 
@@ -80,6 +81,8 @@ def _norm(data): "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "mm"}, # Bias: + # diverging colour scheme for the Bias to reflect the nature of the data (can be positive or negative, symmetric). + # Red-Blue colour scheme for all variables except precipitation, where a Brown-Green scheme is more suggestive. "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "m/s"}, "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "°C"}, From 1b2e6702e4bdf4f976da32d5b96981e372073f65 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 14 Jan 2026 17:56:39 +0100 Subject: [PATCH 15/70] Better Comments, some further changes to code. --- workflow/scripts/plot_summary_stat_maps.mo.py | 25 ++++++++----------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 4b8eb38f..14eb182a 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -23,13 +23,12 @@ def _(): # no changes to StatePlotter required according to ChatGPT. from plotting import StatePlotter - # Probably need some new colour maps. - # at least one for biases (diverging), maybe different diverging ones for the different variables. - # + # Added some new colour maps for the Bias / MAE / RMSE map plots. from plotting.colormap_defaults import CMAP_DEFAULTS - # need to load nc files... - from plotting.compat import load_state_from_grib # TODO: load state from nc? + # need to load nc files. 
But this statement is not needed any more because + # the .nc files can just be read with xr.open_dataset + # from plotting.compat import load_state_from_grib return ( ArgumentParser, @@ -37,7 +36,7 @@ def _(): Path, StatePlotter, ekp, - load_state_from_grib, + # load_state_from_grib, logging, np, DOMAINS, @@ -58,25 +57,23 @@ def _(ArgumentParser, Path): parser = ArgumentParser() parser.add_argument( - "--input", type=str, default=None, help="Directory to grib data" + "--input", type=str, default=None, help="Directory to .nc data containing the error fields" ) - parser.add_argument("--date", type=str, default=None, help="reference datetime") + parser.add_argument("--outfn", type=str, help="output filename") parser.add_argument("--leadtime", type=str, help="leadtime") parser.add_argument("--param", type=str, help="parameter") parser.add_argument("--region", type=str, help="name of region") args = parser.parse_args() - grib_dir = Path(args.input) - init_time = args.date + nc_dir = Path(args.input) outfn = Path(args.outfn) lead_time = args.leadtime param = args.param region = args.region return ( args, - grib_dir, - init_time, + nc_dir, lead_time, outfn, param, @@ -85,9 +82,9 @@ def _(ArgumentParser, Path): @app.cell -def _(grib_dir, init_time, lead_time, load_state_from_grib, param): +def _(nc_dir, init_time, lead_time, load_state_from_grib, param): # load grib file - grib_file = grib_dir / f"{init_time}_{lead_time}.grib" + grib_file = nc_dir / f"{init_time}_{lead_time}.grib" if param == "SP_10M": paramlist = ["U_10M", "V_10M"] elif param == "SP": From 52dd1e71e7a84f00e8197e2b68a78176270691c3 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 15 Jan 2026 15:46:47 +0100 Subject: [PATCH 16/70] Added back instances of lead time. 
--- workflow/scripts/plot_summary_stat_maps.mo.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 14eb182a..2a60de34 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -59,7 +59,7 @@ def _(ArgumentParser, Path): parser.add_argument( "--input", type=str, default=None, help="Directory to .nc data containing the error fields" ) - + # parser.add_argument("--date", type=str, default=None, help="reference datetime") # to be deleted? parser.add_argument("--outfn", type=str, help="output filename") parser.add_argument("--leadtime", type=str, help="leadtime") parser.add_argument("--param", type=str, help="parameter") @@ -67,6 +67,7 @@ def _(ArgumentParser, Path): args = parser.parse_args() nc_dir = Path(args.input) + # init_time = args.date # to be deleted? outfn = Path(args.outfn) lead_time = args.leadtime param = args.param @@ -74,6 +75,7 @@ def _(ArgumentParser, Path): return ( args, nc_dir, + # init_time, # to be deleted? 
lead_time, outfn, param, @@ -83,8 +85,8 @@ def _(ArgumentParser, Path): @app.cell def _(nc_dir, init_time, lead_time, load_state_from_grib, param): - # load grib file - grib_file = nc_dir / f"{init_time}_{lead_time}.grib" + # load .nc verification file + nc_file = nc_dir / "verif_aggregated.nc" if param == "SP_10M": paramlist = ["U_10M", "V_10M"] elif param == "SP": From ddc5883ce3e953af50a93ef10eb7d53f3c349e3e Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 15 Jan 2026 17:55:59 +0100 Subject: [PATCH 17/70] New function for loading netCDF files added to compat.py --- src/plotting/compat.py | 76 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/src/plotting/compat.py b/src/plotting/compat.py index 665287e0..fbe26f3e 100644 --- a/src/plotting/compat.py +++ b/src/plotting/compat.py @@ -90,3 +90,79 @@ def load_state_from_raw( if key.startswith("field_"): state["fields"][key.removeprefix("field_")] = value return state + + +def load_state_from_netcdf( + file: Path, + paramlist: list[str], + *, + season: str = "all", + init_hour: int = -999, + lead_time: int | None = None, # hours +) -> dict: + """ + NetCDF analogue of load_state_from_grib(), restricted to spatial variables. 
+ """ + + ds = xr.open_dataset(file) + + # --- normalize lead_time to hours (float) --- + if ds["lead_time"].dtype.kind == "m": + ds = ds.assign_coords( + lead_time=ds["lead_time"].dt.total_seconds() / 3600 + ) + + # --- select season / init_hour / lead_time --- + ds = ds.sel(season=season, init_hour=init_hour) + if lead_time is not None: + ds = ds.sel(lead_time=lead_time) + + # --- infer reference + valid time --- + # Assumption: forecast_reference_time is not explicitly stored + # We reconstruct something consistent with GRIB usage + forecast_reference_time = None + valid_time = None + if lead_time is not None: + valid_time = pd.to_datetime(lead_time, unit="h", origin="unix") + + # --- get lat / lon (assumed present as coordinates) --- + lat = ds["lat"].values if "lat" in ds.coords else ds["latitude"].values + lon = ds["lon"].values if "lon" in ds.coords else ds["longitude"].values + + lon2d, lat2d = np.meshgrid(lon, lat) + lats = lat2d.flatten() + lons = lon2d.flatten() + + state = { + "forecast_reference_time": forecast_reference_time, + "valid_time": valid_time, + "latitudes": lats, + "longitudes": lons, + "fields": {}, + } + + # --- LAM envelope (convex hull) --- + lam_hull = MultiPoint(list(zip(lons.tolist(), lats.tolist()))).convex_hull + state["lam_envelope"] = gpd.GeoSeries([lam_hull], crs="EPSG:4326") + + # --- extract spatial fields --- + for param in paramlist: + # e.g. 
U_10M.MAE.spatial + matching_vars = [ + v for v in ds.data_vars + if v.startswith(f"{param}.") and v.endswith(".spatial") + ] + + if not matching_vars: + state["fields"][param] = np.full(lats.size, np.nan, dtype=float) + continue + + # If multiple metrics exist, concatenate them + arrays = [] + for var in matching_vars: + arr = ds[var].values # (lead_time, y, x) or (y, x) + arrays.append(arr.reshape(-1)) + + state["fields"][param] = np.concatenate(arrays) + + return state From 35396a70e4b0603bd5b1bb8cdf73fa3610816cab Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 15 Jan 2026 18:06:05 +0100 Subject: [PATCH 18/70] Marimo app cell for loading data from .nc --- src/plotting/compat.py | 1 + workflow/scripts/plot_summary_stat_maps.mo.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/plotting/compat.py b/src/plotting/compat.py index fbe26f3e..06609d1d 100644 --- a/src/plotting/compat.py +++ b/src/plotting/compat.py @@ -5,6 +5,7 @@ import geopandas as gpd import numpy as np import pandas as pd +import xarray as xr from meteodatalab import data_source from meteodatalab import grib_decoder from shapely.geometry import MultiPoint diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 2a60de34..27cc957c 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -84,16 +84,21 @@ def _(ArgumentParser, Path): @app.cell -def _(nc_dir, init_time, lead_time, load_state_from_grib, param): - # load .nc verification file - nc_file = nc_dir / "verif_aggregated.nc" +def _(nc_file, param, lead_time, load_state_from_netcdf): + # load .nc verification file: if param == "SP_10M": paramlist = ["U_10M", "V_10M"] elif param == "SP": paramlist = ["U", "V"] else: paramlist = [param] - state = load_state_from_grib(grib_file, paramlist=paramlist) + + state = load_state_from_netcdf( + nc_file, + paramlist=paramlist, + lead_time=lead_time, + ) + 
return (state,) From e8a92aaa738e8c5f9e44268057ecf21a0ea4e8d6 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 19 Jan 2026 15:50:02 +0100 Subject: [PATCH 19/70] Remove .nc-loading function again, do it with earthkit instead. --- src/plotting/compat.py | 75 ------------------------------------------ 1 file changed, 75 deletions(-) diff --git a/src/plotting/compat.py b/src/plotting/compat.py index 06609d1d..93634aa3 100644 --- a/src/plotting/compat.py +++ b/src/plotting/compat.py @@ -92,78 +92,3 @@ def load_state_from_raw( state["fields"][key.removeprefix("field_")] = value return state - -def load_state_from_netcdf( - file: Path, - paramlist: list[str], - *, - season: str = "all", - init_hour: int = -999, - lead_time: int | None = None, # hours -) -> dict: - """ - NetCDF analogue of load_state_from_grib(), restricted to spatial variables. - """ - - ds = xr.open_dataset(file) - - # --- normalize lead_time to hours (float) --- - if ds["lead_time"].dtype.kind == "m": - ds = ds.assign_coords( - lead_time=ds["lead_time"].dt.total_seconds() / 3600 - ) - - # --- select season / init_hour / lead_time --- - ds = ds.sel(season=season, init_hour=init_hour) - if lead_time is not None: - ds = ds.sel(lead_time=lead_time) - - # --- infer reference + valid time --- - # Assumption: forecast_reference_time is not explicitly stored - # We reconstruct something consistent with GRIB usage - forecast_reference_time = None - valid_time = None - if lead_time is not None: - valid_time = pd.to_datetime(lead_time, unit="h", origin="unix") - - # --- get lat / lon (assumed present as coordinates) --- - lat = ds["lat"].values if "lat" in ds.coords else ds["latitude"].values - lon = ds["lon"].values if "lon" in ds.coords else ds["longitude"].values - - lon2d, lat2d = np.meshgrid(lon, lat) - lats = lat2d.flatten() - lons = lon2d.flatten() - - state = { - "forecast_reference_time": forecast_reference_time, - "valid_time": valid_time, - "latitudes": lats, - "longitudes": lons, - "fields": {}, - 
} - - # --- LAM envelope (convex hull) --- - lam_hull = MultiPoint(list(zip(lons.tolist(), lats.tolist()))).convex_hull - state["lam_envelope"] = gpd.GeoSeries([lam_hull], crs="EPSG:4326") - - # --- extract spatial fields --- - for param in paramlist: - # e.g. U_10M.MAE.spatial - matching_vars = [ - v for v in ds.data_vars - if v.startswith(f"{param}.") and v.endswith(".spatial") - ] - - if not matching_vars: - state["fields"][param] = np.full(lats.size, np.nan, dtype=float) - continue - - # If multiple metrics exist, concatenate them - arrays = [] - for var in matching_vars: - arr = ds[var].values # (lead_time, y, x) or (y, x) - arrays.append(arr.reshape(-1)) - - state["fields"][param] = np.concatenate(arrays) - - return state From 06be6fa853dc3536e6271b0bb3d453bcc6149f69 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 21 Jan 2026 18:04:30 +0100 Subject: [PATCH 20/70] All kinds of changes. Co-Development session with Francesco. Got a long way towards the png plots. Co-authored-by: Francesco Zanetta --- workflow/Snakefile | 7 + workflow/rules/plot.smk | 19 +- workflow/scripts/plot_summary_stat_maps.mo.py | 205 +++++------------- 3 files changed, 77 insertions(+), 154 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 7f430cf9..7a6e08b3 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -52,6 +52,13 @@ rule experiment_all: OUT_ROOT / "data/runs/{run_id}/summary.md", run_id=collect_all_candidates(), ), + expand( + OUT_ROOT / "results/experiment/metrics_spatial/{run_id}/{param}_{metric}_{leadtime}.png", + run_id=collect_all_candidates(), + leadtime=[6, 12], + metric=["BIAS", "RMSE", "MAE"], + param=["TOT_PREC", "T_2M"], + ) rule showcase_all: diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 2014e000..6b827dfc 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -70,11 +70,12 @@ rule make_forecast_animation: rule plot_summary_stat_maps: + localrule: True input: 
script="workflow/scripts/plot_summary_stat_maps.mo.py", - inference_okfile=rules.execute_inference.output.okfile, + verif_file=OUT_ROOT / "data/runs/{run_id}/verif_aggregated.nc" output: - OUT_ROOT / "results/summary_stats/maps/{run_id}/{leadtime}/{metric}_{param}_{region}.png", + OUT_ROOT / "results/experiment/metrics_spatial/{run_id}/{param}_{metric}_{leadtime}.png", wildcard_constraints: leadtime=r"\d+", # only digits resources: @@ -83,18 +84,18 @@ rule plot_summary_stat_maps: runtime="10m", params: nc_out_dir=lambda wc: ( - Path(OUT_ROOT) / f"data/runs/{wc.run_id}/{wc.init_time}/grib" + Path(OUT_ROOT) / f"data/runs/{wc.run_id}/verif_aggregated.nc" # not sure how to do this, because the baselines are in, e.g., output/data/baselines/COSMO-E/verif_aggregated.nc # and the runs are in output/data/runs/runID/verif_aggregated.nc ).resolve(), shell: """ export ECCODES_DEFINITION_PATH=$(realpath .venv/share/eccodes-cosmo-resources/definitions) - python {input.script} \ - --input {params.nc_out_dir} --date {wildcards.init_time} --outfn {output[0]} \ - --param {wildcards.param} --leadtime {wildcards.leadtime} --region {wildcards.region} \ + # python {input.script} \ + # --input {input.verif_file} --outfn {output[0]} \ + # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} # interactive editing (needs to set localrule: True and use only one core) - # marimo edit {input.script} -- \ - # --input {params.grib_out_dir} --date {wildcards.init_time} --outfn {output[0]}\ - # --param {wildcards.param} --leadtime {wildcards.leadtime} --region {wildcards.region}\ + marimo edit {input.script} -- \ + --input {input.verif_file} --outfn {output[0]}\ + --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} """ \ No newline at end of file diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 27cc957c..54bf7863 100644 --- 
a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -1,12 +1,12 @@ import marimo -__generated_with = "0.16.5" +__generated_with = "0.19.4" app = marimo.App(width="medium") @app.cell def _(): - + # this sure stays the same. import logging from argparse import ArgumentParser @@ -16,10 +16,11 @@ def _(): import cartopy.crs as ccrs import earthkit.plots as ekp import numpy as np + import xarray as xr # this stays the same as well. from plotting import DOMAINS - + # no changes to StatePlotter required according to ChatGPT. from plotting import StatePlotter @@ -28,19 +29,16 @@ def _(): # need to load nc files. But this statement is not needed any more because # the .nc files can just be read with xr.open_dataset - # from plotting.compat import load_state_from_grib - return ( ArgumentParser, CMAP_DEFAULTS, + DOMAINS, Path, StatePlotter, ekp, - # load_state_from_grib, logging, np, - DOMAINS, - ccrs, + xr, ) @@ -49,57 +47,51 @@ def _(logging): LOG = logging.getLogger(__name__) LOG_FMT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" logging.basicConfig(level=logging.INFO, format=LOG_FMT) - return (LOG,) + return @app.cell -def _(ArgumentParser, Path): +def _(ArgumentParser, Path, np): parser = ArgumentParser() parser.add_argument( "--input", type=str, default=None, help="Directory to .nc data containing the error fields" ) - # parser.add_argument("--date", type=str, default=None, help="reference datetime") # to be deleted? parser.add_argument("--outfn", type=str, help="output filename") parser.add_argument("--leadtime", type=str, help="leadtime") parser.add_argument("--param", type=str, help="parameter") - parser.add_argument("--region", type=str, help="name of region") + # parser.add_argument("--region", type=str, help="name of region") + parser.add_argument("--metric", type=str, help = "Evaluation Metric. 
So far Bias, RMSE or MAE are implemented.") + parser.add_argument("--season", type=str, default="all", help="season filter") + parser.add_argument("--init_hour", type=str, default="all", help="initialization hour filter") args = parser.parse_args() - nc_dir = Path(args.input) - # init_time = args.date # to be deleted? + verif_file = Path(args.input) outfn = Path(args.outfn) lead_time = args.leadtime param = args.param - region = args.region - return ( - args, - nc_dir, - # init_time, # to be deleted? - lead_time, - outfn, - param, - region, - ) + # region = args.region + season = args.season + init_hour = args.init_hour + metric = args.metric + if isinstance(init_hour, str): + if init_hour == "all": + init_hour = -999 + else: + raise ValueError("init_hour must be 'all' or an integer hour") -@app.cell -def _(nc_file, param, lead_time, load_state_from_netcdf): - # load .nc verification file: - if param == "SP_10M": - paramlist = ["U_10M", "V_10M"] - elif param == "SP": - paramlist = ["U", "V"] - else: - paramlist = [param] - - state = load_state_from_netcdf( - nc_file, - paramlist=paramlist, - lead_time=lead_time, - ) + lead_time = np.timedelta64(lead_time, 'h') + return init_hour, lead_time, metric, outfn, param, season, verif_file - return (state,) + +@app.cell +def _(init_hour, lead_time, metric, param, season, verif_file, xr): + ds = xr.open_dataset(verif_file) + var = f"{param}.{metric}.spatial" + ds = ds[var].sel(init_hour=init_hour, lead_time=lead_time, season=season) + ds + return ds, var @app.cell @@ -125,129 +117,52 @@ def get_style(param, units_override=None): "vmax": cfg.get("vmax", None), "colors": cfg.get("colors", None), } - return (get_style,) @app.cell -def _(LOG, np): - """Preprocess fields with pint-based unit conversion and derived quantities.""" - try: - import pint # type: ignore - - _ureg = pint.UnitRegistry() - - def _k_to_c(arr): - # robust conversion with pint, fallback if dtype unsupported - try: - return (_ureg.Quantity(arr, 
_ureg.kelvin).to(_ureg.degC)).magnitude - except Exception: - return arr - 273.15 - - def _ms_to_knots(arr): - # robust conversion with pint, fallback if dtype unsupported - try: - return ( - _ureg.Quantity(arr, _ureg.meter / _ureg.second).to(_ureg.knot) - ).magnitude - except Exception: - return arr * 1.943844 - - def _m_to_mm(arr): - # robust conversion with pint, fallback if dtype unsupported - try: - return (_ureg.Quantity(arr, _ureg.meter).to(_ureg.millimeter)).magnitude - except Exception: - return arr * 1000 - - except Exception: - LOG.warning("pint not available; falling back hardcoded conversions") - - def _k_to_c(arr): - return arr - 273.15 - - def _ms_to_knots(arr): - return arr * 1.943844 - - def _m_to_mm(arr): - return arr * 1000 - - def preprocess_field(param: str, state: dict): - """ - - Temperatures: K -> °C - - Wind speed: sqrt(u^2 + v^2) - - Precipitation: m -> mm - Returns: (field_array, units_override or None) - """ - fields = state["fields"] - # temperature variables - if param in ("T_2M", "TD_2M", "T", "TD"): - return _k_to_c(fields[param]), "°C" - # 10m wind speed (allow legacy 'uv' alias) - if param == "SP_10M": - u = fields["U_10M"] - v = fields["V_10M"] - return np.sqrt(u**2 + v**2), "m/s" - # wind speed from standard-level components - if param == "SP": - u = fields["U"] - v = fields["V"] - return np.sqrt(u**2 + v**2), "m/s" - if param == "TOT_PREC": - return _m_to_mm(fields[param]), "mm" - # default: passthrough - return fields[param], None - - return (preprocess_field,) - - -@app.cell -def _( - LOG, - StatePlotter, - args, - get_style, - outfn, - param, - preprocess_field, - region, - state, - DOMAINS, - ccrs, -): +def _(DOMAINS, StatePlotter, ds, get_style, outfn, var): # plot individual fields + import matplotlib.pyplot as plt + plotter = StatePlotter( - state["longitudes"], - state["latitudes"], + ds["longitude"].values.ravel(), + ds["latitude"].values.ravel(), outfn.parent, ) fig = plotter.init_geoaxes( nrows=1, ncols=1, - 
projection=DOMAINS[region]["projection"], - bbox=DOMAINS[region]["extent"], - name=region, + projection=DOMAINS["switzerland"]["projection"], + bbox=DOMAINS["switzerland"]["extent"], + name="switzerland", size=(6, 6), ) subplot = fig.add_map(row=0, column=0) - # preprocess field (unit conversion, derived quantities) - field, units_override = preprocess_field(param, state) + # # preprocess field (unit conversion, derived quantities) + # field, units_override = preprocess_field(param, state) - plotter.plot_field(subplot, field, **get_style(args.param, units_override)) - subplot.ax.add_geometries( - state["lam_envelope"], - edgecolor="black", - facecolor="none", - crs=ccrs.PlateCarree(), - ) - validtime = state["valid_time"].strftime("%Y%m%d%H%M") - # leadtime = int(state["lead_time"].total_seconds() // 3600) + plotter.plot_field(subplot, ds.values.ravel(), **get_style(var)) + # subplot.ax.add_geometries( + # state["lam_envelope"], + # edgecolor="black", + # facecolor="none", + # crs=ccrs.PlateCarree(), + # ) + plt.show() + # validtime = state["valid_time"].strftime("%Y%m%d%H%M") + # # leadtime = int(state["lead_time"].total_seconds() // 3600) + + # fig.title(f"{param}, time: {validtime}") + + # fig.save(outfn, bbox_inches="tight", dpi=200) + # LOG.info(f"saved: {outfn}") + return - fig.title(f"{param}, time: {validtime}") - fig.save(outfn, bbox_inches="tight", dpi=200) - LOG.info(f"saved: {outfn}") +@app.cell +def _(): return From 61c728ec66ea55bfbcbe7164920d44115d590fa0 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 22 Jan 2026 17:14:21 +0100 Subject: [PATCH 21/70] Generalized to the other non-trivial (non-wind) variables. 
--- workflow/Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 7a6e08b3..41b54415 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -57,7 +57,7 @@ rule experiment_all: run_id=collect_all_candidates(), leadtime=[6, 12], metric=["BIAS", "RMSE", "MAE"], - param=["TOT_PREC", "T_2M"], + param=["TD_2M", "T_2M", "PMSL", "PS", "TOT_PREC"], ) From 761f8d94cff59a721172260f54f5781d4ea64e37 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 22 Jan 2026 17:24:11 +0100 Subject: [PATCH 22/70] Some changes to plotting script. --- workflow/scripts/plot_summary_stat_maps.mo.py | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 54bf7863..52ea116d 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -26,9 +26,6 @@ def _(): # Added some new colour maps for the Bias / MAE / RMSE map plots. from plotting.colormap_defaults import CMAP_DEFAULTS - - # need to load nc files. 
But this statement is not needed any more because - # the .nc files can just be read with xr.open_dataset return ( ArgumentParser, CMAP_DEFAULTS, @@ -47,7 +44,7 @@ def _(logging): LOG = logging.getLogger(__name__) LOG_FMT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" logging.basicConfig(level=logging.INFO, format=LOG_FMT) - return + return (LOG,) @app.cell @@ -121,7 +118,18 @@ def get_style(param, units_override=None): @app.cell -def _(DOMAINS, StatePlotter, ds, get_style, outfn, var): +def _( + DOMAINS, + LOG, + StatePlotter, + ds, + get_style, + lead_time, + metric, + outfn, + param, + var, +): # plot individual fields import matplotlib.pyplot as plt @@ -150,11 +158,11 @@ def _(DOMAINS, StatePlotter, ds, get_style, outfn, var): # facecolor="none", # crs=ccrs.PlateCarree(), # ) - plt.show() + # validtime = state["valid_time"].strftime("%Y%m%d%H%M") # # leadtime = int(state["lead_time"].total_seconds() // 3600) - # fig.title(f"{param}, time: {validtime}") + fig.title(f"{metric} of {param}, Lead Time: {lead_time}") # fig.save(outfn, bbox_inches="tight", dpi=200) # LOG.info(f"saved: {outfn}") From 1cd4dbf22e87464b69cd52d7126109d5722abd8d Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Fri, 23 Jan 2026 13:25:07 +0100 Subject: [PATCH 23/70] Plotting region now dynamical (but not yet properly working). Output written to .png now working. 
--- workflow/Snakefile | 3 ++- workflow/rules/plot.smk | 19 +++++++++++-------- workflow/scripts/plot_summary_stat_maps.mo.py | 13 +++++++------ 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 41b54415..0c96d3c8 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -53,11 +53,12 @@ rule experiment_all: run_id=collect_all_candidates(), ), expand( - OUT_ROOT / "results/experiment/metrics_spatial/{run_id}/{param}_{metric}_{leadtime}.png", + OUT_ROOT / "results/experiment/metrics_spatial/{run_id}/{param}_{metric}_{leadtime}_{region}.png", run_id=collect_all_candidates(), leadtime=[6, 12], metric=["BIAS", "RMSE", "MAE"], param=["TD_2M", "T_2M", "PMSL", "PS", "TOT_PREC"], + region=["europe", "switzerland"] ) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 6b827dfc..cad66bd5 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -73,9 +73,12 @@ rule plot_summary_stat_maps: localrule: True input: script="workflow/scripts/plot_summary_stat_maps.mo.py", - verif_file=OUT_ROOT / "data/runs/{run_id}/verif_aggregated.nc" + verif_file=OUT_ROOT / "data/runs/{run_id}/verif_aggregated.nc", + # verif_file=EXPERIMENT_PARTICIPANTS.values(), + # copied from rule report_experiment_dashboard - should be correct here, + # but needs adjustments in the output as well - tbd. 
output: - OUT_ROOT / "results/experiment/metrics_spatial/{run_id}/{param}_{metric}_{leadtime}.png", + OUT_ROOT / "results/experiment/metrics_spatial/{run_id}/{param}_{metric}_{leadtime}_{region}.png", wildcard_constraints: leadtime=r"\d+", # only digits resources: @@ -91,11 +94,11 @@ rule plot_summary_stat_maps: shell: """ export ECCODES_DEFINITION_PATH=$(realpath .venv/share/eccodes-cosmo-resources/definitions) - # python {input.script} \ - # --input {input.verif_file} --outfn {output[0]} \ - # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} + python {input.script} \ + --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ + --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ # interactive editing (needs to set localrule: True and use only one core) - marimo edit {input.script} -- \ - --input {input.verif_file} --outfn {output[0]}\ - --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} + # marimo edit {input.script} -- \ + # --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ + # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ """ \ No newline at end of file diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 52ea116d..0f68c203 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -57,7 +57,7 @@ def _(ArgumentParser, Path, np): parser.add_argument("--outfn", type=str, help="output filename") parser.add_argument("--leadtime", type=str, help="leadtime") parser.add_argument("--param", type=str, help="parameter") - # parser.add_argument("--region", type=str, help="name of region") + parser.add_argument("--region", type=str, help="name of region") parser.add_argument("--metric", type=str, help = "Evaluation Metric. 
So far Bias, RMSE or MAE are implemented.") parser.add_argument("--season", type=str, default="all", help="season filter") parser.add_argument("--init_hour", type=str, default="all", help="initialization hour filter") @@ -128,6 +128,7 @@ def _( metric, outfn, param, + region, var, ): # plot individual fields @@ -141,9 +142,9 @@ def _( fig = plotter.init_geoaxes( nrows=1, ncols=1, - projection=DOMAINS["switzerland"]["projection"], - bbox=DOMAINS["switzerland"]["extent"], - name="switzerland", + projection=DOMAINS[region]["projection"], + bbox=DOMAINS[region]["extent"], + name=region, size=(6, 6), ) subplot = fig.add_map(row=0, column=0) @@ -164,8 +165,8 @@ def _( fig.title(f"{metric} of {param}, Lead Time: {lead_time}") - # fig.save(outfn, bbox_inches="tight", dpi=200) - # LOG.info(f"saved: {outfn}") + fig.save(outfn, bbox_inches="tight", dpi=200) + LOG.info(f"saved: {outfn}") return From cdb2ccd8f91890b55386c9dbaabfbbd1b4eb967b Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Fri, 23 Jan 2026 13:31:00 +0100 Subject: [PATCH 24/70] Dynamic Regions now working. --- workflow/scripts/plot_summary_stat_maps.mo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 0f68c203..a2be8ca1 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -67,7 +67,7 @@ def _(ArgumentParser, Path, np): outfn = Path(args.outfn) lead_time = args.leadtime param = args.param - # region = args.region + region = args.region season = args.season init_hour = args.init_hour metric = args.metric From 558689c980b886c53d4a99022c477c76e7f14149 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Fri, 23 Jan 2026 14:36:07 +0100 Subject: [PATCH 25/70] Store results under experiment hash. 
--- workflow/Snakefile | 5 +++-- workflow/rules/plot.smk | 5 +---- workflow/scripts/plot_summary_stat_maps.mo.py | 11 ++++++++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 0c96d3c8..f8c9214f 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -53,12 +53,13 @@ rule experiment_all: run_id=collect_all_candidates(), ), expand( - OUT_ROOT / "results/experiment/metrics_spatial/{run_id}/{param}_{metric}_{leadtime}_{region}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/{run_id}/{param}_{metric}_{leadtime}_{region}.png", run_id=collect_all_candidates(), leadtime=[6, 12], metric=["BIAS", "RMSE", "MAE"], param=["TD_2M", "T_2M", "PMSL", "PS", "TOT_PREC"], - region=["europe", "switzerland"] + region=["europe", "switzerland"], + experiment=EXPERIMENT_HASH ) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index cad66bd5..298784a2 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -74,11 +74,8 @@ rule plot_summary_stat_maps: input: script="workflow/scripts/plot_summary_stat_maps.mo.py", verif_file=OUT_ROOT / "data/runs/{run_id}/verif_aggregated.nc", - # verif_file=EXPERIMENT_PARTICIPANTS.values(), - # copied from rule report_experiment_dashboard - should be correct here, - # but needs adjustments in the output as well - tbd. 
output: - OUT_ROOT / "results/experiment/metrics_spatial/{run_id}/{param}_{metric}_{leadtime}_{region}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/{run_id}/{param}_{metric}_{leadtime}_{region}.png", wildcard_constraints: leadtime=r"\d+", # only digits resources: diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index a2be8ca1..49ce4893 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -79,7 +79,16 @@ def _(ArgumentParser, Path, np): raise ValueError("init_hour must be 'all' or an integer hour") lead_time = np.timedelta64(lead_time, 'h') - return init_hour, lead_time, metric, outfn, param, season, verif_file + return ( + init_hour, + lead_time, + metric, + outfn, + param, + region, + season, + verif_file, + ) @app.cell From 986a7ee0ce75ba9bfa1f91407c83de7d579dea80 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 26 Jan 2026 15:05:39 +0100 Subject: [PATCH 26/70] Introduced new domain "switzerland_small" for more detailed inspection of results at smaller spatial scale. 
--- src/plotting/__init__.py | 4 ++++ workflow/Snakefile | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/plotting/__init__.py b/src/plotting/__init__.py index ce5e4e63..520e35ae 100644 --- a/src/plotting/__init__.py +++ b/src/plotting/__init__.py @@ -37,6 +37,10 @@ "extent": [0, 17.5, 40.5, 53.0], "projection": _PROJECTIONS["orthographic"], }, + "switzerland_small": { + "extent": [5.5, 11.0, 45.5, 48.0], + "projection": _PROJECTIONS["orthographic"], + }, } diff --git a/workflow/Snakefile b/workflow/Snakefile index f8c9214f..64a30694 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -58,7 +58,7 @@ rule experiment_all: leadtime=[6, 12], metric=["BIAS", "RMSE", "MAE"], param=["TD_2M", "T_2M", "PMSL", "PS", "TOT_PREC"], - region=["europe", "switzerland"], + region=["switzerland", "switzerland_small"], experiment=EXPERIMENT_HASH ) From 7f70fb453c49340e377e8224ccbeb1fce08b3ce6 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 27 Jan 2026 16:04:46 +0100 Subject: [PATCH 27/70] Reverse Red-Blue colour maps for bias. --- src/plotting/colormap_defaults.py | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 5e22f72f..5539b363 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -11,16 +11,16 @@ def _fallback(): warnings.warn("No colormap found for this parameter, using fallback.", UserWarning) return {"cmap": plt.get_cmap("viridis"), "norm": None, "units": ""} -def symmetric_boundary_norm(nlevels): - """ - Returns a callable that creates a symmetric BoundaryNorm - around zero with `nlevels` discrete colors. Used for creating colormaps for bias. 
- """ - def _norm(data): - vmax = np.nanmax(np.abs(data)) - boundaries = np.linspace(-vmax, vmax, nlevels + 1) - return BoundaryNorm(boundaries=boundaries, ncolors=nlevels) - return _norm +# def symmetric_boundary_norm(nlevels): +# """ +# Returns a callable that creates a symmetric BoundaryNorm +# around zero with `nlevels` discrete colors. Used for creating colormaps for bias. +# """ +# def _norm(data): +# vmax = np.nanmax(np.abs(data)) +# boundaries = np.linspace(-vmax, vmax, nlevels + 1) +# return BoundaryNorm(boundaries=boundaries, ncolors=nlevels) +# return _norm _CMAP_DEFAULTS = { "SP": {"cmap": plt.get_cmap("coolwarm", 11), "vmin": 800 * 100, "vmax": 1100 * 100}, @@ -83,13 +83,13 @@ def _norm(data): # Bias: # diverging colour scheme for the Bias to reflect the nature of the data (can be positive or negative, symmetric). # Red-Blue colour scheme for all variables except precipitation, where a Brown-Green scheme is more suggestive. - "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "m/s"}, - "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "m/s"}, - "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "°C"}, - "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "°C"}, - "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "Pa"}, - "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "Pa"}, - "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 11), "norm": symmetric_boundary_norm(nlevels=11)} | {"units": "mm"} + "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, + "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, + "TD_2M.BIAS.spatial": 
{"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "°C"}, + "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "°C"}, + "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, + "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, + "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 11)} | {"units": "mm"} } CMAP_DEFAULTS = defaultdict(_fallback, _CMAP_DEFAULTS) From 50e899fdce05ffe395b98a0d107ff986cee79fe4 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 27 Jan 2026 16:08:51 +0100 Subject: [PATCH 28/70] Preliminary changes to plotting script for getting symmetric colour map for bias. --- workflow/scripts/plot_summary_stat_maps.mo.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 49ce4893..2d81ac8d 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -102,25 +102,38 @@ def _(init_hour, lead_time, metric, param, season, verif_file, xr): @app.cell def _(CMAP_DEFAULTS, ekp): - def get_style(param, units_override=None): + def get_style(param, metric, units_override=None): """Get style and colormap settings for the plot. Needed because cmap/norm does not work in Style(colors=cmap), still needs to be passed as arguments to tripcolor()/tricontourf(). """ cfg = CMAP_DEFAULTS[param] units = units_override if units_override is not None else cfg.get("units", "") + + levels = cfg.get("levels", None) + + if (metric == "BIAS"): + # For bias, we want to use a diverging colormap with symmetric bounds around zero. 
+ max_abs_val = max(abs(levels)) + levels = numpy.arange(-max_abs_val, max_abs_val, dtype=None) + vmin = -max_abs_val + vmax = max_abs_val + else: + vmin = cfg.get("vmin", None) + vmax = cfg.get("vmax", None) + return { "style": ekp.styles.Style( - levels=cfg.get("bounds", cfg.get("levels", None)), + levels=cfg.get("bounds", levels), extend="both", units=units, colors=cfg.get("colors", None), ), "norm": cfg.get("norm", None), "cmap": cfg.get("cmap", None), - "levels": cfg.get("levels", None), - "vmin": cfg.get("vmin", None), - "vmax": cfg.get("vmax", None), + "levels": levels, + "vmin": vmin, + "vmax": vmax, "colors": cfg.get("colors", None), } return (get_style,) @@ -161,7 +174,7 @@ def _( # # preprocess field (unit conversion, derived quantities) # field, units_override = preprocess_field(param, state) - plotter.plot_field(subplot, ds.values.ravel(), **get_style(var)) + plotter.plot_field(subplot, ds.values.ravel(), **get_style(var, metric)) # subplot.ax.add_geometries( # state["lam_envelope"], # edgecolor="black", From a0aefc0ad065e272b4cbc28b41fd21de30f1eb1b Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 27 Jan 2026 16:23:57 +0100 Subject: [PATCH 29/70] Temporarily changed plotting script back to original to see if all of it still works. --- workflow/scripts/plot_summary_stat_maps.mo.py | 62 +++++++++++++------ 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 2d81ac8d..9b51c920 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -99,45 +99,69 @@ def _(init_hour, lead_time, metric, param, season, verif_file, xr): ds return ds, var - @app.cell def _(CMAP_DEFAULTS, ekp): - def get_style(param, metric, units_override=None): + def get_style(param, units_override=None): """Get style and colormap settings for the plot. 
Needed because cmap/norm does not work in Style(colors=cmap), still needs to be passed as arguments to tripcolor()/tricontourf(). """ cfg = CMAP_DEFAULTS[param] units = units_override if units_override is not None else cfg.get("units", "") - - levels = cfg.get("levels", None) - - if (metric == "BIAS"): - # For bias, we want to use a diverging colormap with symmetric bounds around zero. - max_abs_val = max(abs(levels)) - levels = numpy.arange(-max_abs_val, max_abs_val, dtype=None) - vmin = -max_abs_val - vmax = max_abs_val - else: - vmin = cfg.get("vmin", None) - vmax = cfg.get("vmax", None) - return { "style": ekp.styles.Style( - levels=cfg.get("bounds", levels), + levels=cfg.get("bounds", cfg.get("levels", None)), extend="both", units=units, colors=cfg.get("colors", None), ), "norm": cfg.get("norm", None), "cmap": cfg.get("cmap", None), - "levels": levels, - "vmin": vmin, - "vmax": vmax, + "levels": cfg.get("levels", None), + "vmin": cfg.get("vmin", None), + "vmax": cfg.get("vmax", None), "colors": cfg.get("colors", None), } + return (get_style,) +# @app.cell +# def _(CMAP_DEFAULTS, ekp): +# def get_style(param, metric, units_override=None): +# """Get style and colormap settings for the plot. +# Needed because cmap/norm does not work in Style(colors=cmap), +# still needs to be passed as arguments to tripcolor()/tricontourf(). +# """ +# cfg = CMAP_DEFAULTS[param] +# units = units_override if units_override is not None else cfg.get("units", "") + +# levels = cfg.get("levels", None) + +# if (metric == "BIAS"): +# # For bias, we want to use a diverging colormap with symmetric bounds around zero. 
+# max_abs_val = max(abs(levels)) +# levels = numpy.arange(-max_abs_val, max_abs_val, dtype=None) +# vmin = -max_abs_val +# vmax = max_abs_val +# else: +# vmin = cfg.get("vmin", None) +# vmax = cfg.get("vmax", None) + +# return { +# "style": ekp.styles.Style( +# levels=cfg.get("bounds", levels), +# extend="both", +# units=units, +# colors=cfg.get("colors", None), +# ), +# "norm": cfg.get("norm", None), +# "cmap": cfg.get("cmap", None), +# "levels": levels, +# "vmin": vmin, +# "vmax": vmax, +# "colors": cfg.get("colors", None), +# } +# return (get_style,) @app.cell def _( From be6fa354094d9a1c8dc88da10d94b64603b26f7a Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 27 Jan 2026 18:04:52 +0100 Subject: [PATCH 30/70] Fix to the functioning of _compute_scores and _compute_statistics with argument dim. --- src/verification/__init__.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/verification/__init__.py b/src/verification/__init__.py index 8f05349c..747d3b4e 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -77,12 +77,13 @@ def _compute_scores( prefix="", suffix="", source="", + dim=["x", "y"], ) -> xr.Dataset: """ Compute basic verification metrics between two xarray DataArrays (fcst and obs). Returns a xarray Dataset with the computed metrics. """ - dim = ["x", "y"] if "x" in fcst.dims and "y" in fcst.dims else ["values"] + error = fcst - obs if dim == []: scores = xr.Dataset( @@ -112,12 +113,13 @@ def _compute_statistics( prefix="", suffix="", source="", + dim=["x", "y"], ) -> xr.Dataset: """ Compute basic statistics of a xarray DataArray (data). Returns a xarray Dataset with the computed statistics. 
""" - dim = ["x", "y"] if "x" in data.dims and "y" in data.dims else ["values"] + stats = xr.Dataset( { f"{prefix}mean{suffix}": data.mean(dim=dim, skipna=True), @@ -161,6 +163,8 @@ def verify( """ start = time.time() + dim = ["x", "y"] if "x" in fcst.dims and "y" in fcst.dims else ["values"] + # rewrite the verification to use dask and xarray # chunk the data to avoid memory issues # compute the metrics in parallel @@ -188,19 +192,19 @@ def verify( # scores vs time (reduce spatially) score.append( _compute_scores( - fcst_param, obs_param, prefix=param + ".", source=fcst_label + fcst_param, obs_param, prefix=param + ".", source=fcst_label, dim=dim ).expand_dims(region=[region]) ) # statistics vs time (reduce spatially) fcst_statistics.append( _compute_statistics( - fcst_param, prefix=param + ".", source=fcst_label + fcst_param, prefix=param + ".", source=fcst_label, dim=dim ).expand_dims(region=[region]) ) obs_statistics.append( _compute_statistics( - obs_param, prefix=param + ".", source=obs_label + obs_param, prefix=param + ".", source=obs_label, dim=dim ).expand_dims(region=[region]) ) From 2fcd1f92a0ecd0ae8de2644ea1b9d3b04a90713a Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 29 Jan 2026 15:32:25 +0100 Subject: [PATCH 31/70] Working version for colour scale for bias that is symmetric about zero. vmin and vmax values for variables other than T_2M yet to be defined. 
--- src/plotting/colormap_defaults.py | 13 +--- workflow/scripts/plot_summary_stat_maps.mo.py | 63 ++++++------------- 2 files changed, 19 insertions(+), 57 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 5539b363..10c461a4 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -11,17 +11,6 @@ def _fallback(): warnings.warn("No colormap found for this parameter, using fallback.", UserWarning) return {"cmap": plt.get_cmap("viridis"), "norm": None, "units": ""} -# def symmetric_boundary_norm(nlevels): -# """ -# Returns a callable that creates a symmetric BoundaryNorm -# around zero with `nlevels` discrete colors. Used for creating colormaps for bias. -# """ -# def _norm(data): -# vmax = np.nanmax(np.abs(data)) -# boundaries = np.linspace(-vmax, vmax, nlevels + 1) -# return BoundaryNorm(boundaries=boundaries, ncolors=nlevels) -# return _norm - _CMAP_DEFAULTS = { "SP": {"cmap": plt.get_cmap("coolwarm", 11), "vmin": 800 * 100, "vmax": 1100 * 100}, "TD_2M": load_ncl_colormap("t2m_29lev.ct"), @@ -86,7 +75,7 @@ def _fallback(): "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "°C"}, - "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "°C"}, + "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "vmin": -5.5, "vmax": 5.5} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 11)} | {"units": "mm"} diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 9b51c920..349ea300 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ 
b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -100,68 +100,41 @@ def _(init_hour, lead_time, metric, param, season, verif_file, xr): return ds, var @app.cell -def _(CMAP_DEFAULTS, ekp): - def get_style(param, units_override=None): +def _(CMAP_DEFAULTS, ekp, np): + def get_style(param, metric, units_override=None): """Get style and colormap settings for the plot. Needed because cmap/norm does not work in Style(colors=cmap), still needs to be passed as arguments to tripcolor()/tricontourf(). """ - cfg = CMAP_DEFAULTS[param] + + metric_key = f"{param}.{metric}.spatial" + cfg = CMAP_DEFAULTS[metric_key] if metric_key in CMAP_DEFAULTS else CMAP_DEFAULTS[param] units = units_override if units_override is not None else cfg.get("units", "") + + vmin = cfg.get("vmin", None) + vmax = cfg.get("vmax", None) + levels = cfg.get("levels", None) + + # For the case of Bias, construct symmetric levels: + if metric == "BIAS": + levels = np.linspace(start = vmin, stop = vmax, num=12) + return { "style": ekp.styles.Style( - levels=cfg.get("bounds", cfg.get("levels", None)), + levels=cfg.get("bounds", levels), extend="both", units=units, colors=cfg.get("colors", None), ), "norm": cfg.get("norm", None), "cmap": cfg.get("cmap", None), - "levels": cfg.get("levels", None), - "vmin": cfg.get("vmin", None), - "vmax": cfg.get("vmax", None), + "levels": levels, + "vmin": vmin, + "vmax": vmax, "colors": cfg.get("colors", None), } - return (get_style,) -# @app.cell -# def _(CMAP_DEFAULTS, ekp): -# def get_style(param, metric, units_override=None): -# """Get style and colormap settings for the plot. -# Needed because cmap/norm does not work in Style(colors=cmap), -# still needs to be passed as arguments to tripcolor()/tricontourf(). 
-# """ -# cfg = CMAP_DEFAULTS[param] -# units = units_override if units_override is not None else cfg.get("units", "") - -# levels = cfg.get("levels", None) - -# if (metric == "BIAS"): -# # For bias, we want to use a diverging colormap with symmetric bounds around zero. -# max_abs_val = max(abs(levels)) -# levels = numpy.arange(-max_abs_val, max_abs_val, dtype=None) -# vmin = -max_abs_val -# vmax = max_abs_val -# else: -# vmin = cfg.get("vmin", None) -# vmax = cfg.get("vmax", None) - -# return { -# "style": ekp.styles.Style( -# levels=cfg.get("bounds", levels), -# extend="both", -# units=units, -# colors=cfg.get("colors", None), -# ), -# "norm": cfg.get("norm", None), -# "cmap": cfg.get("cmap", None), -# "levels": levels, -# "vmin": vmin, -# "vmax": vmax, -# "colors": cfg.get("colors", None), -# } -# return (get_style,) @app.cell def _( From 7864b5d3d68a1541be11bcb951d1856b20004616 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 29 Jan 2026 17:21:22 +0100 Subject: [PATCH 32/70] New colour map defaults for T_2M --- src/plotting/colormap_defaults.py | 28 +++++++++---------- workflow/scripts/plot_summary_stat_maps.mo.py | 4 +++ 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 10c461a4..e3ed2e22 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -52,22 +52,22 @@ def _fallback(): # always start at 0 so that the saturation of the colour corresponds to the error magnitude. 
# RMSE: - "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "m/s"}, - "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "m/s"}, - "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "°C"}, - "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "°C"}, - "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "Pa"}, - "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "Pa"}, - "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "mm"}, + "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, + "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0, "vmax": 6} | {"units": "°C"}, + "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, + "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, + "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, # MAE: - "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "m/s"}, - "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "m/s"}, - "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "°C"}, - "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "°C"}, - "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "Pa"}, - "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "Pa"}, - "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 11), "vmin": 0} | {"units": "mm"}, + "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "V_10M.MAE.spatial": 
{"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, + "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0, "vmax": 6} | {"units": "°C"}, + "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, + "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, + "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, # Bias: # diverging colour scheme for the Bias to reflect the nature of the data (can be positive or negative, symmetric). diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 349ea300..af4be700 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -118,6 +118,10 @@ def get_style(param, metric, units_override=None): # For the case of Bias, construct symmetric levels: if metric == "BIAS": levels = np.linspace(start = vmin, stop = vmax, num=12) + elif metric == "RMSE": + levels = np.linspace(start = vmin, stop = vmax, num=7) + elif metric == "MAE": + levels = np.linspace(start = vmin, stop = vmax, num=7) return { "style": ekp.styles.Style( From de0f2794ed845e44d243b54fec44a3fdc5247576 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 3 Feb 2026 16:26:46 +0100 Subject: [PATCH 33/70] Hard-Code levels for colour breaks. This way, everything is in the same place and can be changed there when needed. Accompanying changes in the plotting script (marimo cell that gets the colour map defaults). 
--- src/plotting/colormap_defaults.py | 6 ++--- workflow/scripts/plot_summary_stat_maps.mo.py | 24 ++++--------------- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index e3ed2e22..c9968240 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -55,7 +55,7 @@ def _fallback(): "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, - "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0, "vmax": 6} | {"units": "°C"}, + "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 1, 2, 3, 4, 5, 6]} | {"units": "°C"}, "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, @@ -64,7 +64,7 @@ def _fallback(): "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, - "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0, "vmax": 6} | {"units": "°C"}, + "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 1, 2, 3, 4, 5, 6]} | {"units": "°C"}, "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, @@ -75,7 +75,7 @@ def _fallback(): "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, 
"V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "°C"}, - "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "vmin": -5.5, "vmax": 5.5} | {"units": "°C"}, + "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : [-5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5]} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 11)} | {"units": "mm"} diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index af4be700..4ee6e9da 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -100,41 +100,27 @@ def _(init_hour, lead_time, metric, param, season, verif_file, xr): return ds, var @app.cell -def _(CMAP_DEFAULTS, ekp, np): +def _(CMAP_DEFAULTS, ekp): def get_style(param, metric, units_override=None): """Get style and colormap settings for the plot. Needed because cmap/norm does not work in Style(colors=cmap), still needs to be passed as arguments to tripcolor()/tricontourf(). 
""" - metric_key = f"{param}.{metric}.spatial" cfg = CMAP_DEFAULTS[metric_key] if metric_key in CMAP_DEFAULTS else CMAP_DEFAULTS[param] units = units_override if units_override is not None else cfg.get("units", "") - - vmin = cfg.get("vmin", None) - vmax = cfg.get("vmax", None) - levels = cfg.get("levels", None) - - # For the case of Bias, construct symmetric levels: - if metric == "BIAS": - levels = np.linspace(start = vmin, stop = vmax, num=12) - elif metric == "RMSE": - levels = np.linspace(start = vmin, stop = vmax, num=7) - elif metric == "MAE": - levels = np.linspace(start = vmin, stop = vmax, num=7) - return { "style": ekp.styles.Style( - levels=cfg.get("bounds", levels), + levels=cfg.get("bounds", cfg.get("levels", None)), extend="both", units=units, colors=cfg.get("colors", None), ), "norm": cfg.get("norm", None), "cmap": cfg.get("cmap", None), - "levels": levels, - "vmin": vmin, - "vmax": vmax, + "levels": cfg.get("levels", None), + "vmin": cfg.get("vmin", None), + "vmax": cfg.get("vmax", None), "colors": cfg.get("colors", None), } return (get_style,) From 69e7b36dce3537ff12c2986e6b0f3d853f5a5967 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 3 Feb 2026 17:18:29 +0100 Subject: [PATCH 34/70] New Rule for Map plots of Baselines. 
--- workflow/rules/plot.smk | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 298784a2..ee113266 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -75,7 +75,7 @@ rule plot_summary_stat_maps: script="workflow/scripts/plot_summary_stat_maps.mo.py", verif_file=OUT_ROOT / "data/runs/{run_id}/verif_aggregated.nc", output: - OUT_ROOT / "results/{experiment}/metrics/spatial/{run_id}/{param}_{metric}_{leadtime}_{region}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/{run_id}/{param}_{metric}_{region}_{leadtime}.png", wildcard_constraints: leadtime=r"\d+", # only digits resources: @@ -98,4 +98,16 @@ rule plot_summary_stat_maps: # marimo edit {input.script} -- \ # --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ - """ \ No newline at end of file + """ + +use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: + input: + script="workflow/scripts/plot_summary_stat_maps.mo.py", + verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_aggregated.nc", + output: + OUT_ROOT / "results/{experiment}/metrics/spatial/{baseline_id}/{param}_{metric}_{region}_{leadtime}.png", + params: + nc_out_dir=lambda wc: ( + Path(OUT_ROOT) / f"data/baselines/{wc.baseline_id}/verif_aggregated.nc" + # not sure if this is actually needed. Verification file is already specified above as input. Leave it for the time being. + ).resolve() \ No newline at end of file From 1353788e151fdcedafca79f786bdc29a30cca03a Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 4 Feb 2026 15:14:09 +0100 Subject: [PATCH 35/70] Different File naming (Region first -> Order) Required accompanying change in Snakefile. 
--- workflow/Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 64a30694..d21469bb 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -53,7 +53,7 @@ rule experiment_all: run_id=collect_all_candidates(), ), expand( - OUT_ROOT / "results/{experiment}/metrics/spatial/{run_id}/{param}_{metric}_{leadtime}_{region}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/{run_id}/{param}_{metric}_{region}_{leadtime}.png", run_id=collect_all_candidates(), leadtime=[6, 12], metric=["BIAS", "RMSE", "MAE"], From 569759a1f46a6730cf8ac7843025d2bb182d5b48 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 5 Feb 2026 16:00:40 +0100 Subject: [PATCH 36/70] Temporarily remove rule for maps for baselines. --- workflow/rules/plot.smk | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index ee113266..54977ac8 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -100,14 +100,14 @@ rule plot_summary_stat_maps: # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ """ -use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: - input: - script="workflow/scripts/plot_summary_stat_maps.mo.py", - verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_aggregated.nc", - output: - OUT_ROOT / "results/{experiment}/metrics/spatial/{baseline_id}/{param}_{metric}_{region}_{leadtime}.png", - params: - nc_out_dir=lambda wc: ( - Path(OUT_ROOT) / f"data/baselines/{wc.baseline_id}/verif_aggregated.nc" - # not sure if this is actually needed. Verification file is already specified above as input. Leave it for the time being. 
- ).resolve() \ No newline at end of file +# use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: +# input: +# script="workflow/scripts/plot_summary_stat_maps.mo.py", +# verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_aggregated.nc", +# output: +# OUT_ROOT / "results/{experiment}/metrics/spatial/{baseline_id}/{param}_{metric}_{region}_{leadtime}.png", +# params: +# nc_out_dir=lambda wc: ( +# Path(OUT_ROOT) / f"data/baselines/{wc.baseline_id}/verif_aggregated.nc" +# # not sure if this is actually needed. Verification file is already specified above as input. Leave it for the time being. +# ).resolve() \ No newline at end of file From bda0e0d51a2f1d8d0adb104a64d37493eed976a3 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 5 Feb 2026 16:03:04 +0100 Subject: [PATCH 37/70] Renamed domain "switzerland_small". This has caused problems before. --- src/plotting/__init__.py | 2 +- workflow/Snakefile | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/plotting/__init__.py b/src/plotting/__init__.py index 520e35ae..cb808df8 100644 --- a/src/plotting/__init__.py +++ b/src/plotting/__init__.py @@ -37,7 +37,7 @@ "extent": [0, 17.5, 40.5, 53.0], "projection": _PROJECTIONS["orthographic"], }, - "switzerland_small": { + "switzerlandsmall": { "extent": [5.5, 11.0, 45.5, 48.0], "projection": _PROJECTIONS["orthographic"], }, diff --git a/workflow/Snakefile b/workflow/Snakefile index d21469bb..3bcee554 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -56,9 +56,9 @@ rule experiment_all: OUT_ROOT / "results/{experiment}/metrics/spatial/{run_id}/{param}_{metric}_{region}_{leadtime}.png", run_id=collect_all_candidates(), leadtime=[6, 12], - metric=["BIAS", "RMSE", "MAE"], - param=["TD_2M", "T_2M", "PMSL", "PS", "TOT_PREC"], - region=["switzerland", "switzerland_small"], + metric=["BIAS", "RMSE"], + param=["T_2M", "TOT_PREC"], + region=["switzerland", "switzerlandsmall"], experiment=EXPERIMENT_HASH ) From 
ccf6279e50ac762a55055c35946fb56875fd6cb1 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 5 Feb 2026 16:04:24 +0100 Subject: [PATCH 38/70] Generate Colour breaks more elegantly. --- src/plotting/colormap_defaults.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index c9968240..dd6530a3 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -75,7 +75,7 @@ def _fallback(): "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "°C"}, - "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : [-5.5, -4.5, -3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5, 4.5, 5.5]} | {"units": "°C"}, + "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -5.5, stop = 6, step = 1)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 11)} | {"units": "mm"} From 258b2dea37c0376944cf92dcb86f48c1ccc359ab Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 5 Feb 2026 16:24:41 +0100 Subject: [PATCH 39/70] Colour breaks from function for all T2m metrics. 
--- src/plotting/colormap_defaults.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index dd6530a3..d7b35745 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -55,7 +55,7 @@ def _fallback(): "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, - "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 1, 2, 3, 4, 5, 6]} | {"units": "°C"}, + "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 6.1, step = 1)} | {"units": "°C"}, "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, @@ -64,7 +64,7 @@ def _fallback(): "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, - "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 1, 2, 3, 4, 5, 6]} | {"units": "°C"}, + "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 6.1, step = 1)} | {"units": "°C"}, "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, @@ -75,7 +75,7 @@ def _fallback(): "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, 
"V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "°C"}, - "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -5.5, stop = 6, step = 1)} | {"units": "°C"}, + "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -5.5, stop = 5.6, step = 1)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 11)} | {"units": "mm"} From 5c0151b8e78d9573ea1eabffc8054de285dfdbef Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 9 Feb 2026 19:09:45 +0100 Subject: [PATCH 40/70] Map plots now also for baselines. --- workflow/Snakefile | 11 ++++++++++- workflow/rules/plot.smk | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 3bcee554..991c288d 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -53,13 +53,22 @@ rule experiment_all: run_id=collect_all_candidates(), ), expand( - OUT_ROOT / "results/{experiment}/metrics/spatial/{run_id}/{param}_{metric}_{region}_{leadtime}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/runs/{run_id}/{param}_{metric}_{region}_{leadtime}.png", run_id=collect_all_candidates(), leadtime=[6, 12], metric=["BIAS", "RMSE"], param=["T_2M", "TOT_PREC"], region=["switzerland", "switzerlandsmall"], experiment=EXPERIMENT_HASH + ), + expand( + OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{leadtime}.png", + baseline_id=collect_all_baselines(), + leadtime=[6, 12], + metric=["BIAS", "RMSE"], + param=["T_2M", "TOT_PREC"], + region=["switzerland", "switzerlandsmall"], + experiment=EXPERIMENT_HASH ) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 
54977ac8..0190761c 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -75,7 +75,7 @@ rule plot_summary_stat_maps: script="workflow/scripts/plot_summary_stat_maps.mo.py", verif_file=OUT_ROOT / "data/runs/{run_id}/verif_aggregated.nc", output: - OUT_ROOT / "results/{experiment}/metrics/spatial/{run_id}/{param}_{metric}_{region}_{leadtime}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/runs/{run_id}/{param}_{metric}_{region}_{leadtime}.png", wildcard_constraints: leadtime=r"\d+", # only digits resources: @@ -100,6 +100,37 @@ rule plot_summary_stat_maps: # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ """ +rule plot_summary_stat_maps_baseline: + localrule: True + input: + script="workflow/scripts/plot_summary_stat_maps.mo.py", + verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_aggregated.nc", + output: + OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{leadtime}.png", + wildcard_constraints: + leadtime=r"\d+", # only digits + resources: + slurm_partition="postproc", + cpus_per_task=1, + runtime="10m", + params: + nc_out_dir=lambda wc: ( + Path(OUT_ROOT) / f"data/baselines/{wc.baseline_id}/verif_aggregated.nc" + # not sure how to do this, because the baselines are in, e.g., output/data/baselines/COSMO-E/verif_aggregated.nc + # and the runs are in output/data/runs/runID/verif_aggregated.nc + ).resolve(), + shell: + """ + export ECCODES_DEFINITION_PATH=$(realpath .venv/share/eccodes-cosmo-resources/definitions) + python {input.script} \ + --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ + --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ + # interactive editing (needs to set localrule: True and use only one core) + # marimo edit {input.script} -- \ + # --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ + # --param {wildcards.param} --leadtime 
{wildcards.leadtime} --metric {wildcards.metric} \ + """ + # use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: # input: # script="workflow/scripts/plot_summary_stat_maps.mo.py", From 7b50809e8cc9fdcd4e684cfed1dd90196ea0235a Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 9 Feb 2026 19:10:28 +0100 Subject: [PATCH 41/70] Fix memory leak occurring during creation of html. --- .../scripts/report_experiment_dashboard.py | 68 ++++++++++++++++++- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/workflow/scripts/report_experiment_dashboard.py b/workflow/scripts/report_experiment_dashboard.py index 327547b0..fc6ed6a9 100644 --- a/workflow/scripts/report_experiment_dashboard.py +++ b/workflow/scripts/report_experiment_dashboard.py @@ -28,19 +28,81 @@ def program_summary_log(args): LOG.info("=" * 80) +def dataset_metrics_to_dataframe( + ds: xr.Dataset, + forbidden_dims=("values",), + metric_dims=("source", "season", "init_hour", "region", "lead_time", "eps"), +): + """ + Convert a verification xarray.Dataset to a pandas.DataFrame compatible with + the old `to_array("stack").to_dataframe()` workflow. + + Returns columns: + - metric_dims... + - stack (e.g. 
"T_2M.MAE") + - value + """ + + # keep only non-spatial metric variables + metric_vars = [] + for v in ds.data_vars: + da = ds[v] + + # skip spatial metrics + if "spatial" in v: + continue + + # skip forbidden dimensions + if any(d in da.dims for d in forbidden_dims): + continue + + # only keep vars whose dims are a subset of expected metric dims + if not set(da.dims).issubset(metric_dims): + continue + + metric_vars.append(v) + + if not metric_vars: + raise ValueError("No compatible metric variables found in dataset") + + # stack variables exactly like the original code + df = ( + ds[metric_vars] + .to_array("stack") + .to_dataframe(name="value") + .reset_index() + ) + + return df + def main(args): program_summary_log(args) # Load, de-duplicate lead_time, and keep best provider per source (same logic as verif_plot_metrics) - dfs = [xr.open_dataset(f) for f in args.verif_files] + drop_variables = ["TOT_PREC.MAE.spatial", "TOT_PREC.RMSE.spatial", "TOT_PREC.BIAS.spatial", + "PMSL.MAE.spatial", "PMSL.RMSE.spatial", "PMSL.BIAS.spatial", + "PS.MAE.spatial", "PS.RMSE.spatial", "PS.BIAS.spatial", + "V_10M.MAE.spatial", "V_10M.RMSE.spatial", "V_10M.BIAS.spatial", + "U_10M.MAE.spatial", "U_10M.RMSE.spatial", "U_10M.BIAS.spatial", + "TD_2M.MAE.spatial", "TD_2M.RMSE.spatial", "TD_2M.BIAS.spatial", + "T_2M.MAE.spatial", "T_2M.RMSE.spatial", "T_2M.BIAS.spatial"] + + dfs = [xr.open_dataset(f, drop_variables = drop_variables) for f in args.verif_files] dfs = [_ensure_unique_lead_time(d) for d in dfs] dfs = _select_best_sources(dfs) ds = xr.concat(dfs, dim="source", join="outer") LOG.info("Loaded verification netcdf: \n%s", ds) # extract only non-spatial variables to pd.DataFrame - nonspatial_vars = [d for d in ds.data_vars if "spatial" not in d] - df = ds[nonspatial_vars].to_array("stack").to_dataframe(name="value").reset_index() + # nonspatial_vars = [d for d in ds.data_vars if "spatial" not in d] + # df = 
ds[nonspatial_vars].to_array("stack").to_dataframe(name="value").reset_index() + + df = dataset_metrics_to_dataframe( + ds, + forbidden_dims=("values",), # critical! + metric_dims=("source", "season", "init_hour", "region", "lead_time", "eps"), + ) + df[["param", "metric"]] = df["stack"].str.split(".", n=1, expand=True) df.drop(columns=["stack"], inplace=True) df["lead_time"] = df["lead_time"].dt.total_seconds() / 3600 From c3d37a20fd3db2ff5815003e462ca623054dcac3 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 10 Feb 2026 17:01:08 +0100 Subject: [PATCH 42/70] "Alias" Rule for Plotting maps of Baselines This does the same thing with less code. --- workflow/rules/plot.smk | 37 +++---------------------------------- 1 file changed, 3 insertions(+), 34 deletions(-) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 0190761c..2924201e 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -100,45 +100,14 @@ rule plot_summary_stat_maps: # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ """ -rule plot_summary_stat_maps_baseline: - localrule: True +use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: input: script="workflow/scripts/plot_summary_stat_maps.mo.py", verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_aggregated.nc", output: OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{leadtime}.png", - wildcard_constraints: - leadtime=r"\d+", # only digits - resources: - slurm_partition="postproc", - cpus_per_task=1, - runtime="10m", params: nc_out_dir=lambda wc: ( Path(OUT_ROOT) / f"data/baselines/{wc.baseline_id}/verif_aggregated.nc" - # not sure how to do this, because the baselines are in, e.g., output/data/baselines/COSMO-E/verif_aggregated.nc - # and the runs are in output/data/runs/runID/verif_aggregated.nc - ).resolve(), - shell: - """ - export ECCODES_DEFINITION_PATH=$(realpath 
.venv/share/eccodes-cosmo-resources/definitions) - python {input.script} \ - --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ - --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ - # interactive editing (needs to set localrule: True and use only one core) - # marimo edit {input.script} -- \ - # --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ - # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ - """ - -# use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: -# input: -# script="workflow/scripts/plot_summary_stat_maps.mo.py", -# verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_aggregated.nc", -# output: -# OUT_ROOT / "results/{experiment}/metrics/spatial/{baseline_id}/{param}_{metric}_{region}_{leadtime}.png", -# params: -# nc_out_dir=lambda wc: ( -# Path(OUT_ROOT) / f"data/baselines/{wc.baseline_id}/verif_aggregated.nc" -# # not sure if this is actually needed. Verification file is already specified above as input. Leave it for the time being. -# ).resolve() \ No newline at end of file + # not sure if this is actually needed. Verification file is already specified above as input. Leave it for the time being. + ).resolve() \ No newline at end of file From 8323fbda7017e29b80d46cdba5831d1923332a15 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 11 Feb 2026 10:19:44 +0100 Subject: [PATCH 43/70] Reorganisation of Domains. --- src/plotting/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/plotting/__init__.py b/src/plotting/__init__.py index cb808df8..c905fcc3 100644 --- a/src/plotting/__init__.py +++ b/src/plotting/__init__.py @@ -29,15 +29,15 @@ "extent": [-16.0, 25.0, 30.0, 65.0], "projection": _PROJECTIONS["orthographic"], }, + # The domains which are originally called "centraleurope" and "switzerland" + # are mostly the same. 
I suggest making domain "switzerland" much smaller, + # so that more spatial detail can be seen, especially in the complex + # topography of the alps. "centraleurope": { "extent": [-2.6, 19.5, 40.2, 52.3], "projection": _PROJECTIONS["orthographic"], }, "switzerland": { - "extent": [0, 17.5, 40.5, 53.0], - "projection": _PROJECTIONS["orthographic"], - }, - "switzerlandsmall": { "extent": [5.5, 11.0, 45.5, 48.0], "projection": _PROJECTIONS["orthographic"], }, From d03781cf86b0f558097b688f0effbc5cb6fef363 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 11 Feb 2026 11:57:17 +0100 Subject: [PATCH 44/70] Introduce Seasonal Stratification. --- workflow/Snakefile | 22 ++++++++++--------- workflow/rules/plot.smk | 6 +++-- workflow/scripts/plot_summary_stat_maps.mo.py | 3 ++- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 991c288d..51d13b41 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -53,21 +53,23 @@ rule experiment_all: run_id=collect_all_candidates(), ), expand( - OUT_ROOT / "results/{experiment}/metrics/spatial/runs/{run_id}/{param}_{metric}_{region}_{leadtime}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/runs/{run_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", run_id=collect_all_candidates(), - leadtime=[6, 12], - metric=["BIAS", "RMSE"], - param=["T_2M", "TOT_PREC"], - region=["switzerland", "switzerlandsmall"], + leadtime=[6], + metric=["BIAS"], + param=["T_2M"], + region=["centraleurope", "switzerland"], + season=["all", "DJF", "MAM", "JJA", "SON"], experiment=EXPERIMENT_HASH ), expand( - OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{leadtime}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", baseline_id=collect_all_baselines(), - leadtime=[6, 12], - metric=["BIAS", "RMSE"], - param=["T_2M", "TOT_PREC"], - region=["switzerland", 
"switzerlandsmall"], + leadtime=[6], + metric=["BIAS"], + param=["T_2M"], + region=["centraleurope", "switzerland"], + season=["all", "DJF", "MAM", "JJA", "SON"], experiment=EXPERIMENT_HASH ) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 2924201e..10f85c47 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -75,7 +75,7 @@ rule plot_summary_stat_maps: script="workflow/scripts/plot_summary_stat_maps.mo.py", verif_file=OUT_ROOT / "data/runs/{run_id}/verif_aggregated.nc", output: - OUT_ROOT / "results/{experiment}/metrics/spatial/runs/{run_id}/{param}_{metric}_{region}_{leadtime}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/runs/{run_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", wildcard_constraints: leadtime=r"\d+", # only digits resources: @@ -94,10 +94,12 @@ rule plot_summary_stat_maps: python {input.script} \ --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ + --season {wildcards.season} \ # interactive editing (needs to set localrule: True and use only one core) # marimo edit {input.script} -- \ # --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ + # --season {wildcards.season} \ """ use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: @@ -105,7 +107,7 @@ use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: script="workflow/scripts/plot_summary_stat_maps.mo.py", verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_aggregated.nc", output: - OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{leadtime}.png", + OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", params: nc_out_dir=lambda wc: ( Path(OUT_ROOT) / 
f"data/baselines/{wc.baseline_id}/verif_aggregated.nc" diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 4ee6e9da..4ff57749 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -138,6 +138,7 @@ def _( outfn, param, region, + season, var, ): # plot individual fields @@ -172,7 +173,7 @@ def _( # validtime = state["valid_time"].strftime("%Y%m%d%H%M") # # leadtime = int(state["lead_time"].total_seconds() // 3600) - fig.title(f"{metric} of {param}, Lead Time: {lead_time}") + fig.title(f"{metric} of {param}, Season: {season}, Lead Time: {lead_time}") fig.save(outfn, bbox_inches="tight", dpi=200) LOG.info(f"saved: {outfn}") From 48e67892cb1d690aa16b31951d50c08da24db5a9 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 12 Feb 2026 12:33:04 +0100 Subject: [PATCH 45/70] Definitive Colour Levels for 2m-Temperature --- src/plotting/colormap_defaults.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index d7b35745..12dd9420 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -55,7 +55,7 @@ def _fallback(): "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, - "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 6.1, step = 1)} | {"units": "°C"}, + "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 3.1, step = 0.5)} | {"units": "°C"}, "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "TOT_PREC.RMSE.spatial": {"cmap": 
plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, @@ -64,7 +64,7 @@ def _fallback(): "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, - "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 6.1, step = 1)} | {"units": "°C"}, + "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 3.1, step = 0.5)} | {"units": "°C"}, "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, @@ -75,7 +75,7 @@ def _fallback(): "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "°C"}, - "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -5.5, stop = 5.6, step = 1)} | {"units": "°C"}, + "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 11)} | {"units": "mm"} From 1c3e62984566d039b7af8ae0ba47b81aa6c53184 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Fri, 13 Feb 2026 16:17:09 +0100 Subject: [PATCH 46/70] Colour levels for Precipitation complete. 
--- src/plotting/colormap_defaults.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 12dd9420..bda0d86c 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -58,8 +58,10 @@ def _fallback(): "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 3.1, step = 0.5)} | {"units": "°C"}, "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, - "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, - + "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.001, 0.0015, 0.002, 0.003, 0.004]} | {"units": "mm"}, + # would ideally want a 6th colour on the high end of the colour scale, but somehow + # matplotlib does not do that -> ? + # MAE: "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, @@ -67,7 +69,8 @@ def _fallback(): "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 3.1, step = 0.5)} | {"units": "°C"}, "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, - "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "mm"}, + "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.001, 0.0015, 0.002, 0.003, 0.004]} | {"units": "mm"}, + # the levels for precip are a bit on the bright side, but still worth keeping consistent with RMSE. # Bias: # diverging colour scheme for the Bias to reflect the nature of the data (can be positive or negative, symmetric). 
@@ -78,7 +81,7 @@ def _fallback(): "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, - "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 11)} | {"units": "mm"} + "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 9), "levels": [-0.001, -0.0005, -0.00025, -0.0001, 0.0001, 0.00025, 0.0005, 0.001]} | {"units": "mm"} } CMAP_DEFAULTS = defaultdict(_fallback, _CMAP_DEFAULTS) From a4a5607fb4454b382dc3ffe58adbd9cb67a38b07 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 17 Feb 2026 12:14:42 +0100 Subject: [PATCH 47/70] Colour Levels for Dew-Point Temperature and similar style for MAE and RMSE of T2m. --- src/plotting/colormap_defaults.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index bda0d86c..612265b9 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -54,8 +54,8 @@ def _fallback(): # RMSE: "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, - "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, - "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 3.1, step = 0.5)} | {"units": "°C"}, + "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, + "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, 
"TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.001, 0.0015, 0.002, 0.003, 0.004]} | {"units": "mm"}, @@ -65,8 +65,8 @@ def _fallback(): # MAE: "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, - "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "°C"}, - "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": np.arange(start = 0, stop = 3.1, step = 0.5)} | {"units": "°C"}, + "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, + "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.001, 0.0015, 0.002, 0.003, 0.004]} | {"units": "mm"}, @@ -77,7 +77,7 @@ def _fallback(): # Red-Blue colour scheme for all variables except precipitation, where a Brown-Green scheme is more suggestive. 
"U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, - "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "°C"}, + "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, From f245ea38785ad2901746dfd4baf68051d26c9484 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 19 Feb 2026 15:46:58 +0100 Subject: [PATCH 48/70] Calculate and Evaluate Wind Speed too --- src/plotting/colormap_defaults.py | 3 +++ workflow/Snakefile | 4 ++-- workflow/scripts/verif_single_init.py | 11 +++++++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 612265b9..a9cd0b3d 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -54,6 +54,7 @@ def _fallback(): # RMSE: "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "WS_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, @@ -65,6 +66,7 @@ def _fallback(): # MAE: "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.MAE.spatial": 
{"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "WS_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, @@ -77,6 +79,7 @@ def _fallback(): # Red-Blue colour scheme for all variables except precipitation, where a Brown-Green scheme is more suggestive. "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, + "WS_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, diff --git a/workflow/Snakefile b/workflow/Snakefile index 51d13b41..a2989eb5 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -57,7 +57,7 @@ rule experiment_all: run_id=collect_all_candidates(), leadtime=[6], metric=["BIAS"], - param=["T_2M"], + param=["WS_10M"], region=["centraleurope", "switzerland"], season=["all", "DJF", "MAM", "JJA", "SON"], experiment=EXPERIMENT_HASH @@ -67,7 +67,7 @@ rule experiment_all: baseline_id=collect_all_baselines(), leadtime=[6], metric=["BIAS"], - param=["T_2M"], + param=["WS_10M"], region=["centraleurope", "switzerland"], season=["all", "DJF", "MAM", "JJA", "SON"], experiment=EXPERIMENT_HASH diff --git a/workflow/scripts/verif_single_init.py b/workflow/scripts/verif_single_init.py index 52d9e08f..44cb2ddb 100644 --- 
a/workflow/scripts/verif_single_init.py +++ b/workflow/scripts/verif_single_init.py @@ -111,8 +111,15 @@ def main(args: ScriptConfig): analysis, ) - # compute metrics and statistics + # before verifying, calculate wind speed: + for ds in [fcst, analysis]: + if "U_10M" in ds and "V_10M" in ds: + LOG.info("Calculating Wind Speed (WS_10M)...") + ds["WS_10M"] = (ds["U_10M"]**2 + ds["V_10M"]**2)**0.5 + # Optional: Add metadata for the netCDF output + ds["WS_10M"].attrs = {"units": "m/s", "long_name": "10m Wind Speed"} + # compute metrics and statistics results = verify(fcst, analysis, args.label, args.analysis_label, args.regions) LOG.info("Verification results:\n%s", results) @@ -150,7 +157,7 @@ def main(args: ScriptConfig): parser.add_argument( "--params", type=lambda x: x.split(","), - default=["T_2M", "TD_2M", "U_10M", "V_10M", "PS", "PMSL", "TOT_PREC"], + default=["T_2M", "TD_2M", "U_10M", "V_10M", "WS_10M", "PS", "PMSL", "TOT_PREC"], ) parser.add_argument( "--steps", From ccea8dbca6de9b20c978d8ed6f75fbe0b625a1e2 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 19 Feb 2026 16:55:34 +0100 Subject: [PATCH 49/70] Fix for Wind speed calculation Now works! --- workflow/scripts/verif_single_init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflow/scripts/verif_single_init.py b/workflow/scripts/verif_single_init.py index 44cb2ddb..27b9cbf6 100644 --- a/workflow/scripts/verif_single_init.py +++ b/workflow/scripts/verif_single_init.py @@ -157,7 +157,7 @@ def main(args: ScriptConfig): parser.add_argument( "--params", type=lambda x: x.split(","), - default=["T_2M", "TD_2M", "U_10M", "V_10M", "WS_10M", "PS", "PMSL", "TOT_PREC"], + default=["T_2M", "TD_2M", "U_10M", "V_10M", "PS", "PMSL", "TOT_PREC"], ) parser.add_argument( "--steps", From ce4242fd5cc14b1b8e0426b4cd773fc240ff675c Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 23 Feb 2026 14:39:12 +0100 Subject: [PATCH 50/70] Name Wind Speed consistent with the previous def. 
--- src/plotting/colormap_defaults.py | 6 +++--- workflow/Snakefile | 4 ++-- workflow/scripts/verif_single_init.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index a9cd0b3d..06790652 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -54,7 +54,7 @@ def _fallback(): # RMSE: "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, - "WS_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "SP_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, @@ -66,7 +66,7 @@ def _fallback(): # MAE: "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, - "WS_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "SP_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, @@ -79,7 +79,7 @@ def _fallback(): # Red-Blue colour scheme for all variables except precipitation, where a Brown-Green scheme is more suggestive. 
"U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, - "WS_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, + "SP_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, diff --git a/workflow/Snakefile b/workflow/Snakefile index a2989eb5..aa1ab24a 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -57,7 +57,7 @@ rule experiment_all: run_id=collect_all_candidates(), leadtime=[6], metric=["BIAS"], - param=["WS_10M"], + param=["SP_10M"], region=["centraleurope", "switzerland"], season=["all", "DJF", "MAM", "JJA", "SON"], experiment=EXPERIMENT_HASH @@ -67,7 +67,7 @@ rule experiment_all: baseline_id=collect_all_baselines(), leadtime=[6], metric=["BIAS"], - param=["WS_10M"], + param=["SP_10M"], region=["centraleurope", "switzerland"], season=["all", "DJF", "MAM", "JJA", "SON"], experiment=EXPERIMENT_HASH diff --git a/workflow/scripts/verif_single_init.py b/workflow/scripts/verif_single_init.py index 27b9cbf6..dae6020a 100644 --- a/workflow/scripts/verif_single_init.py +++ b/workflow/scripts/verif_single_init.py @@ -114,10 +114,10 @@ def main(args: ScriptConfig): # before verifying, calculate wind speed: for ds in [fcst, analysis]: if "U_10M" in ds and "V_10M" in ds: - LOG.info("Calculating Wind Speed (WS_10M)...") - ds["WS_10M"] = (ds["U_10M"]**2 + ds["V_10M"]**2)**0.5 + LOG.info("Calculating Wind Speed (SP_10M)...") + ds["SP_10M"] = (ds["U_10M"]**2 + ds["V_10M"]**2)**0.5 # Optional: Add metadata for the netCDF output - ds["WS_10M"].attrs = {"units": "m/s", 
"long_name": "10m Wind Speed"} + ds["SP_10M"].attrs = {"units": "m/s", "long_name": "10m Wind Speed"} # compute metrics and statistics results = verify(fcst, analysis, args.label, args.analysis_label, args.regions) From 9628fae72db3f287faa9ce3b5bb06ee37fa0a4f7 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 24 Feb 2026 10:09:27 +0100 Subject: [PATCH 51/70] Verification files not temporary any more Some SLURM optimizations. --- workflow/rules/verif.smk | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk index 0566ccf2..4d60045d 100644 --- a/workflow/rules/verif.smk +++ b/workflow/rules/verif.smk @@ -27,13 +27,14 @@ rule verif_metrics_baseline: analysis_label=config["analysis"].get("label"), regions=REGION_TXT, output: - temp(OUT_ROOT / "data/baselines/{baseline_id}/{init_time}/verif.nc"), + OUT_ROOT / "data/baselines/{baseline_id}/{init_time}/verif.nc", log: OUT_ROOT / "logs/verif_metrics_baseline/{baseline_id}-{init_time}.log", resources: cpus_per_task=24, mem_mb=50_000, runtime="60m", + slurm_extra="--exclude=nid001229,nid001225,nid001226,nid001227,nid001230" shell: """ uv run {input.script} \ @@ -63,7 +64,7 @@ rule verif_metrics: inference_okfile=rules.execute_inference.output.okfile, analysis_zarr=config["analysis"].get("analysis_zarr"), output: - temp(OUT_ROOT / "data/runs/{run_id}/{init_time}/verif.nc"), + OUT_ROOT / "data/runs/{run_id}/{init_time}/verif.nc", # wildcard_constraints: # run_id="^" # to avoid ambiguitiy with run_baseline_verif # TODO: implement logic to use experiment name instead of run_id as wildcard @@ -81,6 +82,7 @@ rule verif_metrics: cpus_per_task=24, mem_mb=50_000, runtime="60m", + slurm_extra="--exclude=nid001229,nid001225,nid001226,nid001227,nid001230" shell: """ uv run {input.script} \ @@ -117,7 +119,8 @@ rule verif_metrics_aggregation: resources: cpus_per_task=24, mem_mb=250_000, - runtime="2h", + runtime="24h", + 
slurm_extra="--exclude=nid001229,nid001225,nid001226,nid001227,nid001230" shell: """ uv run {input.script} {input.verif_nc} --output {output} > {log} 2>&1 From 59d4b12a1369d33ed764550e242ad6f78d7f2464 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 26 Feb 2026 13:53:10 +0100 Subject: [PATCH 52/70] Precipitation plotting: meters to millimeters. --- src/plotting/colormap_defaults.py | 6 +- workflow/scripts/plot_summary_stat_maps.mo.py | 78 ++++++++++++++++++- 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 06790652..4fc652e4 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -59,7 +59,7 @@ def _fallback(): "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, - "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.001, 0.0015, 0.002, 0.003, 0.004]} | {"units": "mm"}, + "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 1, 1.5, 2, 3, 4]} | {"units": "mm"}, # would ideally want a 6th colour on the high end of the colour scale, but somehow # matplotlib does not do that -> ? 
@@ -71,7 +71,7 @@ def _fallback(): "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, - "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.001, 0.0015, 0.002, 0.003, 0.004]} | {"units": "mm"}, + "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 1, 1.5, 2, 3, 4]} | {"units": "mm"}, # the levels for precip are a bit on the bright side, but still worth keeping consistent with RMSE. # Bias: @@ -84,7 +84,7 @@ def _fallback(): "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, - "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 9), "levels": [-0.001, -0.0005, -0.00025, -0.0001, 0.0001, 0.00025, 0.0005, 0.001]} | {"units": "mm"} + "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 9), "levels": [-1, -0.5, -0.25, -0.1, 0.1, 0.25, 0.5, 1]} | {"units": "mm"} } CMAP_DEFAULTS = defaultdict(_fallback, _CMAP_DEFAULTS) diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 4ff57749..0db54b3f 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -125,6 +125,76 @@ def get_style(param, metric, units_override=None): } return (get_style,) +# @app.cell +# def _(LOG, np): +# """Preprocess fields with pint-based unit conversion and derived quantities.""" +# try: +# import pint # type: ignore + +# _ureg = pint.UnitRegistry() + +# def _k_to_c(arr): +# # robust conversion with pint, fallback if dtype unsupported +# try: +# return (_ureg.Quantity(arr, 
_ureg.kelvin).to(_ureg.degC)).magnitude +# except Exception: +# return arr - 273.15 + +# def _ms_to_knots(arr): +# # robust conversion with pint, fallback if dtype unsupported +# try: +# return ( +# _ureg.Quantity(arr, _ureg.meter / _ureg.second).to(_ureg.knot) +# ).magnitude +# except Exception: +# return arr * 1.943844 + +# def _m_to_mm(arr): +# # robust conversion with pint, fallback if dtype unsupported +# try: +# return (_ureg.Quantity(arr, _ureg.meter).to(_ureg.millimeter)).magnitude +# except Exception: +# return arr * 1000 + +# except Exception: +# LOG.warning("pint not available; falling back hardcoded conversions") + +# def _k_to_c(arr): +# return arr - 273.15 + +# def _ms_to_knots(arr): +# return arr * 1.943844 + +# def _m_to_mm(arr): +# return arr * 1000 + +# def preprocess_field(param: str, state: dict): +# """ +# - Temperatures: K -> °C +# - Wind speed: sqrt(u^2 + v^2) +# - Precipitation: m -> mm +# Returns: (field_array, units_override or None) +# """ +# fields = state["fields"] +# # temperature variables +# if param in ("T_2M", "TD_2M", "T", "TD"): +# return _k_to_c(fields[param]), "°C" +# # 10m wind speed (allow legacy 'uv' alias) +# if param == "SP_10M": +# u = fields["U_10M"] +# v = fields["V_10M"] +# return np.sqrt(u**2 + v**2), "m/s" +# # wind speed from standard-level components +# if param == "SP": +# u = fields["U"] +# v = fields["V"] +# return np.sqrt(u**2 + v**2), "m/s" +# if param == "TOT_PREC": +# return _m_to_mm(fields[param]), "mm" +# # default: passthrough +# return fields[param], None + +# return (preprocess_field,) @app.cell def _( @@ -162,7 +232,13 @@ def _( # # preprocess field (unit conversion, derived quantities) # field, units_override = preprocess_field(param, state) - plotter.plot_field(subplot, ds.values.ravel(), **get_style(var, metric)) + # Quick fix for precipitation (might have to use preprocess_field in the end (see above)) + if param == "TOT_PREC": + plot_vals = ds.values.ravel()*1000 + else: + plot_vals = 
ds.values.ravel() + + plotter.plot_field(subplot, plot_vals, **get_style(var, metric)) # subplot.ax.add_geometries( # state["lam_envelope"], # edgecolor="black", From 28ad4c5c343014109b4bd546e1d6ceeb04e59de4 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 26 Feb 2026 13:54:31 +0100 Subject: [PATCH 53/70] Map plotting not on default busy nodes. Change back (clean up in the end) --- workflow/rules/plot.smk | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 10f85c47..23b049d7 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -82,6 +82,7 @@ rule plot_summary_stat_maps: slurm_partition="postproc", cpus_per_task=1, runtime="10m", + slurm_extra="--exclude=nid001229,nid001225,nid001226,nid001227,nid001230" params: nc_out_dir=lambda wc: ( Path(OUT_ROOT) / f"data/runs/{wc.run_id}/verif_aggregated.nc" From db60db5feded23a055c7257806c7fde374c1ce4b Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 26 Feb 2026 15:19:20 +0100 Subject: [PATCH 54/70] Preliminary Colour Levels for Wind Bias. Might have to slightly adjust for ICON (emulator). --- src/plotting/colormap_defaults.py | 10 +++++----- workflow/scripts/plot_summary_stat_maps.mo.py | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 4fc652e4..b46fde95 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -77,11 +77,11 @@ def _fallback(): # Bias: # diverging colour scheme for the Bias to reflect the nature of the data (can be positive or negative, symmetric). # Red-Blue colour scheme for all variables except precipitation, where a Brown-Green scheme is more suggestive. 
- "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, - "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, - "SP_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "m/s"}, - "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, - "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels" : np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, + "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, + "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, + "SP_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, + "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, + "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 9), "levels": [-1, -0.5, -0.25, -0.1, 0.1, 0.25, 0.5, 1]} | {"units": "mm"} diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 0db54b3f..728f49db 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -233,6 +233,8 @@ def _( # field, units_override = preprocess_field(param, state) # Quick fix for precipitation (might have to use preprocess_field in the end (see above)) + # for wind speed, preprocess_field only has conversion from 
m/s to knots + # (not the other way around), so I assume the values are in m/s if param == "TOT_PREC": plot_vals = ds.values.ravel()*1000 else: From 2fcfd8a339050a69f02d93c5dc44cdc729064bbb Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 26 Feb 2026 15:56:52 +0100 Subject: [PATCH 55/70] Replace EXPERIMENT_HASH by EXPERIMENT_NAME Solves a Problem that occurred due to the merge. --- workflow/Snakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 695abc77..2c11ecb9 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -134,7 +134,7 @@ rule experiment_all: param=["SP_10M"], region=["centraleurope", "switzerland"], season=["all", "DJF", "MAM", "JJA", "SON"], - experiment=EXPERIMENT_HASH + experiment=EXPERIMENT_NAME ), expand( OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", @@ -144,7 +144,7 @@ rule experiment_all: param=["SP_10M"], region=["centraleurope", "switzerland"], season=["all", "DJF", "MAM", "JJA", "SON"], - experiment=EXPERIMENT_HASH + experiment=EXPERIMENT_NAME ) From c00c5a074444fa29201090ddeb615c0b51b16491 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 2 Mar 2026 11:34:47 +0100 Subject: [PATCH 56/70] Colour Levels for Pressure Bias. 
--- src/plotting/colormap_defaults.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index b46fde95..21bde983 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -82,8 +82,8 @@ def _fallback(): "SP_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, - "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, - "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11)} | {"units": "Pa"}, + "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -110, stop = 111, step = 20)} | {"units": "Pa"}, + "PS.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -110, stop = 111, step = 20)} | {"units": "Pa"}, "TOT_PREC.BIAS.spatial": {"cmap": plt.get_cmap("BrBG", 9), "levels": [-1, -0.5, -0.25, -0.1, 0.1, 0.25, 0.5, 1]} | {"units": "mm"} } From 23d8c9c55a8bf4d68dc0057b25050bbc0a11f399 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 9 Mar 2026 16:56:31 +0100 Subject: [PATCH 57/70] Colour Levels for Pressure MAE and RMSE --- src/plotting/colormap_defaults.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index 21bde983..a555d1aa 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -57,8 +57,8 @@ def _fallback(): "SP_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 
2, 2.5, 3]} | {"units": "°C"}, "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, - "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, - "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, + "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 7), "levels": [0, 50, 100, 150, 200, 250, 300, 350]} | {"units": "Pa"}, + "PS.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 7), "levels": [0, 50, 100, 150, 200, 250, 300, 350]} | {"units": "Pa"}, "TOT_PREC.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 1, 1.5, 2, 3, 4]} | {"units": "mm"}, # would ideally want a 6th colour on the high end of the colour scale, but somehow # matplotlib does not do that -> ? @@ -69,17 +69,17 @@ def _fallback(): "SP_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, - "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, - "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "Pa"}, + "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 7), "levels": [0, 50, 100, 150, 200, 250, 300, 350]} | {"units": "Pa"}, + "PS.MAE.spatial": {"cmap": plt.get_cmap("Reds", 7), "levels": [0, 50, 100, 150, 200, 250, 300, 350]} | {"units": "Pa"}, "TOT_PREC.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 1, 1.5, 2, 3, 4]} | {"units": "mm"}, # the levels for precip are a bit on the bright side, but still worth keeping consistent with RMSE. # Bias: # diverging colour scheme for the Bias to reflect the nature of the data (can be positive or negative, symmetric). 
# Red-Blue colour scheme for all variables except precipitation, where a Brown-Green scheme is more suggestive. - "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, - "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, - "SP_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, + "U_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 9), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, + "V_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 9), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, + "SP_10M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 9), "levels": np.arange(start = -2.25, stop = 2.26, step = 0.5)} | {"units": "m/s"}, "TD_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "T_2M.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -2.75, stop = 2.76, step = 0.5)} | {"units": "°C"}, "PMSL.BIAS.spatial": {"cmap": plt.get_cmap("RdBu_r", 11), "levels": np.arange(start = -110, stop = 111, step = 20)} | {"units": "Pa"}, From 84edd34a6c4a994ecd123ca07881655fc53f8b49 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 10 Mar 2026 17:18:26 +0100 Subject: [PATCH 58/70] Colour Levels for 10m Wind MAE and RMSE --- src/plotting/colormap_defaults.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/plotting/colormap_defaults.py b/src/plotting/colormap_defaults.py index a555d1aa..7ad4878a 100644 --- a/src/plotting/colormap_defaults.py +++ b/src/plotting/colormap_defaults.py @@ -52,9 +52,9 @@ def _fallback(): # always start at 0 so that the saturation of the colour corresponds to the error magnitude. 
# RMSE: - "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, - "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, - "SP_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "U_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "m/s"}, + "V_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "m/s"}, + "SP_10M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "m/s"}, "TD_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "T_2M.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.RMSE.spatial": {"cmap": plt.get_cmap("Reds", 7), "levels": [0, 50, 100, 150, 200, 250, 300, 350]} | {"units": "Pa"}, @@ -64,9 +64,9 @@ def _fallback(): # matplotlib does not do that -> ? 
# MAE: - "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, - "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, - "SP_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "vmin": 0} | {"units": "m/s"}, + "U_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "m/s"}, + "V_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "m/s"}, + "SP_10M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "m/s"}, "TD_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "T_2M.MAE.spatial": {"cmap": plt.get_cmap("Reds", 6), "levels": [0, 0.5, 1, 1.5, 2, 2.5, 3]} | {"units": "°C"}, "PMSL.MAE.spatial": {"cmap": plt.get_cmap("Reds", 7), "levels": [0, 50, 100, 150, 200, 250, 300, 350]} | {"units": "Pa"}, From ab36af25739a221cd075b180ab50484044e4e42a Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Fri, 20 Mar 2026 12:57:14 +0100 Subject: [PATCH 59/70] Bug fix --- src/verification/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/verification/__init__.py b/src/verification/__init__.py index bded3022..87cb33b3 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -78,8 +78,7 @@ def _compute_scores( dim: list[str], prefix="", suffix="", - source="", - dim=["x", "y"], + source="" ) -> xr.Dataset: """ Compute basic verification metrics between two xarray DataArrays (fcst and obs). From 7dc82dc3a5aad0b1941c531c5737f6decc360466 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Fri, 20 Mar 2026 13:46:45 +0100 Subject: [PATCH 60/70] Another Bug Fix. 
--- src/verification/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/verification/__init__.py b/src/verification/__init__.py index 87cb33b3..2d2fb601 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -115,7 +115,6 @@ def _compute_statistics( prefix="", suffix="", source="", - dim=["x", "y"], ) -> xr.Dataset: """ Compute basic statistics of a xarray DataArray (data). From d33f776bdc0ea9be71c8efface1f5d828f435ff2 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Fri, 20 Mar 2026 18:33:50 +0100 Subject: [PATCH 61/70] More Bug fixes. --- workflow/scripts/plot_summary_stat_maps.mo.py | 4 ++-- workflow/scripts/verif_single_init.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 728f49db..48b6cb33 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -215,8 +215,8 @@ def _( import matplotlib.pyplot as plt plotter = StatePlotter( - ds["longitude"].values.ravel(), - ds["latitude"].values.ravel(), + ds["lon"].values.ravel(), + ds["lat"].values.ravel(), outfn.parent, ) fig = plotter.init_geoaxes( diff --git a/workflow/scripts/verif_single_init.py b/workflow/scripts/verif_single_init.py index f0ecdbd5..6b64c85b 100644 --- a/workflow/scripts/verif_single_init.py +++ b/workflow/scripts/verif_single_init.py @@ -72,7 +72,7 @@ def main(args: ScriptConfig): truth = truth.sel(time=fcst.time) # before verifying, calculate wind speed: - for ds in [fcst, analysis]: + for ds in [fcst, truth]: if "U_10M" in ds and "V_10M" in ds: LOG.info("Calculating Wind Speed (SP_10M)...") ds["SP_10M"] = (ds["U_10M"]**2 + ds["V_10M"]**2)**0.5 From 8aa43f7ee43f65cccc988449a9ce8235103d01d4 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Mon, 30 Mar 2026 17:57:55 +0200 Subject: [PATCH 62/70] Separate spatial verification files The verification files (verif.nc) do now not contain 
spatial information any more. Instead, this information is read from the forecast and truth data (grib / zarr), aggregated over forecasts and written into a verif_spatial directory for every run. This has the following advantages: - The spatial information (redundant because it already pre-exists in .grib and .zarr files) is now not unnecessarily written to disk. - The spatial verification pipeline is now separated to some degree from the normal one, meaning that the normal one could be run without the heavier spatial one. - aggregation over forecasts now needs to be performed only for the parameters and lead times for which it is actually requested, instead of for all. This speeds up computations when only a specific subset of maps is of interest. Co-authored-by: Francesco Zanetta --- src/verification/__init__.py | 9 +- workflow/Snakefile | 30 +- workflow/rules/plot.smk | 14 +- workflow/rules/verif.smk | 37 ++ workflow/scripts/plot_summary_stat_maps.mo.py | 10 +- workflow/scripts/verif_spatial.py | 363 ++++++++++++++++++ 6 files changed, 425 insertions(+), 38 deletions(-) create mode 100644 workflow/scripts/verif_spatial.py diff --git a/src/verification/__init__.py b/src/verification/__init__.py index 2d2fb601..2a1f0c37 100644 --- a/src/verification/__init__.py +++ b/src/verification/__init__.py @@ -226,16 +226,9 @@ def verify( score = xr.concat(score, dim="region") fcst_statistics = xr.concat(fcst_statistics, dim="region") obs_statistics = xr.concat(obs_statistics, dim="region") - score_spatial = _compute_scores( - fcst_aligned[param], - obs_aligned[param], - prefix=param + ".", - suffix=".spatial", - dim=[], - ) statistics.append(xr.concat([fcst_statistics, obs_statistics], dim="source")) scores.append( - xr.merge([score, score_spatial], join="outer", compat="no_conflicts") + xr.merge([score], join="outer", compat="no_conflicts") ) scores = _merge_metrics(scores) diff --git a/workflow/Snakefile b/workflow/Snakefile index 2c11ecb9..edd220ce 100644 --- 
a/workflow/Snakefile +++ b/workflow/Snakefile @@ -127,25 +127,25 @@ rule experiment_all: experiment=EXPERIMENT_NAME, ), expand( - OUT_ROOT / "results/{experiment}/metrics/spatial/runs/{run_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", + rules.plot_summary_stat_maps.output, run_id=collect_all_candidates(), leadtime=[6], metric=["BIAS"], param=["SP_10M"], - region=["centraleurope", "switzerland"], - season=["all", "DJF", "MAM", "JJA", "SON"], - experiment=EXPERIMENT_NAME - ), - expand( - OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", - baseline_id=collect_all_baselines(), - leadtime=[6], - metric=["BIAS"], - param=["SP_10M"], - region=["centraleurope", "switzerland"], - season=["all", "DJF", "MAM", "JJA", "SON"], - experiment=EXPERIMENT_NAME - ) + region=["centraleurope", "switzerland"], + season=["all", "DJF", "MAM", "JJA", "SON"], + experiment=EXPERIMENT_NAME, + ), + # expand( + # OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", + # baseline_id=collect_all_baselines(), + # leadtime=[6], + # metric=["BIAS"], + # param=["SP_10M"], + # region=["centraleurope", "switzerland"], + # season=["all", "DJF", "MAM", "JJA", "SON"], + # experiment=EXPERIMENT_NAME + # ) rule showcase_all: diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index 14e2e4fc..ffe543b0 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -145,7 +145,7 @@ rule plot_summary_stat_maps: localrule: True input: script="workflow/scripts/plot_summary_stat_maps.mo.py", - verif_file=OUT_ROOT / "data/runs/{run_id}/verif_aggregated.nc", + verif_file=OUT_ROOT / "data/runs/{run_id}/verif_spatial/{param}_{leadtime}.nc", output: OUT_ROOT / "results/{experiment}/metrics/spatial/runs/{run_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", wildcard_constraints: @@ -155,24 +155,18 @@ rule plot_summary_stat_maps: 
cpus_per_task=1, runtime="10m", slurm_extra="--exclude=nid001229,nid001225,nid001226,nid001227,nid001230" - params: - nc_out_dir=lambda wc: ( - Path(OUT_ROOT) / f"data/runs/{wc.run_id}/verif_aggregated.nc" - # not sure how to do this, because the baselines are in, e.g., output/data/baselines/COSMO-E/verif_aggregated.nc - # and the runs are in output/data/runs/runID/verif_aggregated.nc - ).resolve(), shell: """ export ECCODES_DEFINITION_PATH=$(realpath .venv/share/eccodes-cosmo-resources/definitions) - python {input.script} \ + uv run python {input.script} \ --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ - --season {wildcards.season} \ + --season {wildcards.season} # interactive editing (needs to set localrule: True and use only one core) # marimo edit {input.script} -- \ # --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ # --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ - # --season {wildcards.season} \ + # --season {wildcards.season} """ use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk index 03ca095a..13a2970f 100644 --- a/workflow/rules/verif.smk +++ b/workflow/rules/verif.smk @@ -96,6 +96,43 @@ rule verif_metrics: --output {output} > {log} 2>&1 """ +rule verif_metrics_spatial: + input: + "src/verification/__init__.py", + "src/data_input/__init__.py", + script="workflow/scripts/verif_spatial.py", + inference_okfiles=lambda wc: expand( + rules.execute_inference.output.okfile, + init_time=_restrict_reftimes_to_hours(REFTIMES), + allow_missing=True, + ), + truth=config["truth"]["root"], + output: + OUT_ROOT / "data/runs/{run_id}/verif_spatial/{param}_{leadtime}.nc", + # wildcard_constraints: + # run_id="^" # to avoid ambiguitiy with run_baseline_verif + # TODO: implement logic to use experiment 
name instead of run_id as wildcard + params: + fcst_label=lambda wc: RUN_CONFIGS[wc.run_id].get("label"), + fcst_steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"], + truth_label=config["truth"]["label"], + log: + OUT_ROOT / "logs/verif_metrics_spatial/{run_id}-{param}-{leadtime}.log", + resources: + cpus_per_task=24, + mem_mb=50_000, + runtime="60m", + slurm_extra="--exclude=nid001229,nid001225,nid001226,nid001227,nid001230" + shell: + """ + uv run workflow/scripts/verif_spatial.py \ + --run_root output/data/runs/{wildcards.run_id} \ + --truth {input.truth} \ + --step {wildcards.leadtime} \ + --param {wildcards.param} \ + --output {output} > {log} 2>&1 + """ + def _restrict_reftimes_to_hours(reftimes, hours=None): """Restrict the reference times to specific hours.""" diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 48b6cb33..fa4adc82 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -92,10 +92,10 @@ def _(ArgumentParser, Path, np): @app.cell -def _(init_hour, lead_time, metric, param, season, verif_file, xr): +def _(metric, param, verif_file, xr): ds = xr.open_dataset(verif_file) - var = f"{param}.{metric}.spatial" - ds = ds[var].sel(init_hour=init_hour, lead_time=lead_time, season=season) + var = f"{param}.{metric}" + ds = ds[var] ds return ds, var @@ -107,7 +107,7 @@ def get_style(param, metric, units_override=None): still needs to be passed as arguments to tripcolor()/tricontourf(). 
""" metric_key = f"{param}.{metric}.spatial" - cfg = CMAP_DEFAULTS[metric_key] if metric_key in CMAP_DEFAULTS else CMAP_DEFAULTS[param] + cfg = CMAP_DEFAULTS[metric_key] if metric_key in CMAP_DEFAULTS else CMAP_DEFAULTS.get(param, {}) units = units_override if units_override is not None else cfg.get("units", "") return { "style": ekp.styles.Style( @@ -240,7 +240,7 @@ def _( else: plot_vals = ds.values.ravel() - plotter.plot_field(subplot, plot_vals, **get_style(var, metric)) + plotter.plot_field(subplot, plot_vals, **get_style(param, metric)) # subplot.ax.add_geometries( # state["lam_envelope"], # edgecolor="black", diff --git a/workflow/scripts/verif_spatial.py b/workflow/scripts/verif_spatial.py new file mode 100644 index 00000000..58cb2624 --- /dev/null +++ b/workflow/scripts/verif_spatial.py @@ -0,0 +1,363 @@ +"""Compute spatial maps of temporally-aggregated forecast errors. + +For a fixed lead time and variable, iterates over all initialisation times found +under a run directory, loads the corresponding GRIB forecast field and the +matching truth slice from a reference zarr, maps the forecast onto the truth +grid, and accumulates running error statistics without ever holding the full +time series in memory. The final BIAS / RMSE / MAE maps are written to a +NetCDF file. + +Usage +----- + uv run workflow/scripts/verif_spatial.py \\ + output/data/runs/ \\ + --truth /path/to/truth.zarr \\ + --step 24 \\ + --param T_2M +""" + +import logging +from argparse import ArgumentParser, Namespace +from datetime import datetime, timedelta +from pathlib import Path + +import numpy as np +import xarray as xr + +from data_input import load_fct_data_from_grib +from verification.spatial import map_forecast_to_truth + +LOG = logging.getLogger(__name__) +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) + +DATETIME_FMT = "%Y%m%d%H%M" + +# Maps from standard parameter names to zarr variable names. 
+# COSMO-2e zarrs use short CF names; COSMO-1e zarrs keep the COSMO names. +_PARAMS_MAP_CO2 = { + "T_2M": "2t", + "TD_2M": "2d", + "U_10M": "10u", + "V_10M": "10v", + "PS": "sp", + "PMSL": "msl", + "TOT_PREC": "tp", +} +_PARAMS_MAP_CO1 = {k: k.replace("TOT_PREC", "TOT_PREC_6H") for k in _PARAMS_MAP_CO2} + +# Derived variables and the components they require. +_DERIVED = { + "SP_10M": ("U_10M", "V_10M"), +} + + +def _params_map(truth_root: Path) -> dict[str, str]: + return _PARAMS_MAP_CO2 if "co2" in truth_root.name else _PARAMS_MAP_CO1 + + +def _compute_derived(ds: xr.Dataset, param: str) -> xr.DataArray: + """Compute a derived variable from its components already present in *ds*.""" + if param == "SP_10M": + return (ds["U_10M"] ** 2 + ds["V_10M"] ** 2) ** 0.5 + raise ValueError(f"No recipe for derived variable '{param}'") + + +# --------------------------------------------------------------------------- +# Truth loading +# --------------------------------------------------------------------------- + +def _open_zarr_component(root: Path, param: str) -> xr.DataArray: + """Open a single native zarr variable lazily as a DataArray.""" + zarr_param = _params_map(root)[param] + + ds = xr.open_zarr(root, consolidated=False) + ds = ds.set_index(time="dates") + + # Extract lat/lon before selecting on variable (they live on cell only). + spatial_dim = "cell" + lat = ds["latitudes"] if "latitudes" in ds else None + lon = ds["longitudes"] if "longitudes" in ds else None + + ds = ds.assign_coords(variable=ds.attrs["variables"]) + ds = ds.sel(variable=zarr_param).squeeze("ensemble", drop=True) + + # Recover 2-D spatial shape when stored as a flat cell dimension. 
+ if len(ds.attrs["field_shape"]) == 2: + ny, nx = ds.attrs["field_shape"] + y_idx, x_idx = np.unravel_index(np.arange(ny * nx), (ny, nx)) + ds = ds.assign_coords(y=(spatial_dim, y_idx), x=(spatial_dim, x_idx)) + ds = ds.set_index(**{spatial_dim: ("y", "x")}).unstack(spatial_dim) + spatial_dim = None # now (y, x) + + da = ds["data"].rename(param).drop_vars("variable", errors="ignore") + + # Attach lat/lon as coordinates on the spatial dimension(s). + if lat is not None and lon is not None: + if spatial_dim is not None: + # flat 1-D case: cell/values dim + da = da.assign_coords(lat=(spatial_dim, lat.values), lon=(spatial_dim, lon.values)) + else: + # 2-D case: lat/lon still on original flat index — attach via unstack + da = da.assign_coords(lat=(["y", "x"], lat.values.reshape(ny, nx)), + lon=(["y", "x"], lon.values.reshape(ny, nx))) + + return da + + +def open_truth_zarr(root: Path, param: str) -> xr.DataArray: + """Open the truth zarr lazily and return a DataArray for *param*. + + For derived variables (e.g. SP_10M) the required components are loaded and + the derivation is applied on the fly. The returned DataArray has dimensions + ``(time, y, x)`` or ``(time, values)`` and always exposes ``lat``/``lon``. + """ + if param in _DERIVED: + components = { + c: _open_zarr_component(root, c).drop_vars("variable", errors="ignore") + for c in _DERIVED[param] + } + ds = xr.Dataset(components) + return _compute_derived(ds, param) + return _open_zarr_component(root, param) + + +# --------------------------------------------------------------------------- +# Init-time discovery +# --------------------------------------------------------------------------- + +def iter_init_dirs(run_root: Path) -> list[tuple[datetime, Path]]: + """Return ``(reftime, grib_dir)`` pairs for every complete init time. + + Expects subdirectories named ``YYYYMMDDHHMI`` directly under *run_root*. + GRIB files may live either directly in the init-time directory or inside a + ``grib/`` subdirectory. 
+ """ + result = [] + for d in sorted(run_root.iterdir()): + if not d.is_dir(): + continue + try: + reftime = datetime.strptime(d.name, DATETIME_FMT) + except ValueError: + continue + grib_dir = d / "grib" if (d / "grib").is_dir() else d + if not any(grib_dir.glob("*.grib")): + LOG.debug("No GRIB files in %s, skipping", grib_dir) + continue + result.append((reftime, grib_dir)) + return result + + +# --------------------------------------------------------------------------- +# Main +# --------------------------------------------------------------------------- + +def main(args: Namespace) -> None: + LOG.info("=" * 60) + LOG.info("Spatial verification param=%s step=%dh", args.param, args.step) + LOG.info("Run root : %s", args.run_root) + LOG.info("Truth : %s", args.truth) + LOG.info("Output : %s", args.output) + LOG.info("=" * 60) + + # Open the truth zarr once; individual time slices are loaded on demand. + truth_da = open_truth_zarr(args.truth, args.param) + # Normalise to datetime64[ns] so membership checks work regardless of zarr precision. + truth_da = truth_da.assign_coords( + time=truth_da.time.values.astype("datetime64[ns]") + ) + # Rename flat spatial dim to 'values' if the zarr uses 'cell'. + if "cell" in truth_da.dims: + truth_da = truth_da.rename({"cell": "values"}) + truth_times = set(truth_da.time.values) # keep as datetime64, tolist() yields ints for ns precision + LOG.info("Truth opened lazily: %s", truth_da) + + init_dirs = iter_init_dirs(args.run_root) + LOG.info("Found %d init time directories", len(init_dirs)) + + step_td = timedelta(hours=args.step) + + # Running accumulators – initialised on the first successfully processed + # sample so that we can infer the spatial shape from the data. 
+ accum_n: np.ndarray | None = None + accum_sum_e: np.ndarray | None = None + accum_sum_se: np.ndarray | None = None + accum_sum_ae: np.ndarray | None = None + ref_truth_slice: xr.DataArray | None = None # kept for output coordinates + + n_ok = 0 + n_skip = 0 + + for reftime, grib_dir in init_dirs: + valid_time = np.datetime64(reftime + step_td).astype("datetime64[ns]") + + if valid_time not in truth_times: + LOG.debug("Valid time %s not in truth, skipping %s", valid_time, reftime) + n_skip += 1 + continue + + LOG.info( + "Processing reftime=%s valid=%s", + reftime.strftime(DATETIME_FMT), + valid_time, + ) + + # --- load forecast --- + grib_params = list(_DERIVED[args.param]) if args.param in _DERIVED else [args.param] + try: + fcst = load_fct_data_from_grib( + root=grib_dir, + reftime=reftime, + steps=[args.step], + params=grib_params, + ) + except Exception as exc: + LOG.warning("Could not load GRIB for %s: %s", reftime, exc) + n_skip += 1 + continue + + # Drop lead_time dimension (single step requested). + if "lead_time" in fcst.dims: + fcst = fcst.sel(lead_time=np.timedelta64(args.step, "h")) + + # Compute derived variable if needed. + if args.param in _DERIVED: + fcst = fcst.assign({args.param: _compute_derived(fcst, args.param)}) + + # --- load truth slice --- + truth_slice = truth_da.sel(time=valid_time).compute() + # For derived variables truth_da is already the derived DataArray, + # so wrap it in a Dataset for map_forecast_to_truth compatibility. + truth_ds = truth_slice.to_dataset(name=args.param) if isinstance(truth_slice, xr.DataArray) else truth_slice + + # --- map forecast onto truth grid --- + try: + fcst_mapped = map_forecast_to_truth(fcst, truth_ds) + except Exception as exc: + LOG.warning("Spatial mapping failed for %s: %s", reftime, exc) + n_skip += 1 + continue + + fcst_param = fcst_mapped[args.param] + # Squeeze any ensemble/eps dimension (deterministic run stored with size-1 eps). 
+ for dim in ["eps", "ensemble", "number"]: + if dim in fcst_param.dims and fcst_param.sizes[dim] == 1: + fcst_param = fcst_param.squeeze(dim, drop=True) + fcst_vals = fcst_param.values + truth_vals = truth_slice.values + error = fcst_vals - truth_vals # shape: spatial dims of truth + + # --- initialise accumulators --- + if accum_n is None: + accum_n = np.zeros(error.shape, dtype=np.int64) + accum_sum_e = np.zeros(error.shape, dtype=np.float64) + accum_sum_se = np.zeros(error.shape, dtype=np.float64) + accum_sum_ae = np.zeros(error.shape, dtype=np.float64) + ref_truth_slice = truth_slice + + # --- accumulate (NaN-safe) --- + valid = ~np.isnan(error) + accum_n[valid] += 1 + accum_sum_e[valid] += error[valid] + accum_sum_se[valid] += error[valid] ** 2 + accum_sum_ae[valid] += np.abs(error[valid]) + n_ok += 1 + + LOG.info("Finished: %d init times processed, %d skipped", n_ok, n_skip) + + if n_ok == 0: + LOG.error("No data could be processed – no output written.") + return + + # --- compute aggregate maps --- + with np.errstate(invalid="ignore", divide="ignore"): + bias = np.where(accum_n > 0, accum_sum_e / accum_n, np.nan).astype(np.float32) + rmse = np.where(accum_n > 0, np.sqrt(accum_sum_se / accum_n), np.nan).astype( + np.float32 + ) + mae = np.where(accum_n > 0, accum_sum_ae / accum_n, np.nan).astype(np.float32) + count = np.where(accum_n > 0, accum_n, np.nan).astype(np.float32) + + # Build a spatial template: keep only spatial dims and lat/lon coords, + # dropping scalar coords like time (added by .sel). 
+ spatial_coords = { + c: ref_truth_slice[c] + for c in ref_truth_slice.coords + if set(ref_truth_slice[c].dims).issubset(set(ref_truth_slice.dims)) + and c != "time" + } + + def _da(data: np.ndarray) -> xr.DataArray: + return xr.DataArray(data, dims=ref_truth_slice.dims, coords=spatial_coords) + + out = xr.Dataset( + { + f"{args.param}.BIAS": _da(bias), + f"{args.param}.RMSE": _da(rmse), + f"{args.param}.MAE": _da(mae), + f"{args.param}.N": _da(count), + }, + attrs={ + "param": args.param, + "step_h": args.step, + "run_root": str(args.run_root), + "n_processed": n_ok, + "n_skipped": n_skip, + }, + ) + + LOG.info("Output dataset:\n%s", out) + args.output.parent.mkdir(parents=True, exist_ok=True) + out.to_netcdf(args.output) + LOG.info("Saved to %s", args.output) + + +if __name__ == "__main__": + parser = ArgumentParser( + description=( + "Compute spatial maps of temporally-aggregated forecast errors " + "by streaming over GRIB files from a model run." + ) + ) + parser.add_argument( + "--run_root", + type=Path, + help="Root directory of a model run (e.g. output/data/runs/).", + ) + parser.add_argument( + "--truth", + type=Path, + required=True, + help="Path to the reference zarr dataset.", + ) + parser.add_argument( + "--step", + type=int, + required=True, + help="Forecast lead time in hours (e.g. 24).", + ) + parser.add_argument( + "--param", + type=str, + required=True, + help="Variable to verify (e.g. T_2M, TD_2M, U_10M).", + ) + parser.add_argument( + "--output", + type=Path, + default=None, + help=( + "Output NetCDF file. 
" + "Default: /verif_spatial__steph.nc" + ), + ) + args = parser.parse_args() + + if args.output is None: + args.output = ( + args.run_root / f"verif_spatial_{args.param}_step{args.step:03d}h.nc" + ) + + main(args) From c291f0fcfcf5aa410b8db8b590d70004a96ec8d5 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Tue, 31 Mar 2026 17:46:41 +0200 Subject: [PATCH 63/70] Introduce Season Handling (not thoroughly tested) --- workflow/scripts/plot_summary_stat_maps.mo.py | 4 +- workflow/scripts/verif_spatial.py | 95 ++++++++++++------- 2 files changed, 62 insertions(+), 37 deletions(-) diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index fa4adc82..27231c64 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -92,10 +92,10 @@ def _(ArgumentParser, Path, np): @app.cell -def _(metric, param, verif_file, xr): +def _(metric, param, season, verif_file, xr): ds = xr.open_dataset(verif_file) var = f"{param}.{metric}" - ds = ds[var] + ds = ds[var].sel(season=season) ds return ds, var diff --git a/workflow/scripts/verif_spatial.py b/workflow/scripts/verif_spatial.py index 58cb2624..246b8c68 100644 --- a/workflow/scripts/verif_spatial.py +++ b/workflow/scripts/verif_spatial.py @@ -34,6 +34,20 @@ DATETIME_FMT = "%Y%m%d%H%M" +SEASONS = ["DJF", "MAM", "JJA", "SON", "all"] + + +def _season_of(dt: datetime) -> str: + """Return the meteorological season string for a given datetime.""" + month = dt.month + if month in (12, 1, 2): + return "DJF" + if month in (3, 4, 5): + return "MAM" + if month in (6, 7, 8): + return "JJA" + return "SON" + # Maps from standard parameter names to zarr variable names. # COSMO-2e zarrs use short CF names; COSMO-1e zarrs keep the COSMO names. 
_PARAMS_MAP_CO2 = { @@ -179,12 +193,13 @@ def main(args: Namespace) -> None: step_td = timedelta(hours=args.step) - # Running accumulators – initialised on the first successfully processed - # sample so that we can infer the spatial shape from the data. - accum_n: np.ndarray | None = None - accum_sum_e: np.ndarray | None = None - accum_sum_se: np.ndarray | None = None - accum_sum_ae: np.ndarray | None = None + # Running accumulators per season – initialised on the first successfully + # processed sample so that we can infer the spatial shape from the data. + # Each entry is a numpy array over the spatial dimension(s). + accum_n: dict[str, np.ndarray | None] = {s: None for s in SEASONS} + accum_sum_e: dict[str, np.ndarray | None] = {s: None for s in SEASONS} + accum_sum_se: dict[str, np.ndarray | None] = {s: None for s in SEASONS} + accum_sum_ae: dict[str, np.ndarray | None] = {s: None for s in SEASONS} ref_truth_slice: xr.DataArray | None = None # kept for output coordinates n_ok = 0 @@ -249,20 +264,23 @@ def main(args: Namespace) -> None: truth_vals = truth_slice.values error = fcst_vals - truth_vals # shape: spatial dims of truth - # --- initialise accumulators --- - if accum_n is None: - accum_n = np.zeros(error.shape, dtype=np.int64) - accum_sum_e = np.zeros(error.shape, dtype=np.float64) - accum_sum_se = np.zeros(error.shape, dtype=np.float64) - accum_sum_ae = np.zeros(error.shape, dtype=np.float64) + # --- initialise accumulators on first valid sample --- + if accum_n["all"] is None: + for s in SEASONS: + accum_n[s] = np.zeros(error.shape, dtype=np.int64) + accum_sum_e[s] = np.zeros(error.shape, dtype=np.float64) + accum_sum_se[s] = np.zeros(error.shape, dtype=np.float64) + accum_sum_ae[s] = np.zeros(error.shape, dtype=np.float64) ref_truth_slice = truth_slice - # --- accumulate (NaN-safe) --- + # --- accumulate into the matching season bucket and "all" (NaN-safe) --- + season = _season_of(reftime) valid = ~np.isnan(error) - accum_n[valid] += 1 - 
accum_sum_e[valid] += error[valid] - accum_sum_se[valid] += error[valid] ** 2 - accum_sum_ae[valid] += np.abs(error[valid]) + for s in [season, "all"]: + accum_n[s][valid] += 1 + accum_sum_e[s][valid] += error[valid] + accum_sum_se[s][valid] += error[valid] ** 2 + accum_sum_ae[s][valid] += np.abs(error[valid]) n_ok += 1 LOG.info("Finished: %d init times processed, %d skipped", n_ok, n_skip) @@ -271,33 +289,40 @@ def main(args: Namespace) -> None: LOG.error("No data could be processed – no output written.") return - # --- compute aggregate maps --- - with np.errstate(invalid="ignore", divide="ignore"): - bias = np.where(accum_n > 0, accum_sum_e / accum_n, np.nan).astype(np.float32) - rmse = np.where(accum_n > 0, np.sqrt(accum_sum_se / accum_n), np.nan).astype( - np.float32 - ) - mae = np.where(accum_n > 0, accum_sum_ae / accum_n, np.nan).astype(np.float32) - count = np.where(accum_n > 0, accum_n, np.nan).astype(np.float32) - - # Build a spatial template: keep only spatial dims and lat/lon coords, - # dropping scalar coords like time (added by .sel). 
+ # --- compute aggregate maps per season, then stack into a season dimension --- spatial_coords = { c: ref_truth_slice[c] for c in ref_truth_slice.coords if set(ref_truth_slice[c].dims).issubset(set(ref_truth_slice.dims)) and c != "time" } - - def _da(data: np.ndarray) -> xr.DataArray: - return xr.DataArray(data, dims=ref_truth_slice.dims, coords=spatial_coords) + spatial_dims = list(ref_truth_slice.dims) + out_dims = ["season"] + spatial_dims + out_coords = {"season": SEASONS, **spatial_coords} + + def _seasonal_da(compute_fn) -> xr.DataArray: + """Stack per-season arrays into a (season, *spatial) DataArray.""" + slices = [] + for s in SEASONS: + n = accum_n[s] + with np.errstate(invalid="ignore", divide="ignore"): + slices.append(compute_fn(n, s).astype(np.float32)) + return xr.DataArray(np.stack(slices), dims=out_dims, coords=out_coords) out = xr.Dataset( { - f"{args.param}.BIAS": _da(bias), - f"{args.param}.RMSE": _da(rmse), - f"{args.param}.MAE": _da(mae), - f"{args.param}.N": _da(count), + f"{args.param}.BIAS": _seasonal_da( + lambda n, s: np.where(n > 0, accum_sum_e[s] / n, np.nan) + ), + f"{args.param}.RMSE": _seasonal_da( + lambda n, s: np.where(n > 0, np.sqrt(accum_sum_se[s] / n), np.nan) + ), + f"{args.param}.MAE": _seasonal_da( + lambda n, s: np.where(n > 0, accum_sum_ae[s] / n, np.nan) + ), + f"{args.param}.N": _seasonal_da( + lambda n, s: np.where(n > 0, n, np.nan) + ), }, attrs={ "param": args.param, From 8afb9ac55df24c2133d4ca60519246a26380071e Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 1 Apr 2026 13:56:12 +0200 Subject: [PATCH 64/70] Additional Log Statements for Map Plotting plus handling of the special case when a map is all-NaN (then a uniform grey map is plotted, with an annotation). 
--- workflow/rules/plot.smk | 6 +++- workflow/scripts/plot_summary_stat_maps.mo.py | 30 +++++++++++++++---- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index ffe543b0..c8550d4d 100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -150,6 +150,8 @@ rule plot_summary_stat_maps: OUT_ROOT / "results/{experiment}/metrics/spatial/runs/{run_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", wildcard_constraints: leadtime=r"\d+", # only digits + log: + OUT_ROOT / "logs/plot_summary_stat_maps/{experiment}/{run_id}-{param}-{metric}-{region}-{season}-{leadtime}.log", resources: slurm_partition="postproc", cpus_per_task=1, @@ -161,7 +163,7 @@ rule plot_summary_stat_maps: uv run python {input.script} \ --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ --param {wildcards.param} --leadtime {wildcards.leadtime} --metric {wildcards.metric} \ - --season {wildcards.season} + --season {wildcards.season} > {log} 2>&1 # interactive editing (needs to set localrule: True and use only one core) # marimo edit {input.script} -- \ # --input {input.verif_file} --outfn {output[0]} --region {wildcards.region} \ @@ -175,6 +177,8 @@ use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_aggregated.nc", output: OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", + log: + OUT_ROOT / "logs/plot_summary_stat_maps/{experiment}/{baseline_id}-{param}-{metric}-{region}-{season}-{leadtime}.log", params: nc_out_dir=lambda wc: ( Path(OUT_ROOT) / f"data/baselines/{wc.baseline_id}/verif_aggregated.nc" diff --git a/workflow/scripts/plot_summary_stat_maps.mo.py b/workflow/scripts/plot_summary_stat_maps.mo.py index 27231c64..50d2d414 100644 --- a/workflow/scripts/plot_summary_stat_maps.mo.py +++ b/workflow/scripts/plot_summary_stat_maps.mo.py @@ -92,10 
+92,14 @@ def _(ArgumentParser, Path, np): @app.cell -def _(metric, param, season, verif_file, xr): +def _(LOG, metric, param, season, verif_file, xr): ds = xr.open_dataset(verif_file) + LOG.info("Opened dataset: %s", ds) var = f"{param}.{metric}" + LOG.info("Selecting variable '%s' for season '%s'", var, season) ds = ds[var].sel(season=season) + LOG.info("Selected DataArray: dims=%s, shape=%s, dtype=%s", ds.dims, ds.shape, ds.dtype) + LOG.info("Value range: min=%.4g, max=%.4g, n_nan=%d", float(ds.min()), float(ds.max()), int(ds.isnull().sum())) ds return ds, var @@ -205,10 +209,11 @@ def _( get_style, lead_time, metric, + np, outfn, param, - region, - season, + region, + season, var, ): # plot individual fields @@ -233,14 +238,27 @@ def _( # field, units_override = preprocess_field(param, state) # Quick fix for precipitation (might have to use preprocess_field in the end (see above)) - # for wind speed, preprocess_field only has conversion from m/s to knots + # for wind speed, preprocess_field only has conversion from m/s to knots # (not the other way around), so I assume the values are in m/s if param == "TOT_PREC": plot_vals = ds.values.ravel()*1000 else: plot_vals = ds.values.ravel() - - plotter.plot_field(subplot, plot_vals, **get_style(param, metric)) + LOG.info("plot_vals: shape=%s, min=%.4g, max=%.4g, n_nan=%d", + plot_vals.shape, float(np.nanmin(plot_vals)), float(np.nanmax(plot_vals)), int(np.isnan(plot_vals).sum())) + + style_kwargs = get_style(param, metric) + LOG.info("style_kwargs: %s", style_kwargs) + + if np.all(np.isnan(plot_vals)): + LOG.warning("All values are NaN for %s %s season=%s — plotting empty map.", param, metric, season) + import matplotlib.patches as mpatches + subplot.ax.set_facecolor("#cccccc") + subplot.standard_layers() + grey_patch = mpatches.Patch(color="#cccccc", label="No data") + subplot.ax.legend(handles=[grey_patch], loc="lower left", fontsize=8) + else: + plotter.plot_field(subplot, plot_vals, **style_kwargs) # 
subplot.ax.add_geometries( # state["lam_envelope"], # edgecolor="black", From f8a66027e8bcf8159e5feef231efda46d269dd55 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 1 Apr 2026 16:24:09 +0200 Subject: [PATCH 65/70] Fix all-NaN spatial verification output for cumulative params (TOT_PREC) TOT_PREC GRIB values are cumulative totals that are disaggregated via diff(lead_time) inside load_fct_data_from_grib. Loading only the requested step meant the diff had a single element and always returned NaN, producing all-NaN error maps for all seasons. Fix: detect cumulative params via _CUMULATIVE_PARAMS and load the preceding available GRIB step alongside the requested one, so the diff produces a valid result at the target lead time. Also add first-iteration debug logging (fcst/truth/mapped/error shape, range, NaN count) and a per-iteration warning when the error is all-NaN, to make similar issues immediately diagnosable from the logs. --- workflow/scripts/verif_spatial.py | 81 ++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 2 deletions(-) diff --git a/workflow/scripts/verif_spatial.py b/workflow/scripts/verif_spatial.py index 246b8c68..28e8911d 100644 --- a/workflow/scripts/verif_spatial.py +++ b/workflow/scripts/verif_spatial.py @@ -137,6 +137,27 @@ def open_truth_zarr(root: Path, param: str) -> xr.DataArray: return _open_zarr_component(root, param) +# --------------------------------------------------------------------------- +# GRIB step helpers +# --------------------------------------------------------------------------- + +# Parameters whose GRIB values are cumulative totals and must be disaggregated +# via diff before verification. For these, the preceding step must also be +# loaded so that load_fct_data_from_grib's diff produces a valid result. 
+_CUMULATIVE_PARAMS = {"TOT_PREC"} + + +def _preceding_step(grib_dir: Path, step: int) -> int | None: + """Return the largest available step smaller than *step* in *grib_dir*.""" + available = sorted( + int(f.stem.split("_")[-1]) + for f in grib_dir.glob("*.grib") + if f.stem.split("_")[-1].isdigit() + ) + smaller = [s for s in available if s < step] + return smaller[-1] if smaller else None + + # --------------------------------------------------------------------------- # Init-time discovery # --------------------------------------------------------------------------- @@ -219,13 +240,30 @@ def main(args: Namespace) -> None: valid_time, ) + first_iter = n_ok == 0 + # --- load forecast --- grib_params = list(_DERIVED[args.param]) if args.param in _DERIVED else [args.param] + + # Cumulative params (e.g. TOT_PREC) are disaggregated via diff inside + # load_fct_data_from_grib, so we need to also load the preceding step + # to get a valid result at the requested step. + if args.param in _CUMULATIVE_PARAMS: + prev_step = _preceding_step(grib_dir, args.step) + if prev_step is None: + LOG.warning("No preceding step found for cumulative param %s at step %d, skipping %s", + args.param, args.step, reftime) + n_skip += 1 + continue + load_steps = [prev_step, args.step] + else: + load_steps = [args.step] + try: fcst = load_fct_data_from_grib( root=grib_dir, reftime=reftime, - steps=[args.step], + steps=load_steps, params=grib_params, ) except Exception as exc: @@ -233,7 +271,7 @@ def main(args: Namespace) -> None: n_skip += 1 continue - # Drop lead_time dimension (single step requested). + # Drop lead_time dimension (select only the requested step). 
if "lead_time" in fcst.dims: fcst = fcst.sel(lead_time=np.timedelta64(args.step, "h")) @@ -241,12 +279,32 @@ def main(args: Namespace) -> None: if args.param in _DERIVED: fcst = fcst.assign({args.param: _compute_derived(fcst, args.param)}) + if first_iter: + LOG.info("fcst (after step selection): %s", fcst) + fcst_raw = fcst[args.param].values if args.param in fcst else None + if fcst_raw is not None: + n_nan_fcst = int(np.isnan(fcst_raw).sum()) + LOG.info("fcst[%s]: shape=%s, min=%.4g, max=%.4g, n_nan=%d", + args.param, fcst_raw.shape, + float(np.nanmin(fcst_raw)) if n_nan_fcst < fcst_raw.size else float("nan"), + float(np.nanmax(fcst_raw)) if n_nan_fcst < fcst_raw.size else float("nan"), + n_nan_fcst) + # --- load truth slice --- truth_slice = truth_da.sel(time=valid_time).compute() # For derived variables truth_da is already the derived DataArray, # so wrap it in a Dataset for map_forecast_to_truth compatibility. truth_ds = truth_slice.to_dataset(name=args.param) if isinstance(truth_slice, xr.DataArray) else truth_slice + if first_iter: + truth_raw = truth_slice.values + n_nan_truth = int(np.isnan(truth_raw).sum()) + LOG.info("truth_slice[%s]: shape=%s, min=%.4g, max=%.4g, n_nan=%d", + args.param, truth_raw.shape, + float(np.nanmin(truth_raw)) if n_nan_truth < truth_raw.size else float("nan"), + float(np.nanmax(truth_raw)) if n_nan_truth < truth_raw.size else float("nan"), + n_nan_truth) + # --- map forecast onto truth grid --- try: fcst_mapped = map_forecast_to_truth(fcst, truth_ds) @@ -264,6 +322,25 @@ def main(args: Namespace) -> None: truth_vals = truth_slice.values error = fcst_vals - truth_vals # shape: spatial dims of truth + if first_iter: + n_nan_mapped = int(np.isnan(fcst_vals).sum()) + LOG.info("fcst_mapped[%s]: shape=%s, min=%.4g, max=%.4g, n_nan=%d", + args.param, fcst_vals.shape, + float(np.nanmin(fcst_vals)) if n_nan_mapped < fcst_vals.size else float("nan"), + float(np.nanmax(fcst_vals)) if n_nan_mapped < fcst_vals.size else float("nan"), + 
n_nan_mapped) + n_nan_err = int(np.isnan(error).sum()) + LOG.info("error: shape=%s, min=%.4g, max=%.4g, n_nan=%d / %d", + error.shape, + float(np.nanmin(error)) if n_nan_err < error.size else float("nan"), + float(np.nanmax(error)) if n_nan_err < error.size else float("nan"), + n_nan_err, error.size) + + n_nan_error = int(np.isnan(error).sum()) + if n_nan_error == error.size: + LOG.warning("reftime=%s: error is all-NaN (%d points) — nothing accumulated.", + reftime.strftime(DATETIME_FMT), error.size) + # --- initialise accumulators on first valid sample --- if accum_n["all"] is None: for s in SEASONS: From 26f6f9d9e2be2f15be923d115af866ceb6d2bcb7 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Wed, 1 Apr 2026 16:33:16 +0200 Subject: [PATCH 66/70] Add opt-in spatial verification via `evalml experiment --spatial` Introduce a dedicated `spatial_all` Snakemake target and a new `--spatial` flag on `evalml experiment`. Spatial verification (aggregation + map plots) is now only triggered on explicit request, keeping the default experiment run lean. Spatial parameters (params, leadtimes, metrics, regions, seasons) are configured via an optional `spatial_verification:` block in the config YAML, backed by a new `SpatialVerificationConfig` Pydantic model with sensible defaults. All existing configs have been updated with the block for easy customisation. 
--- config/forecasters-co1e.yaml | 22 ++++++++++++++++ config/forecasters-co2-disentangled.yaml | 22 ++++++++++++++++ config/forecasters-co2-mod-01.yaml | 22 ++++++++++++++++ config/forecasters-co2.yaml | 22 ++++++++++++++++ config/forecasters-ich1-oper-fixed.yaml | 26 +++++++++++++++++-- config/forecasters-ich1-oper.yaml | 22 ++++++++++++++++ config/forecasters-ich1.yaml | 22 ++++++++++++++++ config/forecasters-ich1_mod_01.yaml | 22 ++++++++++++++++ config/forecasters-ich1_mod_02_1yr.yaml | 22 ++++++++++++++++ config/interpolators-co2.yaml | 22 ++++++++++++++++ src/evalml/cli.py | 12 +++++++-- src/evalml/config.py | 32 ++++++++++++++++++++++++ workflow/Snakefile | 25 ++++++++---------- 13 files changed, 274 insertions(+), 19 deletions(-) diff --git a/config/forecasters-co1e.yaml b/config/forecasters-co1e.yaml index 83846ec4..1a1bac39 100644 --- a/config/forecasters-co1e.yaml +++ b/config/forecasters-co1e.yaml @@ -57,3 +57,25 @@ profile: jobs: 50 batch_rules: plot_forecast_frame: 32 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/config/forecasters-co2-disentangled.yaml b/config/forecasters-co2-disentangled.yaml index c9a75f2c..e962767a 100644 --- a/config/forecasters-co2-disentangled.yaml +++ b/config/forecasters-co2-disentangled.yaml @@ -76,3 +76,25 @@ profile: runtime: "1h" gpus: 0 jobs: 50 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/config/forecasters-co2-mod-01.yaml b/config/forecasters-co2-mod-01.yaml index b95c6bad..c551a118 100644 --- a/config/forecasters-co2-mod-01.yaml +++ b/config/forecasters-co2-mod-01.yaml @@ -54,3 +54,25 @@ profile: runtime: "23h" gpus: 0 
jobs: 50 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/config/forecasters-co2.yaml b/config/forecasters-co2.yaml index 059ab97e..fa7cd811 100644 --- a/config/forecasters-co2.yaml +++ b/config/forecasters-co2.yaml @@ -53,3 +53,25 @@ profile: jobs: 50 batch_rules: plot_forecast_frame: 32 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/config/forecasters-ich1-oper-fixed.yaml b/config/forecasters-ich1-oper-fixed.yaml index 1e9a09b4..2e76d8b6 100644 --- a/config/forecasters-ich1-oper-fixed.yaml +++ b/config/forecasters-ich1-oper-fixed.yaml @@ -7,8 +7,8 @@ dates: # end: 2025-06-20T00:00 # frequency: 54h # or - - 2024-01-01T12:00 - - 2024-02-01T18:00 + # - 2024-01-01T12:00 + # - 2024-02-01T18:00 - 2025-03-01T00:00 @@ -68,3 +68,25 @@ profile: jobs: 50 batch_rules: plot_forecast_frame: 32 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/config/forecasters-ich1-oper.yaml b/config/forecasters-ich1-oper.yaml index 6e5b011f..45c5d70d 100644 --- a/config/forecasters-ich1-oper.yaml +++ b/config/forecasters-ich1-oper.yaml @@ -64,3 +64,25 @@ profile: jobs: 50 batch_rules: plot_forecast_frame: 32 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/config/forecasters-ich1.yaml b/config/forecasters-ich1.yaml 
index a5de9b54..4dc1a01d 100644 --- a/config/forecasters-ich1.yaml +++ b/config/forecasters-ich1.yaml @@ -76,3 +76,25 @@ profile: jobs: 50 batch_rules: plot_forecast_frame: 32 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/config/forecasters-ich1_mod_01.yaml b/config/forecasters-ich1_mod_01.yaml index 909e8180..91110a02 100644 --- a/config/forecasters-ich1_mod_01.yaml +++ b/config/forecasters-ich1_mod_01.yaml @@ -55,3 +55,25 @@ profile: runtime: "23h" gpus: 0 jobs: 50 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/config/forecasters-ich1_mod_02_1yr.yaml b/config/forecasters-ich1_mod_02_1yr.yaml index e876f643..52615de1 100644 --- a/config/forecasters-ich1_mod_02_1yr.yaml +++ b/config/forecasters-ich1_mod_02_1yr.yaml @@ -55,3 +55,25 @@ profile: runtime: "23h" gpus: 0 jobs: 50 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/config/interpolators-co2.yaml b/config/interpolators-co2.yaml index 115dd6bc..46cd7a9a 100644 --- a/config/interpolators-co2.yaml +++ b/config/interpolators-co2.yaml @@ -84,3 +84,25 @@ profile: jobs: 50 batch_rules: plot_forecast_frame: 32 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all diff --git a/src/evalml/cli.py b/src/evalml/cli.py index 839e7a21..25dde481 100644 --- 
a/src/evalml/cli.py +++ b/src/evalml/cli.py @@ -65,6 +65,7 @@ def execute_workflow( unlock: bool, report: Path | None, extra_smk_args: tuple[str, ...] = (), + extra_targets: list[str] = [], ): config = ConfigModel.model_validate(load_yaml(configfile)) @@ -80,7 +81,7 @@ def execute_workflow( if report and not dry_run: command += ["--report-after-run", "--report", str(report)] - command.append(target) + command += [target] + extra_targets command += list(extra_smk_args) if not verbose: command += ["--quiet", "rules"] # reduce verobosity of snakemake output @@ -97,8 +98,14 @@ def cli(): @click.argument( "configfile", type=click.Path(exists=True, dir_okay=False, path_type=Path) ) +@click.option( + "--spatial", + is_flag=True, + default=False, + help="Also run spatial verification (computationally intensive).", +) @workflow_options -def experiment(configfile, cores, verbose, dry_run, unlock, report, extra_smk_args): +def experiment(configfile, spatial, cores, verbose, dry_run, unlock, report, extra_smk_args): execute_workflow( configfile, "experiment_all", @@ -108,6 +115,7 @@ def experiment(configfile, cores, verbose, dry_run, unlock, report, extra_smk_ar unlock, report, extra_smk_args, + extra_targets=["spatial_all"] if spatial else [], ) diff --git a/src/evalml/config.py b/src/evalml/config.py index 802afabb..2ea773a2 100644 --- a/src/evalml/config.py +++ b/src/evalml/config.py @@ -200,6 +200,34 @@ class BaselineItem(BaseModel): baseline: BaselineConfig +class SpatialVerificationConfig(BaseModel): + """Parameters controlling which spatial verification plots are produced.""" + + params: List[str] = Field( + default=["T_2M", "TD_2M", "U_10M", "V_10M", "SP_10M", "PS", "PMSL", "TOT_PREC"], + description=( + "List of parameters to plot. Supported values: T_2M, TD_2M, U_10M, V_10M, " + "PS, PMSL, TOT_PREC (native), and SP_10M (derived wind speed from U_10M/V_10M)." 
+ ), + ) + leadtimes: List[int] = Field( + default=list(range(6, 121, 6)), + description="List of lead times (hours) to plot.", + ) + metrics: List[str] = Field( + default=["BIAS", "RMSE", "MAE"], + description="List of verification metrics to plot.", + ) + regions: List[str] = Field( + default=["switzerland", "centraleurope"], + description="List of regions to plot.", + ) + seasons: List[str] = Field( + default=["all", "DJF", "MAM", "JJA", "SON"], + description="List of seasons to plot.", + ) + + class Locations(BaseModel): """Locations of data and services used in the workflow.""" @@ -310,6 +338,10 @@ class ConfigModel(BaseModel): stratification: Stratification locations: Locations profile: Profile + spatial_verification: SpatialVerificationConfig = Field( + default_factory=SpatialVerificationConfig, + description="Parameters for spatial verification plots (used with --spatial flag).", + ) model_config = { "extra": "forbid", # fail on misspelled keys diff --git a/workflow/Snakefile b/workflow/Snakefile index edd220ce..d8d6ea11 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -126,26 +126,21 @@ rule experiment_all: rules.verif_metrics_plot.output, experiment=EXPERIMENT_NAME, ), + + +rule spatial_all: + """Target rule for spatial verification (opt-in via evalml experiment --spatial).""" + input: expand( rules.plot_summary_stat_maps.output, run_id=collect_all_candidates(), - leadtime=[6], - metric=["BIAS"], - param=["SP_10M"], - region=["centraleurope", "switzerland"], - season=["all", "DJF", "MAM", "JJA", "SON"], + leadtime=config["spatial_verification"]["leadtimes"], + metric=config["spatial_verification"]["metrics"], + param=config["spatial_verification"]["params"], + region=config["spatial_verification"]["regions"], + season=config["spatial_verification"]["seasons"], experiment=EXPERIMENT_NAME, ), - # expand( - # OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", - # 
baseline_id=collect_all_baselines(), - # leadtime=[6], - # metric=["BIAS"], - # param=["SP_10M"], - # region=["centraleurope", "switzerland"], - # season=["all", "DJF", "MAM", "JJA", "SON"], - # experiment=EXPERIMENT_NAME - # ) rule showcase_all: From df65a717f23b48237e4a09bef429552fbddd624a Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 2 Apr 2026 11:28:53 +0200 Subject: [PATCH 67/70] Addendum suggested by Claude. --- workflow/tools/config.schema.json | 99 +++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/workflow/tools/config.schema.json b/workflow/tools/config.schema.json index cc66d7fb..b483ade3 100644 --- a/workflow/tools/config.schema.json +++ b/workflow/tools/config.schema.json @@ -458,6 +458,101 @@ "title": "Profile", "type": "object" }, + "SpatialVerificationConfig": { + "description": "Parameters controlling which spatial verification plots are produced.", + "properties": { + "params": { + "default": [ + "T_2M", + "TD_2M", + "U_10M", + "V_10M", + "SP_10M", + "PS", + "PMSL", + "TOT_PREC" + ], + "description": "List of parameters to plot. 
Supported values: T_2M, TD_2M, U_10M, V_10M, PS, PMSL, TOT_PREC (native), and SP_10M (derived wind speed from U_10M/V_10M).", + "items": { + "type": "string" + }, + "title": "Params", + "type": "array" + }, + "leadtimes": { + "default": [ + 6, + 12, + 18, + 24, + 30, + 36, + 42, + 48, + 54, + 60, + 66, + 72, + 78, + 84, + 90, + 96, + 102, + 108, + 114, + 120 + ], + "description": "List of lead times (hours) to plot.", + "items": { + "type": "integer" + }, + "title": "Leadtimes", + "type": "array" + }, + "metrics": { + "default": [ + "BIAS", + "RMSE", + "MAE" + ], + "description": "List of verification metrics to plot.", + "items": { + "type": "string" + }, + "title": "Metrics", + "type": "array" + }, + "regions": { + "default": [ + "switzerland", + "centraleurope" + ], + "description": "List of regions to plot.", + "items": { + "type": "string" + }, + "title": "Regions", + "type": "array" + }, + "seasons": { + "default": [ + "all", + "DJF", + "MAM", + "JJA", + "SON" + ], + "description": "List of seasons to plot.", + "items": { + "type": "string" + }, + "title": "Seasons", + "type": "array" + } + }, + "title": "SpatialVerificationConfig", + "type": "object" + }, "Stratification": { "description": "Stratification settings for the analysis.", "properties": { @@ -579,6 +674,10 @@ }, "profile": { "$ref": "#/$defs/Profile" + }, + "spatial_verification": { + "$ref": "#/$defs/SpatialVerificationConfig", + "description": "Parameters for spatial verification plots (used with --spatial flag)." } }, "required": [ From be1bbefe818140433c6d09afd9038e0fcd7e8610 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 2 Apr 2026 17:16:58 +0200 Subject: [PATCH 68/70] Add default experiment config Remove later, only for frl development (feel free to use). 
--- ...l_adj_lfr_05_Stage_E_only_default_exp.yaml | 87 +++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 config/forecasters-ich1-oper-small_adj_lfr_05_Stage_E_only_default_exp.yaml diff --git a/config/forecasters-ich1-oper-small_adj_lfr_05_Stage_E_only_default_exp.yaml b/config/forecasters-ich1-oper-small_adj_lfr_05_Stage_E_only_default_exp.yaml new file mode 100644 index 00000000..d9051a94 --- /dev/null +++ b/config/forecasters-ich1-oper-small_adj_lfr_05_Stage_E_only_default_exp.yaml @@ -0,0 +1,87 @@ +# yaml-language-server: $schema=../workflow/tools/config.schema.json +description: | + Evaluate skill of ICON-CH1 forecasters from experiment 602 (excluding stage_B, AE, multi_dataset_icon_era5). + +dates: + start: 2025-03-01T00:00 + end: 2025-05-31T00:00 + frequency: 24h + + +runs: + + - forecaster: + inference_resources: + slurm_partition: normal-shared + checkpoint: https://service.meteoswiss.ch/mlstore#/experiments/602/runs/fd63e17043014af59170c7beca516b95 + label: stage_E_realch1 + steps: 0/120/6 + config: resources/inference/configs/sgm-multidataset-forecaster-global-ich1-oper.yaml + extra_requirements: + - git+https://github.com/ecmwf/anemoi-inference.git@b9aaee5df86614cad9d8d08b76876a4be4e980db + + +baselines: + - baseline: + baseline_id: ICON-CH1-EPS + label: ICON-CH1-ctrl + root: /scratch/mch/cmerker/ICON-CH1-EPS + steps: 0/33/6 + - baseline: + baseline_id: ICON-CH2-EPS + label: ICON-CH2-ctrl + root: /scratch/mch/cmerker/ICON-CH2-EPS + steps: 0/120/6 + +truth: + label: KENDA-CH1 + root: /store_new/mch/msopr/ml/datasets/mch-ich1-1km-2024-2025-1h-pl13-v1.0.zarr + +stratification: + regions: + - jura + - mittelland + - voralpen + - alpennordhang + - innerealpentaeler + - alpensuedseite + root: /scratch/mch/bhendj/regions/Prognoseregionen_LV95_20220517 + +locations: + output_root: /scratch/mch/lfrey/2025_SEN_eval_ML/software/evalml/xyz_abc/evalml/output/ + +profile: + executor: slurm + global_resources: + gpus: 16 + default_resources: + 
slurm_partition: "postproc" + cpus_per_task: 1 + mem_mb_per_cpu: 1800 + runtime: "1h" + gpus: 0 + jobs: 50 + batch_rules: + plot_forecast_frame: 32 + +spatial_verification: + params: + - T_2M + - TD_2M + - U_10M + - V_10M + - SP_10M + - PS + - PMSL + - TOT_PREC + leadtimes: + - 6 + - 24 + metrics: + - BIAS + - RMSE + regions: + - switzerland + - centraleurope + seasons: + - all From cf4ae6de70d72c98357c0746be7ddca2e2c387c7 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 2 Apr 2026 21:35:55 +0200 Subject: [PATCH 69/70] Spatial Verification Pipeline also for Baselines. --- workflow/Snakefile | 18 +++++-- workflow/rules/plot.smk | 2 +- workflow/rules/verif.smk | 109 ++++++++++++++++++++++++++------------- 3 files changed, 88 insertions(+), 41 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index d8d6ea11..13ea8f05 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -135,10 +135,20 @@ rule spatial_all: rules.plot_summary_stat_maps.output, run_id=collect_all_candidates(), leadtime=config["spatial_verification"]["leadtimes"], - metric=config["spatial_verification"]["metrics"], - param=config["spatial_verification"]["params"], - region=config["spatial_verification"]["regions"], - season=config["spatial_verification"]["seasons"], + metric =config["spatial_verification"]["metrics"], + param =config["spatial_verification"]["params"], + region =config["spatial_verification"]["regions"], + season =config["spatial_verification"]["seasons"], + experiment=EXPERIMENT_NAME, + ), + expand( + rules.plot_summary_stat_maps_baseline.output, + baseline_id=list(BASELINE_CONFIGS), + leadtime=config["spatial_verification"]["leadtimes"], + metric =config["spatial_verification"]["metrics"], + param =config["spatial_verification"]["params"], + region =config["spatial_verification"]["regions"], + season =config["spatial_verification"]["seasons"], experiment=EXPERIMENT_NAME, ), diff --git a/workflow/rules/plot.smk b/workflow/rules/plot.smk index c8550d4d..7a15fddb 
100644 --- a/workflow/rules/plot.smk +++ b/workflow/rules/plot.smk @@ -174,7 +174,7 @@ rule plot_summary_stat_maps: use rule plot_summary_stat_maps as plot_summary_stat_maps_baseline with: input: script="workflow/scripts/plot_summary_stat_maps.mo.py", - verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_aggregated.nc", + verif_file=OUT_ROOT / "data/baselines/{baseline_id}/verif_spatial/{param}_{leadtime}.nc", output: OUT_ROOT / "results/{experiment}/metrics/spatial/baselines/{baseline_id}/{param}_{metric}_{region}_{season}_{leadtime}.png", log: diff --git a/workflow/rules/verif.smk b/workflow/rules/verif.smk index 13a2970f..d4d0dba6 100644 --- a/workflow/rules/verif.smk +++ b/workflow/rules/verif.smk @@ -96,42 +96,6 @@ rule verif_metrics: --output {output} > {log} 2>&1 """ -rule verif_metrics_spatial: - input: - "src/verification/__init__.py", - "src/data_input/__init__.py", - script="workflow/scripts/verif_spatial.py", - inference_okfiles=lambda wc: expand( - rules.execute_inference.output.okfile, - init_time=_restrict_reftimes_to_hours(REFTIMES), - allow_missing=True, - ), - truth=config["truth"]["root"], - output: - OUT_ROOT / "data/runs/{run_id}/verif_spatial/{param}_{leadtime}.nc", - # wildcard_constraints: - # run_id="^" # to avoid ambiguitiy with run_baseline_verif - # TODO: implement logic to use experiment name instead of run_id as wildcard - params: - fcst_label=lambda wc: RUN_CONFIGS[wc.run_id].get("label"), - fcst_steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"], - truth_label=config["truth"]["label"], - log: - OUT_ROOT / "logs/verif_metrics_spatial/{run_id}-{param}-{leadtime}.log", - resources: - cpus_per_task=24, - mem_mb=50_000, - runtime="60m", - slurm_extra="--exclude=nid001229,nid001225,nid001226,nid001227,nid001230" - shell: - """ - uv run workflow/scripts/verif_spatial.py \ - --run_root output/data/runs/{wildcards.run_id} \ - --truth {input.truth} \ - --step {wildcards.leadtime} \ - --param {wildcards.param} \ - --output {output} > {log} 
2>&1 - """ def _restrict_reftimes_to_hours(reftimes, hours=None): @@ -199,3 +163,76 @@ rule verif_metrics_plot: """ uv run {input.script} {input.verif} --output_dir {output} > {log} 2>&1 """ + +rule verif_metrics_spatial: + input: + "src/verification/__init__.py", + "src/data_input/__init__.py", + script="workflow/scripts/verif_spatial.py", + inference_okfiles=lambda wc: expand( + rules.execute_inference.output.okfile, + init_time=_restrict_reftimes_to_hours(REFTIMES), + allow_missing=True, + ), + truth=config["truth"]["root"], + output: + OUT_ROOT / "data/runs/{run_id}/verif_spatial/{param}_{leadtime}.nc", + # wildcard_constraints: + # run_id="^" # to avoid ambiguitiy with run_baseline_verif + # TODO: implement logic to use experiment name instead of run_id as wildcard + params: + fcst_label=lambda wc: RUN_CONFIGS[wc.run_id].get("label"), + fcst_steps=lambda wc: RUN_CONFIGS[wc.run_id]["steps"], + truth_label=config["truth"]["label"], + log: + OUT_ROOT / "logs/verif_metrics_spatial/{run_id}-{param}-{leadtime}.log", + resources: + cpus_per_task=24, + mem_mb=50_000, + runtime="60m", + slurm_extra="--exclude=nid001229,nid001225,nid001226,nid001227,nid001230" + shell: + """ + uv run {input.script} \ + --run_root output/data/runs/{wildcards.run_id} \ + --truth {input.truth} \ + --step {wildcards.leadtime} \ + --param {wildcards.param} \ + --output {output} > {log} 2>&1 + """ + +rule verif_metrics_spatial_baseline: + input: + script="workflow/scripts/verif_spatial_baseline.py", + baseline_zarrs=lambda wc: expand( + "{root}/FCST{year}.zarr", + root=BASELINE_CONFIGS[wc.baseline_id].get("root"), + year=sorted({t.strftime("%y") for t in REFTIMES}), + ), + truth=config["truth"]["root"], + output: + OUT_ROOT / "data/baselines/{baseline_id}/verif_spatial/{param}_{leadtime}.nc", + params: + baseline_label=lambda wc: BASELINE_CONFIGS[wc.baseline_id].get("label"), + baseline_steps=lambda wc: BASELINE_CONFIGS[wc.baseline_id]["steps"], + baseline_root=lambda wc: 
BASELINE_CONFIGS[wc.baseline_id].get("root"), + truth_label=config["truth"]["label"], + log: + OUT_ROOT / "logs/verif_metrics_spatial_baseline/{baseline_id}-{param}-{leadtime}.log", + resources: + cpus_per_task=24, + mem_mb=50_000, + runtime="60m", + slurm_extra="--exclude=nid001229,nid001225,nid001226,nid001227,nid001230" + shell: + """ + uv run {input.script} \ + --baseline_root {params.baseline_root} \ + --truth {input.truth} \ + --step {wildcards.leadtime} \ + --param {wildcards.param} \ + --label "{params.baseline_label}" \ + --steps "{params.baseline_steps}" \ + --truth_label "{params.truth_label}" \ + --output {output} > {log} 2>&1 + """ From c6a392d39387561837d8981fba94c2c1e3479a08 Mon Sep 17 00:00:00 2001 From: Louis-Frey Date: Thu, 2 Apr 2026 22:00:02 +0200 Subject: [PATCH 70/70] Cosmetic Change. --- workflow/Snakefile | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 13ea8f05..49c1a427 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -134,21 +134,21 @@ rule spatial_all: expand( rules.plot_summary_stat_maps.output, run_id=collect_all_candidates(), - leadtime=config["spatial_verification"]["leadtimes"], - metric =config["spatial_verification"]["metrics"], - param =config["spatial_verification"]["params"], - region =config["spatial_verification"]["regions"], - season =config["spatial_verification"]["seasons"], + leadtime = config["spatial_verification"]["leadtimes"], + metric = config["spatial_verification"]["metrics"], + param = config["spatial_verification"]["params"], + region = config["spatial_verification"]["regions"], + season = config["spatial_verification"]["seasons"], experiment=EXPERIMENT_NAME, ), expand( rules.plot_summary_stat_maps_baseline.output, baseline_id=list(BASELINE_CONFIGS), - leadtime=config["spatial_verification"]["leadtimes"], - metric =config["spatial_verification"]["metrics"], - param =config["spatial_verification"]["params"], - region 
=config["spatial_verification"]["regions"], - season =config["spatial_verification"]["seasons"], + leadtime = config["spatial_verification"]["leadtimes"], + metric = config["spatial_verification"]["metrics"], + param = config["spatial_verification"]["params"], + region = config["spatial_verification"]["regions"], + season = config["spatial_verification"]["seasons"], experiment=EXPERIMENT_NAME, ),