From d4799ee2a38e0f475d67b43134965df7df9d626a Mon Sep 17 00:00:00 2001
From: Chiara Ghielmini <Chiara.Ghielmini@meteoswiss.ch>
Date: Wed, 25 Mar 2026 16:49:02 +0100
Subject: [PATCH 1/6] add reports again

---
 util/dataframe_ops.py |  1 +
 util/fof_utils.py     | 23 ++++++++++++++---------
 2 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/util/dataframe_ops.py b/util/dataframe_ops.py
index 7720ab8..e4f577f 100644
--- a/util/dataframe_ops.py
+++ b/util/dataframe_ops.py
@@ -480,6 +480,7 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)
                 ref_df[list(cols_without_rules)].to_xarray(),
                 cur_df[list(cols_without_rules)].to_xarray(),
                 detailed_logger,
+                key
             )
             if t != e:
                 return True
diff --git a/util/fof_utils.py b/util/fof_utils.py
index f9e544d..fabd6a3 100644
--- a/util/fof_utils.py
+++ b/util/fof_utils.py
@@ -169,7 +169,7 @@ def write_different_size_log(var, size1, size2, detailed_logger):
     )
 
 
-def compare_var_and_attr_ds(ds1, ds2, detailed_logger):
+def compare_var_and_attr_ds(ds1, ds2, detailed_logger, key):
     """
     Variable by variable and attribute by attribute,
     comparison of the two datasets.
@@ -178,23 +178,23 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger):
     total_all, equal_all = 0, 0
     list_to_skip = ["source", "i_body", "l_body", "veri_data"]
 
-    for var in sorted(set(ds1.data_vars).union(ds2.data_vars)):
-        if var in ds1.data_vars and var in ds2.data_vars and var not in list_to_skip:
+    for var in set(ds1.data_vars).intersection(ds2.data_vars):
+        if key == "reports" and var not in list_to_skip:
 
-            total, equal = process_var(ds1, ds2, var, detailed_logger)
+            total, equal = process_var(ds1, ds2, var, detailed_logger, prova="vars")
             total_all += total
             equal_all += equal
 
-        if var in ds1.attrs and var in ds2.attrs and var not in list_to_skip:
+        if key == "observations" and var not in list_to_skip:
 
-            total, equal = process_var(ds1, ds2, var, detailed_logger)
+            total, equal = process_var(ds1, ds2, var, detailed_logger, prova="attrs")
             total_all += total
             equal_all += equal
 
     return total_all, equal_all
 
 
-def process_var(ds1, ds2, var, detailed_logger):
+def process_var(ds1, ds2, var, detailed_logger, prova=None):
     """
     This function first checks whether two arrays have the same size.
     If they do, their values are compared.
@@ -203,8 +203,13 @@ def process_var(ds1, ds2, var, detailed_logger):
     number of matching elements.
     """
 
-    arr1 = fill_nans_for_float32(ds1[var].values)
-    arr2 = fill_nans_for_float32(ds2[var].values)
+    if prova == "attrs":
+        arr1 = np.array(ds1[var], dtype=object)
+        arr2 = np.array(ds2[var], dtype=object)
+    if prova == "vars":
+        arr1 = fill_nans_for_float32(ds1[var].values)
+        arr2 = fill_nans_for_float32(ds2[var].values)
+
     if arr1.size == arr2.size:
         t, e, diff = compare_arrays(arr1, arr2, var)
         if diff.size != 0:

From 255dd54d631ac46d07a3444bd1694455782df985 Mon Sep 17 00:00:00 2001
From: Chiara Ghielmini <Chiara.Ghielmini@meteoswiss.ch>
Date: Mon, 30 Mar 2026 11:31:30 +0200
Subject: [PATCH 2/6] restore old version

---
 util/fof_utils.py | 62 ++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 56 insertions(+), 6 deletions(-)

diff --git a/util/fof_utils.py b/util/fof_utils.py
index fabd6a3..706c7e6 100644
--- a/util/fof_utils.py
+++ b/util/fof_utils.py
@@ -181,19 +181,69 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger, key):
     for var in set(ds1.data_vars).intersection(ds2.data_vars):
         if key == "reports" and var not in list_to_skip:
 
-            total, equal = process_var(ds1, ds2, var, detailed_logger, prova="vars")
-            total_all += total
-            equal_all += equal
+            arr1 = fill_nans_for_float32(ds1[var].values)
+            arr2 = fill_nans_for_float32(ds2[var].values)
+
+            if arr1.size == arr2.size:
+                t, e, diff = compare_arrays(arr1, arr2, var)
+            
+            else:
+                t, e = max(arr1.size, arr2.size), 0
+                write_different_size(var, arr1.size, arr2.size, detailed_logger)
+
+            #total, equal = process_var(ds1, ds2, var, detailed_logger, prova="vars")
+            total_all += t
+            equal_all += e
 
         if key == "observations" and var not in list_to_skip:
 
-            total, equal = process_var(ds1, ds2, var, detailed_logger, prova="attrs")
-            total_all += total
-            equal_all += equal
+            arr1 = np.array(ds1.attrs[var], dtype=object)
+            arr2 = np.array(ds2.attrs[var], dtype=object)
+            if arr1.size == arr2.size:
+                t, e, diff = compare_arrays(arr1, arr2, var)
+
+            else:
+                t, e = max(arr1.size, arr2.size), 0
+                write_different_size_log(var, arr1.size, arr2.size, detailed_logger)
+
+            total_all += t
+            equal_all += e
 
     return total_all, equal_all
 
 
+def compare_arrays(arr1, arr2, var_name):
+    """
+    Comparison of two arrays containing the values of the same variable.
+    If not the same, it tells you in percentage terms how different they are.
+    """
+    total = arr1.size
+
+    if np.array_equal(arr1, arr2):
+        equal = total
+        diff = np.array([])
+
+    elif (
+        np.issubdtype(arr1.dtype, np.number)
+        and np.issubdtype(arr2.dtype, np.number)
+        and np.array_equal(arr1, arr2, equal_nan=True)
+    ):
+        equal = total
+        diff = np.array([])
+
+    else:
+        mask_equal = arr1 == arr2
+        equal = mask_equal.sum()
+        percent = (equal / total) * 100
+        print(
+            f"Differences in '{var_name}': {percent:.2f}% equal. "
+            f"{total} total entries for this variable"
+        )
+        diff_idx = np.where(~mask_equal.ravel())[0]
+        diff = diff_idx
+
+    return total, equal, diff
+
 def process_var(ds1, ds2, var, detailed_logger, prova=None):
     """
     This function first checks whether two arrays have the same size.

From 9ebe8d1593921646a2c656ac5e2df51059c30559 Mon Sep 17 00:00:00 2001
From: Chiara Ghielmini <Chiara.Ghielmini@meteoswiss.ch>
Date: Mon, 30 Mar 2026 13:38:49 +0200
Subject: [PATCH 3/6] invert repo obs

---
 util/fof_utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/util/fof_utils.py b/util/fof_utils.py
index 706c7e6..0f097fe 100644
--- a/util/fof_utils.py
+++ b/util/fof_utils.py
@@ -179,7 +179,7 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger, key):
     list_to_skip = ["source", "i_body", "l_body", "veri_data"]
 
     for var in set(ds1.data_vars).intersection(ds2.data_vars):
-        if key == "reports" and var not in list_to_skip:
+        if key == "observations" and var not in list_to_skip:
 
             arr1 = fill_nans_for_float32(ds1[var].values)
             arr2 = fill_nans_for_float32(ds2[var].values)
@@ -195,7 +195,7 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger, key):
             total_all += t
             equal_all += e
 
-        if key == "observations" and var not in list_to_skip:
+        if key == "reports" and var not in list_to_skip:
 
             arr1 = np.array(ds1.attrs[var], dtype=object)
             arr2 = np.array(ds2.attrs[var], dtype=object)

From 2037afab62023ee59990b92d4c8648351d00e44f Mon Sep 17 00:00:00 2001
From: Chiara Ghielmini <Chiara.Ghielmini@meteoswiss.ch>
Date: Mon, 30 Mar 2026 13:47:34 +0200
Subject: [PATCH 4/6] Swap reports/observations branches back and read ds[var] instead of ds.attrs[var]

---
 util/fof_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/util/fof_utils.py b/util/fof_utils.py
index 0f097fe..faae563 100644
--- a/util/fof_utils.py
+++ b/util/fof_utils.py
@@ -179,7 +179,7 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger, key):
     list_to_skip = ["source", "i_body", "l_body", "veri_data"]
 
     for var in set(ds1.data_vars).intersection(ds2.data_vars):
-        if key == "observations" and var not in list_to_skip:
+        if key == "reports" and var not in list_to_skip:
 
             arr1 = fill_nans_for_float32(ds1[var].values)
             arr2 = fill_nans_for_float32(ds2[var].values)
@@ -195,10 +195,10 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger, key):
             total_all += t
             equal_all += e
 
-        if key == "reports" and var not in list_to_skip:
+        if key == "observations" and var not in list_to_skip:
 
-            arr1 = np.array(ds1.attrs[var], dtype=object)
-            arr2 = np.array(ds2.attrs[var], dtype=object)
+            arr1 = np.array(ds1[var], dtype=object)
+            arr2 = np.array(ds2[var], dtype=object)
             if arr1.size == arr2.size:
                 t, e, diff = compare_arrays(arr1, arr2, var)
 

From d66f51838a4d0b57a5afe39b69e51499a574377c Mon Sep 17 00:00:00 2001
From: Chiara Ghielmini <Chiara.Ghielmini@meteoswiss.ch>
Date: Tue, 31 Mar 2026 10:30:32 +0200
Subject: [PATCH 5/6] Drop key argument and compare common variables via process_var

---
 util/dataframe_ops.py |  8 +++++--
 util/fof_utils.py     | 49 ++++++++++---------------------------------
 2 files changed, 17 insertions(+), 40 deletions(-)

diff --git a/util/dataframe_ops.py b/util/dataframe_ops.py
index e4f577f..ec433b3 100644
--- a/util/dataframe_ops.py
+++ b/util/dataframe_ops.py
@@ -82,10 +82,13 @@ def parse_probtest_fof(path):
     and df_obs respectively.
     """
     ds = xr.open_dataset(path)
+    
     ds_report, ds_obs = split_feedback_dataset(ds)
+  
     df_report, df_obs = (
         pd.DataFrame(d.to_dataframe().reset_index()) for d in (ds_report, ds_obs)
     )
+  
 
     return df_report, df_obs
 
@@ -475,12 +478,13 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)
         cols_with_rules = [col for col in common_cols if col in rules_dict]
         cols_without_rules = [col for col in common_cols if col not in rules_dict]
 
+        
+
         if cols_without_rules:
             t, e = compare_var_and_attr_ds(
                 ref_df[list(cols_without_rules)].to_xarray(),
                 cur_df[list(cols_without_rules)].to_xarray(),
-                detailed_logger,
-                key
+                detailed_logger
             )
             if t != e:
                 return True
diff --git a/util/fof_utils.py b/util/fof_utils.py
index faae563..db51c54 100644
--- a/util/fof_utils.py
+++ b/util/fof_utils.py
@@ -50,6 +50,7 @@ def split_feedback_dataset(ds):
 
     sort_keys_reports = ["lat", "lon", "statid", "time_nomi", "codetype"]
     ds_report_sorted = ds_reports.sortby(sort_keys_reports)
+    print(ds_report_sorted["r_check"].values)
 
     lbody = ds["l_body"].values
 
@@ -119,7 +120,8 @@ def clean_value(x):
     alignment when printing the value.
     """
     if isinstance(x, bytes):
-        return x.decode("utf-8", errors="replace").rstrip(" '")
+        # errors="replace": an undecodable byte must not raise while formatting
+        return x.decode("utf-8", errors="replace").rstrip(" '")
     return str(x).rstrip(" '")
 
 
@@ -169,7 +171,7 @@ def write_different_size_log(var, size1, size2, detailed_logger):
     )
 
 
-def compare_var_and_attr_ds(ds1, ds2, detailed_logger, key):
+def compare_var_and_attr_ds(ds1, ds2, detailed_logger):
     """
     Variable by variable and attribute by attribute,
     comparison of the two datasets.
@@ -178,36 +180,12 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger, key):
     total_all, equal_all = 0, 0
     list_to_skip = ["source", "i_body", "l_body", "veri_data"]
 
-    for var in set(ds1.data_vars).intersection(ds2.data_vars):
-        if key == "reports" and var not in list_to_skip:
+    for var in set(ds1.data_vars).union(ds2.data_vars):
+        if var in ds1.data_vars and var in ds2.data_vars and var not in list_to_skip:
 
-            arr1 = fill_nans_for_float32(ds1[var].values)
-            arr2 = fill_nans_for_float32(ds2[var].values)
-
-            if arr1.size == arr2.size:
-                t, e, diff = compare_arrays(arr1, arr2, var)
-            
-            else:
-                t, e = max(arr1.size, arr2.size), 0
-                write_different_size(var, arr1.size, arr2.size, detailed_logger)
-
-            #total, equal = process_var(ds1, ds2, var, detailed_logger, prova="vars")
-            total_all += t
-            equal_all += e
-
-        if key == "observations" and var not in list_to_skip:
-
-            arr1 = np.array(ds1[var], dtype=object)
-            arr2 = np.array(ds2[var], dtype=object)
-            if arr1.size == arr2.size:
-                t, e, diff = compare_arrays(arr1, arr2, var)
-
-            else:
-                t, e = max(arr1.size, arr2.size), 0
-                write_different_size_log(var, arr1.size, arr2.size, detailed_logger)
-
-            total_all += t
-            equal_all += e
+            total, equal = process_var(ds1, ds2, var, detailed_logger)
+            total_all += total
+            equal_all += equal
 
     return total_all, equal_all
 
@@ -252,13 +230,8 @@ def process_var(ds1, ds2, var, detailed_logger, prova=None):
     The function outputs the total number of elements and the
     number of matching elements.
     """
-
-    if prova == "attrs":
-        arr1 = np.array(ds1[var], dtype=object)
-        arr2 = np.array(ds2[var], dtype=object)
-    if prova == "vars":
-        arr1 = fill_nans_for_float32(ds1[var].values)
-        arr2 = fill_nans_for_float32(ds2[var].values)
+    arr1 = fill_nans_for_float32(ds1[var].values)
+    arr2 = fill_nans_for_float32(ds2[var].values)
 
     if arr1.size == arr2.size:
         t, e, diff = compare_arrays(arr1, arr2, var)

From 5a24388e497b685e8e14b81fe9e6e3b61e647933 Mon Sep 17 00:00:00 2001
From: Chiara Ghielmini <Chiara.Ghielmini@meteoswiss.ch>
Date: Thu, 2 Apr 2026 10:32:34 +0200
Subject: [PATCH 6/6] Allow float64 in fill_nans_for_float32 and scalar divisor in compute_division

---
 util/dataframe_ops.py | 13 +++++++++----
 util/fof_utils.py     | 12 +++++++-----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/util/dataframe_ops.py b/util/dataframe_ops.py
index ec433b3..dc08dfb 100644
--- a/util/dataframe_ops.py
+++ b/util/dataframe_ops.py
@@ -47,12 +47,17 @@ def compute_rel_diff_dataframe(df1, df2):
     return out
 
 
-def compute_division(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
+def compute_division(df1: pd.DataFrame, df2) -> pd.DataFrame:
     # avoid division by 0 and put nan instead
-    out = df1 / df2.replace({0: np.nan})
-    # put 0 if numerator is 0 as well
+    if np.isscalar(df2):
+        if df2 == 0:
+            return df1 * np.nan
+        out = df1 / df2
+    else:
+        out = df1 / df2.replace({0: np.nan})
+    # put 0 if numerator is 0 as well
     out[df1 == 0] = 0
-    return out
+    return out
 
 
 def parse_probtest_stats(path, index_col=None):
diff --git a/util/fof_utils.py b/util/fof_utils.py
index db51c54..42c0e90 100644
--- a/util/fof_utils.py
+++ b/util/fof_utils.py
@@ -109,9 +109,12 @@ def fill_nans_for_float32(arr):
     """
     To make sure nan values are recognised.
     """
-    if arr.dtype == np.float32 and np.isnan(arr).any():
-        return np.where(np.isnan(arr), -999999, arr)
-    return arr
+    if not np.issubdtype(arr.dtype, np.floating):
+        return arr
+
+    arr = arr.astype(np.float64, copy=False)
+
+    return np.where(np.isnan(arr), -999999.0, arr)
 
 
 def clean_value(x):
@@ -182,7 +185,6 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger):
 
     for var in set(ds1.data_vars).union(ds2.data_vars):
         if var in ds1.data_vars and var in ds2.data_vars and var not in list_to_skip:
-
             total, equal = process_var(ds1, ds2, var, detailed_logger)
             total_all += total
             equal_all += equal
@@ -222,7 +224,7 @@ def compare_arrays(arr1, arr2, var_name):
 
     return total, equal, diff
 
-def process_var(ds1, ds2, var, detailed_logger, prova=None):
+def process_var(ds1, ds2, var, detailed_logger):
     """
     This function first checks whether two arrays have the same size.
     If they do, their values are compared.