MeteoSwiss · cghielmini · Mar 25, 2026 · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
diff --git a/util/dataframe_ops.py b/util/dataframe_ops.py
@@ -47,12 +47,17 @@ def compute_rel_diff_dataframe(df1, df2):
     return out
 
 
-def compute_division(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
+def compute_division(df1: pd.DataFrame, df2) -> pd.DataFrame:
     # avoid division by 0 and put nan instead
-    out = df1 / df2.replace({0: np.nan})
+    if np.isscalar(df2):
+        if df2 == 0:
+            return df1 * np.nan
+        out = df1 / df2
+    else:
+        out = df1 / df2.replace({0: np.nan})
     # put 0 if numerator is 0 as well
     out[df1 == 0] = 0
     return out
 
 
 def parse_probtest_stats(path, index_col=None):
@@ -82,10 +87,13 @@ def parse_probtest_fof(path):
     and df_obs respectively.
     """
     ds = xr.open_dataset(path)
+
     ds_report, ds_obs = split_feedback_dataset(ds)
+
     df_report, df_obs = (
         pd.DataFrame(d.to_dataframe().reset_index()) for d in (ds_report, ds_obs)
     )
+
 
     return df_report, df_obs
 
@@ -475,11 +483,13 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)
         cols_with_rules = [col for col in common_cols if col in rules_dict]
         cols_without_rules = [col for col in common_cols if col not in rules_dict]
 
+
+
         if cols_without_rules:
             t, e = compare_var_and_attr_ds(
                 ref_df[list(cols_without_rules)].to_xarray(),
                 cur_df[list(cols_without_rules)].to_xarray(),
-                detailed_logger,
+                detailed_logger
             )
             if t != e:
                 return True

diff --git a/util/fof_utils.py b/util/fof_utils.py
@@ -50,6 +50,7 @@ def split_feedback_dataset(ds):
 
     sort_keys_reports = ["lat", "lon", "statid", "time_nomi", "codetype"]
     ds_report_sorted = ds_reports.sortby(sort_keys_reports)
+    print(ds_report_sorted["r_check"].values)
 
     lbody = ds["l_body"].values
 
@@ -108,9 +109,12 @@ def fill_nans_for_float32(arr):
     """
     To make sure nan values are recognised.
     """
-    if arr.dtype == np.float32 and np.isnan(arr).any():
-        return np.where(np.isnan(arr), -999999, arr)
-    return arr
+    if not np.issubdtype(arr.dtype, np.floating):
+        return arr
+
+    arr = arr.astype(np.float64, copy=False)
+
+    return np.where(np.isnan(arr), -999999.0, arr)
 
 
 def clean_value(x):
@@ -119,7 +123,8 @@ def clean_value(x):
     alignment when printing the value.
     """
     if isinstance(x, bytes):
+        # errors="replace" keeps undecodable bytes from raising
         return x.decode("utf-8", errors="replace").rstrip(" '")
     return str(x).rstrip(" '")
 
 
@@ -178,21 +183,46 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger):
     total_all, equal_all = 0, 0
     list_to_skip = ["source", "i_body", "l_body", "veri_data"]
 
-    for var in sorted(set(ds1.data_vars).union(ds2.data_vars)):
+    for var in set(ds1.data_vars).union(ds2.data_vars):
         if var in ds1.data_vars and var in ds2.data_vars and var not in list_to_skip:
-
             total, equal = process_var(ds1, ds2, var, detailed_logger)
             total_all += total
             equal_all += equal
 
-        if var in ds1.attrs and var in ds2.attrs and var not in list_to_skip:
+    return total_all, equal_all
 
-            total, equal = process_var(ds1, ds2, var, detailed_logger)
-            total_all += total
-            equal_all += equal
 
-    return total_all, equal_all
+def compare_arrays(arr1, arr2, var_name):
+    """
+    Comparison of two arrays containing the values of the same variable.
+    If not the same, it tells you in percentage terms how different they are.
+    Returns the number of entries, the number of equal entries and the flat
+    indices of the differing entries (empty when everything matches).
+    """
+    total = arr1.size
+
+    # nan-aware comparison is only defined for numeric dtypes
+    numeric = np.issubdtype(arr1.dtype, np.number) and np.issubdtype(
+        arr2.dtype, np.number
+    )
+    if np.array_equal(arr1, arr2, equal_nan=numeric):
+        return total, total, np.array([])
+
+    mask_equal = arr1 == arr2
+    if numeric:
+        # nan != nan, so positions where both arrays hold nan count as equal
+        mask_equal = mask_equal | (np.isnan(arr1) & np.isnan(arr2))
+
+    equal = mask_equal.sum()
+    percent = (equal / total) * 100
+    print(
+        f"Differences in '{var_name}': {percent:.2f}% equal. "
+        f"{total} total entries for this variable"
+    )
+    diff = np.where(~mask_equal.ravel())[0]
+
+    return total, equal, diff
 
 
 def process_var(ds1, ds2, var, detailed_logger):
     """
@@ -202,9 +232,9 @@ def process_var(ds1, ds2, var, detailed_logger):
     The function outputs the total number of elements and the
     number of matching elements.
     """
-
     arr1 = fill_nans_for_float32(ds1[var].values)
     arr2 = fill_nans_for_float32(ds2[var].values)
+
     if arr1.size == arr2.size:
         t, e, diff = compare_arrays(arr1, arr2, var)
         if diff.size != 0: