Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions util/dataframe_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,17 @@ def compute_rel_diff_dataframe(df1, df2):
return out


def compute_division(df1: pd.DataFrame, df2) -> pd.DataFrame:
    """
    Divide df1 by df2 while avoiding division by zero.

    df2 may be a DataFrame (or any object offering ``replace``) or a scalar.
    Zero denominators produce NaN instead of +/-inf. When the denominator is
    not a scalar zero, entries whose numerator is 0 are set to 0 in the
    result, so 0 / 0 gives 0 rather than NaN.

    Returns a new DataFrame; df1 and df2 are not modified.
    """
    if np.isscalar(df2):
        if df2 == 0:
            # Every entry would be divided by zero: return an all-NaN frame.
            # NOTE(review): this path does not apply the "numerator 0 -> 0"
            # rule used below - confirm that this asymmetry is intended.
            return df1 * np.nan
        out = df1 / df2
    else:
        # avoid division by 0 and put nan instead
        out = df1 / df2.replace({0: np.nan})

    # put 0 if numerator is 0 as well
    out[df1 == 0] = 0
    return out


def parse_probtest_stats(path, index_col=None):
Expand Down Expand Up @@ -82,10 +87,13 @@ def parse_probtest_fof(path):
and df_obs respectively.
"""
ds = xr.open_dataset(path)

ds_report, ds_obs = split_feedback_dataset(ds)

df_report, df_obs = (
pd.DataFrame(d.to_dataframe().reset_index()) for d in (ds_report, ds_obs)
)


return df_report, df_obs

Expand Down Expand Up @@ -475,11 +483,13 @@ def check_multiple_solutions_from_dict(dict_ref, dict_cur, rules, log_file_name)
cols_with_rules = [col for col in common_cols if col in rules_dict]
cols_without_rules = [col for col in common_cols if col not in rules_dict]



if cols_without_rules:
t, e = compare_var_and_attr_ds(
ref_df[list(cols_without_rules)].to_xarray(),
cur_df[list(cols_without_rules)].to_xarray(),
detailed_logger,
detailed_logger
)
if t != e:
return True
Expand Down
54 changes: 42 additions & 12 deletions util/fof_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def split_feedback_dataset(ds):

sort_keys_reports = ["lat", "lon", "statid", "time_nomi", "codetype"]
ds_report_sorted = ds_reports.sortby(sort_keys_reports)
print(ds_report_sorted["r_check"].values)

lbody = ds["l_body"].values

Expand Down Expand Up @@ -108,9 +109,12 @@ def fill_nans_for_float32(arr):
"""
To make sure nan values are recognised.
"""
if arr.dtype == np.float32 and np.isnan(arr).any():
return np.where(np.isnan(arr), -999999, arr)
return arr
if not np.issubdtype(arr.dtype, np.floating):
return arr

arr = arr.astype(np.float64, copy=False)

return np.where(np.isnan(arr), -999999.0, arr)


def clean_value(x):
Expand All @@ -119,7 +123,8 @@ def clean_value(x):
alignment when printing the value.
"""
if isinstance(x, bytes):
return x.decode("utf-8", errors="replace").rstrip(" '")
return x.decode().rstrip(" '")
# return x.decode("utf-8", errors="replace").rstrip(" '")
return str(x).rstrip(" '")


Expand Down Expand Up @@ -178,21 +183,46 @@ def compare_var_and_attr_ds(ds1, ds2, detailed_logger):
total_all, equal_all = 0, 0
list_to_skip = ["source", "i_body", "l_body", "veri_data"]

for var in sorted(set(ds1.data_vars).union(ds2.data_vars)):
for var in set(ds1.data_vars).union(ds2.data_vars):
if var in ds1.data_vars and var in ds2.data_vars and var not in list_to_skip:

total, equal = process_var(ds1, ds2, var, detailed_logger)
total_all += total
equal_all += equal

if var in ds1.attrs and var in ds2.attrs and var not in list_to_skip:
return total_all, equal_all

total, equal = process_var(ds1, ds2, var, detailed_logger)
total_all += total
equal_all += equal

return total_all, equal_all
def compare_arrays(arr1, arr2, var_name):
    """
    Comparison of two arrays containing the values of the same variable.
    If not the same, it tells you in percentage terms how different they are.

    Returns a tuple ``(total, equal, diff)``:
    ``total`` is the number of elements of arr1, ``equal`` the number of
    matching elements, and ``diff`` the flat indices of the elements that
    differ (an empty array when the inputs are considered equal).
    """
    total = arr1.size

    # Numeric arrays are additionally considered equal when they differ only
    # by NaNs sitting at the same positions.
    both_numeric = np.issubdtype(arr1.dtype, np.number) and np.issubdtype(
        arr2.dtype, np.number
    )
    if np.array_equal(arr1, arr2) or (
        both_numeric and np.array_equal(arr1, arr2, equal_nan=True)
    ):
        return total, total, np.array([])

    mask_equal = arr1 == arr2
    equal = mask_equal.sum()
    percent = (equal / total) * 100
    print(
        f"Differences in '{var_name}': {percent:.2f}% equal. "
        f"{total} total entries for this variable"
    )
    # Flat (raveled) positions of the mismatching elements.
    diff = np.where(~mask_equal.ravel())[0]

    return total, equal, diff

def process_var(ds1, ds2, var, detailed_logger):
"""
Expand All @@ -202,9 +232,9 @@ def process_var(ds1, ds2, var, detailed_logger):
The function outputs the total number of elements and the
number of matching elements.
"""

arr1 = fill_nans_for_float32(ds1[var].values)
arr2 = fill_nans_for_float32(ds2[var].values)

if arr1.size == arr2.size:
t, e, diff = compare_arrays(arr1, arr2, var)
if diff.size != 0:
Expand Down
Loading