Skip to content

Commit 93833e3

Browse files
committed
Fix drop_conflicts to handle non-bool returns from __eq__
When merging attrs with drop_conflicts, objects whose __eq__ returns non-bool values (e.g., numpy arrays) would raise ValueError. Now these are caught and treated as non-equivalent, causing the attribute to be dropped rather than raising an error. Fixes regression from pydata#10726
1 parent 4394792 commit 93833e3

File tree

2 files changed

+143
-5
lines changed

2 files changed

+143
-5
lines changed

xarray/structure/merge.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -641,11 +641,24 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
641641
if key not in result and key not in dropped_keys
642642
}
643643
)
644-
result = {
645-
key: value
646-
for key, value in result.items()
647-
if key not in attrs or equivalent(attrs[key], value)
648-
}
644+
# Filter out attributes that have conflicts
645+
filtered_result = {}
646+
for key, value in result.items():
647+
if key not in attrs:
648+
# No conflict, keep the attribute
649+
filtered_result[key] = value
650+
else:
651+
# Check if values are equivalent
652+
try:
653+
if equivalent(attrs[key], value):
654+
# Values are equivalent, keep the attribute
655+
filtered_result[key] = value
656+
# else: Values differ, drop the attribute (don't add to filtered_result)
657+
except ValueError:
658+
# Likely an ambiguous truth value from numpy array comparison
659+
# Treat as non-equivalent and drop the attribute
660+
pass
661+
result = filtered_result
649662
dropped_keys |= {key for key in attrs if key not in result}
650663
return result
651664
elif combine_attrs == "identical":

xarray/tests/test_merge.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,131 @@ def test_merge_attrs_drop_conflicts(self):
235235
expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0})
236236
assert_identical(actual, expected)
237237

238+
def test_merge_attrs_drop_conflicts_non_bool_eq(self):
239+
"""Test drop_conflicts behavior when __eq__ returns non-bool values.
240+
241+
When comparing attribute values, the _equivalent_drop_conflicts() function
242+
uses == which can return non-bool values. The new behavior treats ambiguous
243+
or falsy equality results as non-equivalent, dropping the attribute rather
244+
than raising an error.
245+
"""
246+
import numpy as np
247+
248+
# Test with numpy arrays (which return arrays from ==)
249+
arr1 = np.array([1, 2, 3])
250+
arr2 = np.array([1, 2, 3])
251+
arr3 = np.array([4, 5, 6])
252+
253+
ds1 = xr.Dataset(attrs={"arr": arr1, "scalar": 1})
254+
ds2 = xr.Dataset(attrs={"arr": arr2, "scalar": 1}) # Same array values
255+
ds3 = xr.Dataset(attrs={"arr": arr3, "other": 2}) # Different array values
256+
257+
# Arrays are considered equivalent if they have the same values
258+
actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
259+
assert "arr" in actual.attrs # Should keep the array since they're equivalent
260+
assert actual.attrs["scalar"] == 1
261+
262+
# Different arrays cause the attribute to be dropped
263+
actual = xr.merge([ds1, ds3], combine_attrs="drop_conflicts")
264+
assert "arr" not in actual.attrs # Should drop due to conflict
265+
assert "other" in actual.attrs
266+
267+
# Test with custom objects that return non-bool from __eq__
268+
class CustomEq:
269+
"""Object whose __eq__ returns a non-bool value."""
270+
271+
def __init__(self, value):
272+
self.value = value
273+
274+
def __eq__(self, other):
275+
if not isinstance(other, CustomEq):
276+
return False
277+
# Return a numpy array (truthy if all elements are non-zero)
278+
return np.array([self.value == other.value])
279+
280+
def __repr__(self):
281+
return f"CustomEq({self.value})"
282+
283+
obj1 = CustomEq(42)
284+
obj2 = CustomEq(42) # Same value
285+
obj3 = CustomEq(99) # Different value
286+
287+
ds4 = xr.Dataset(attrs={"custom": obj1, "x": 1})
288+
ds5 = xr.Dataset(attrs={"custom": obj2, "x": 1})
289+
ds6 = xr.Dataset(attrs={"custom": obj3, "y": 2})
290+
291+
# Objects with same value (returning truthy array [True])
292+
actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
293+
assert "custom" in actual.attrs
294+
assert actual.attrs["x"] == 1
295+
296+
# Objects with different values (returning falsy array [False])
297+
actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
298+
assert "custom" not in actual.attrs # Dropped due to conflict
299+
assert actual.attrs["x"] == 1
300+
assert actual.attrs["y"] == 2
301+
302+
# Test edge case: object whose __eq__ returns empty array (ambiguous truth value)
303+
class EmptyArrayEq:
304+
def __eq__(self, other):
305+
if not isinstance(other, EmptyArrayEq):
306+
return False
307+
return np.array([]) # Empty array has ambiguous truth value
308+
309+
def __repr__(self):
310+
return "EmptyArrayEq()"
311+
312+
empty_obj1 = EmptyArrayEq()
313+
empty_obj2 = EmptyArrayEq()
314+
315+
ds7 = xr.Dataset(attrs={"empty": empty_obj1})
316+
ds8 = xr.Dataset(attrs={"empty": empty_obj2})
317+
318+
# With new behavior: ambiguous truth values are treated as non-equivalent
319+
# So the attribute is dropped instead of raising an error
320+
actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts")
321+
assert "empty" not in actual.attrs # Dropped due to ambiguous comparison
322+
323+
# Test with object that returns multi-element array (also ambiguous)
324+
class MultiArrayEq:
325+
def __eq__(self, other):
326+
if not isinstance(other, MultiArrayEq):
327+
return False
328+
return np.array([True, False]) # Multi-element array is ambiguous
329+
330+
def __repr__(self):
331+
return "MultiArrayEq()"
332+
333+
multi_obj1 = MultiArrayEq()
334+
multi_obj2 = MultiArrayEq()
335+
336+
ds9 = xr.Dataset(attrs={"multi": multi_obj1})
337+
ds10 = xr.Dataset(attrs={"multi": multi_obj2})
338+
339+
# With new behavior: ambiguous arrays are treated as non-equivalent
340+
actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts")
341+
assert "multi" not in actual.attrs # Dropped due to ambiguous comparison
342+
343+
# Test with all-True multi-element array (unambiguous truthy)
344+
class AllTrueArrayEq:
345+
def __eq__(self, other):
346+
if not isinstance(other, AllTrueArrayEq):
347+
return False
348+
return np.array([True, True, True]) # All True, but still multi-element
349+
350+
def __repr__(self):
351+
return "AllTrueArrayEq()"
352+
353+
alltrue1 = AllTrueArrayEq()
354+
alltrue2 = AllTrueArrayEq()
355+
356+
ds11 = xr.Dataset(attrs={"alltrue": alltrue1})
357+
ds12 = xr.Dataset(attrs={"alltrue": alltrue2})
358+
359+
# Multi-element arrays are ambiguous even if all True
360+
actual = xr.merge([ds11, ds12], combine_attrs="drop_conflicts")
361+
assert "alltrue" not in actual.attrs # Dropped due to ambiguous comparison
362+
238363
def test_merge_attrs_no_conflicts_compat_minimal(self):
239364
"""make sure compat="minimal" does not silence errors"""
240365
ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})

0 commit comments

Comments
 (0)