diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1aef6b9db3d..57031ec8d7d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,92 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- **All xarray operations now preserve attributes by default** (:issue:`3891`, :issue:`2582`). + Previously, operations would drop attributes unless explicitly told to preserve them via ``keep_attrs=True``. + Additionally, when attributes are preserved in binary operations, they now combine attributes from both + operands using ``drop_conflicts`` (keeping matching attributes, dropping conflicts), instead of keeping + only the left operand's attributes. + + **What changed:** + + .. code-block:: python + + # Before (xarray <2025.09.1): + data = xr.DataArray([1, 2, 3], attrs={"units": "meters", "long_name": "height"}) + result = data.mean() + result.attrs # {} - Attributes lost! + + # After (xarray ≥2025.09.1): + data = xr.DataArray([1, 2, 3], attrs={"units": "meters", "long_name": "height"}) + result = data.mean() + result.attrs # {"units": "meters", "long_name": "height"} - Attributes preserved! + + **Affected operations include:** + + *Computational operations:* + + - Reductions: ``mean()``, ``sum()``, ``std()``, ``var()``, ``min()``, ``max()``, ``median()``, ``quantile()``, etc. + - Rolling windows: ``rolling().mean()``, ``rolling().sum()``, etc. + - Groupby: ``groupby().mean()``, ``groupby().sum()``, etc. + - Resampling: ``resample().mean()``, etc. + - Weighted: ``weighted().mean()``, ``weighted().sum()``, etc. + - ``apply_ufunc()`` and NumPy universal functions + + *Binary operations:* + + - Arithmetic: ``+``, ``-``, ``*``, ``/``, ``**``, ``//``, ``%`` (combines attributes using ``drop_conflicts``) + - Comparisons: ``<``, ``>``, ``==``, ``!=``, ``<=``, ``>=`` (combines attributes using ``drop_conflicts``) + - With scalars: ``data * 2``, ``10 - data`` (preserves data's attributes) + + *Data manipulation:* + + - Missing data: ``fillna()``, ``dropna()``, ``interpolate_na()``, ``ffill()``, ``bfill()`` + - Indexing/selection: ``isel()``, ``sel()``, ``where()``, ``clip()`` + - Alignment: ``interp()``, ``reindex()``, ``align()`` + - Transformations: ``map()``, ``pipe()``, ``assign()``, ``assign_coords()`` + - Shape operations: ``expand_dims()``, ``squeeze()``, ``transpose()``, ``stack()``, ``unstack()`` + + **Binary operations - combines attributes with ``drop_conflicts``:** + + .. code-block:: python + + a = xr.DataArray([1, 2], attrs={"units": "m", "source": "sensor_a"}) + b = xr.DataArray([3, 4], attrs={"units": "m", "source": "sensor_b"}) + (a + b).attrs # {"units": "m"} - Matching values kept, conflicts dropped + (b + a).attrs # {"units": "m"} - Order doesn't matter for drop_conflicts + + **How to restore previous behavior:** + + 1. **Globally for your entire script:** + + .. code-block:: python + + import xarray as xr + + xr.set_options(keep_attrs=False) # Affects all subsequent operations + + 2. **For specific operations:** + + .. code-block:: python + + result = data.mean(dim="time", keep_attrs=False) + + 3. **For code blocks:** + + .. code-block:: python + + with xr.set_options(keep_attrs=False): + # All operations in this block drop attrs + result = data1 + data2 + + 4. **Remove attributes after operations:** + + .. code-block:: python + + result = data.mean().drop_attrs() + + By `Maximilian Roos `_. + - :py:meth:`Dataset.update` now returns ``None``, instead of the updated dataset. This completes the deprecation cycle started in version 0.17. The method still updates the dataset in-place. (:issue:`10167`) diff --git a/xarray/computation/apply_ufunc.py b/xarray/computation/apply_ufunc.py index e205059841d..a3900cf2715 100644 --- a/xarray/computation/apply_ufunc.py +++ b/xarray/computation/apply_ufunc.py @@ -1214,7 +1214,7 @@ def apply_ufunc( func = functools.partial(func, **kwargs) if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) if isinstance(keep_attrs, bool): keep_attrs = "override" if keep_attrs else "drop" diff --git a/xarray/computation/computation.py b/xarray/computation/computation.py index 14b1ae6e240..c2fbd4171b0 100644 --- a/xarray/computation/computation.py +++ b/xarray/computation/computation.py @@ -701,7 +701,7 @@ def where(cond, x, y, keep_attrs=None): * lon (lon) int64 24B 10 11 12 >>> xr.where(y.lat < 1, y, -1) - Size: 72B + Size: 72B array([[ 0. , 0.1, 0.2], [-1. , -1. , -1. ], [-1. , -1. , -1. ]]) @@ -726,7 +726,7 @@ def where(cond, x, y, keep_attrs=None): from xarray.core.dataset import Dataset if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) # alignment for three arguments is complicated, so don't support it yet from xarray.computation.apply_ufunc import apply_ufunc diff --git a/xarray/computation/weighted.py b/xarray/computation/weighted.py index 4c7711d0e2b..b311290aabf 100644 --- a/xarray/computation/weighted.py +++ b/xarray/computation/weighted.py @@ -448,7 +448,6 @@ def _weighted_quantile_1d( result = result.transpose("quantile", ...) result = result.assign_coords(quantile=q).squeeze() - return result def _implementation(self, func, dim, **kwargs): @@ -551,7 +550,6 @@ def _implementation(self, func, dim, **kwargs) -> DataArray: class DatasetWeighted(Weighted["Dataset"]): def _implementation(self, func, dim, **kwargs) -> Dataset: self._check_dim(dim) - return self.obj.map(func, dim=dim, **kwargs) diff --git a/xarray/core/common.py b/xarray/core/common.py index b4a2dc1104f..b064290c41e 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1314,7 +1314,7 @@ def isnull(self, keep_attrs: bool | None = None) -> Self: from xarray.computation.apply_ufunc import apply_ufunc if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) return apply_ufunc( duck_array_ops.isnull, @@ -1357,7 +1357,7 @@ def notnull(self, keep_attrs: bool | None = None) -> Self: from xarray.computation.apply_ufunc import apply_ufunc if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) return apply_ufunc( duck_array_ops.notnull, diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index a9326ccc76f..20a8b0d4951 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3889,8 +3889,8 @@ def reduce( supplied, then the reduction is calculated over the flattened array (by calling `f(x)` without an axis argument). keep_attrs : bool or None, optional - If True, the variable's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new + If True (default), the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False, the new object will be returned without attributes. keepdims : bool, default: False If True, the dimensions which are reduced are left in the result diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2310d391e67..1c1a824bc42 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6774,8 +6774,8 @@ def reduce( Dimension(s) over which to apply `func`. By default `func` is applied over all dimensions. keep_attrs : bool or None, optional - If True, the dataset's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new + If True (default), the dataset's attributes (`attrs`) will be copied from + the original object to the new one. If False, the new object will be returned without attributes. keepdims : bool, default: False If True, the dimensions which are reduced are left in the result @@ -6833,7 +6833,7 @@ def reduce( dims = parse_dims_as_set(dim, set(self._dims.keys())) if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) variables: dict[Hashable, Variable] = {} for name, var in self._variables.items(): @@ -6924,7 +6924,7 @@ def map( bar (x) float64 16B 1.0 2.0 """ if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) variables = { k: maybe_wrap_array(v, func(v, *args, **kwargs)) for k, v in self.data_vars.items() @@ -6937,11 +6937,14 @@ def map( if keep_attrs: for k, v in variables.items(): v._copy_attrs_from(self.data_vars[k]) - for k, v in coords.items(): - if k not in self.coords: - continue - v._copy_attrs_from(self.coords[k]) + if k in self.coords: + v._copy_attrs_from(self.coords[k]) + else: + for v in variables.values(): + v.attrs = {} + for v in coords.values(): + v.attrs = {} attrs = self.attrs if keep_attrs else None return type(self)(variables, coords=coords, attrs=attrs) @@ -7669,9 +7672,14 @@ def _binary_op(self, other, f, reflexive=False, join=None) -> Dataset: self, other = align(self, other, join=align_type, copy=False) g = f if not reflexive else lambda x, y: f(y, x) ds = self._calculate_binary_op(g, other, join=align_type) - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) if keep_attrs: - ds.attrs = self.attrs + # Combine attributes from both operands, dropping conflicts + from xarray.structure.merge import merge_attrs + + self_attrs = self.attrs + other_attrs = getattr(other, "attrs", {}) + ds.attrs = merge_attrs([self_attrs, other_attrs], "drop_conflicts") return ds def _inplace_binary_op(self, other, f) -> Self: @@ -8265,7 +8273,7 @@ def quantile( coord_names = {k for k in self.coords if k in variables} indexes = {k: v for k, v in self._indexes.items() if k in variables} if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) attrs = self.attrs if keep_attrs else None new = self._replace_with_new_dims( variables, coord_names=coord_names, attrs=attrs, indexes=indexes @@ -8327,7 +8335,7 @@ def rank( coord_names = set(self.coords) if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) attrs = self.attrs if keep_attrs else None return self._replace(variables, coord_names, attrs=attrs) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 99441b1a8d4..bb5905e31e4 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -428,7 +428,7 @@ def map( # type: ignore[override] # Copied from xarray.Dataset so as not to call type(self), which causes problems (see https://github.com/xarray-contrib/datatree/issues/188). # TODO Refactor xarray upstream to avoid needing to overwrite this. if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) variables = { k: maybe_wrap_array(v, func(v, *args, **kwargs)) for k, v in self.data_vars.items() diff --git a/xarray/core/variable.py b/xarray/core/variable.py index b9cea53f53d..14b97ada05d 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1741,8 +1741,8 @@ def reduce( # type: ignore[override] the reduction is calculated over the flattened array (by calling `func(x)` without an axis argument). keep_attrs : bool, optional - If True, the variable's attributes (`attrs`) will be copied from - the original object to the new one. If False (default), the new + If True (default), the variable's attributes (`attrs`) will be copied from + the original object to the new one. If False, the new object will be returned without attributes. keepdims : bool, default: False If True, the dimensions which are reduced are left in the result @@ -1757,7 +1757,7 @@ def reduce( # type: ignore[override] removed. """ keep_attrs_ = ( - _get_keep_attrs(default=False) if keep_attrs is None else keep_attrs + _get_keep_attrs(default=True) if keep_attrs is None else keep_attrs ) # Note that the call order for Variable.mean is @@ -2009,7 +2009,7 @@ def quantile( _quantile_func = duck_array_ops.quantile if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) scalar = utils.is_scalar(q) q = np.atleast_1d(np.asarray(q, dtype=np.float64)) @@ -2350,7 +2350,7 @@ def isnull(self, keep_attrs: bool | None = None): from xarray.computation.apply_ufunc import apply_ufunc if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) return apply_ufunc( duck_array_ops.isnull, @@ -2384,7 +2384,7 @@ def notnull(self, keep_attrs: bool | None = None): from xarray.computation.apply_ufunc import apply_ufunc if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) return apply_ufunc( duck_array_ops.notnull, @@ -2435,8 +2435,17 @@ def _binary_op(self, other, f, reflexive=False): other_data, self_data, dims = _broadcast_compat_data(other, self) else: self_data, other_data, dims = _broadcast_compat_data(self, other) - keep_attrs = _get_keep_attrs(default=False) - attrs = self._attrs if keep_attrs else None + keep_attrs = _get_keep_attrs(default=True) + if keep_attrs: + # Combine attributes from both operands, dropping conflicts + from xarray.structure.merge import merge_attrs + + # Access attrs property to normalize None to {} due to property side effect + self_attrs = self.attrs + other_attrs = getattr(other, "attrs", {}) + attrs = merge_attrs([self_attrs, other_attrs], "drop_conflicts") + else: + attrs = None with np.errstate(all="ignore"): new_data = ( f(self_data, other_data) if not reflexive else f(other_data, self_data) @@ -2526,7 +2535,7 @@ def _unravel_argminmax( } if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) + keep_attrs = _get_keep_attrs(default=True) if keep_attrs: for v in result.values(): v.attrs = self.attrs diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index 8faa839d874..d9223a394a6 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -100,7 +100,7 @@ def test_coarsen_keep_attrs(funcname, argument) -> None: attrs=global_attrs, ) - # attrs are now kept per default + # attrs are kept by default func = getattr(ds.coarsen(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == global_attrs @@ -199,7 +199,7 @@ def test_coarsen_da_keep_attrs(funcname, argument) -> None: name="name", ) - # attrs are now kept per default + # attrs are kept by default func = getattr(da.coarsen(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == attrs_da diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 569013b43dc..5254b0197d6 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -694,10 +694,7 @@ def test_broadcast_compat_data_2d() -> None: def test_keep_attrs() -> None: def add(a, b, keep_attrs): - if keep_attrs: - return apply_ufunc(operator.add, a, b, keep_attrs=keep_attrs) - else: - return apply_ufunc(operator.add, a, b) + return apply_ufunc(operator.add, a, b, keep_attrs=keep_attrs) a = xr.DataArray([0, 1], [("x", [0, 1])]) a.attrs["attr"] = "da" @@ -733,7 +730,7 @@ def add(a, b, keep_attrs): pytest.param( None, [{"a": 1}, {"a": 2}, {"a": 3}], - {}, + {"a": 1}, False, id="default", ), @@ -802,7 +799,7 @@ def test_keep_attrs_strategies_variable(strategy, attrs, expected, error) -> Non pytest.param( None, [{"a": 1}, {"a": 2}, {"a": 3}], - {}, + {"a": 1}, False, id="default", ), @@ -872,7 +869,7 @@ def test_keep_attrs_strategies_dataarray(strategy, attrs, expected, error) -> No pytest.param( None, [{"a": 1}, {"a": 2}, {"a": 3}], - {}, + {"a": 1}, False, id="default", ), @@ -967,7 +964,7 @@ def test_keep_attrs_strategies_dataarray_variables( pytest.param( None, [{"a": 1}, {"a": 2}, {"a": 3}], - {}, + {"a": 1}, False, id="default", ), @@ -1037,7 +1034,7 @@ def test_keep_attrs_strategies_dataset(strategy, attrs, expected, error) -> None pytest.param( None, [{"a": 1}, {"a": 2}, {"a": 3}], - {}, + {"a": 1}, False, id="default", ), diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 41604ef00ee..97896d67b56 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3054,16 +3054,21 @@ def test_quantile_interpolation_deprecated(self, method) -> None: da.quantile(q, method=method, interpolation=method) def test_reduce_keep_attrs(self) -> None: - # Test dropped attrs + # Test default behavior (keeps attrs for reduction operations) vm = self.va.mean() - assert len(vm.attrs) == 0 - assert vm.attrs == {} + assert len(vm.attrs) == len(self.attrs) + assert vm.attrs == self.attrs - # Test kept attrs + # Test explicitly keeping attrs vm = self.va.mean(keep_attrs=True) assert len(vm.attrs) == len(self.attrs) assert vm.attrs == self.attrs + # Test explicitly dropping attrs + vm = self.va.mean(keep_attrs=False) + assert len(vm.attrs) == 0 + assert vm.attrs == {} + def test_assign_attrs(self) -> None: expected = DataArray([], attrs=dict(a=1, b=2)) expected.attrs["a"] = 1 @@ -4950,20 +4955,25 @@ def test_min( result0 = ar.min(keep_attrs=True) assert_identical(result0, expected0) + # Default keeps attrs for reduction operations result1 = ar.min() expected1 = expected0.copy() - expected1.attrs = {} assert_identical(result1, expected1) result2 = ar.min(skipna=False) if nanindex is not None and ar.dtype.kind != "O": expected2 = ar.isel(x=nanindex, drop=True) - expected2.attrs = {} else: expected2 = expected1 assert_identical(result2, expected2) + # Test explicitly dropping attrs + result3 = ar.min(keep_attrs=False) + expected3 = expected0.copy() + expected3.attrs = {} + assert_identical(result3, expected3) + def test_max( self, x: np.ndarray, @@ -4982,20 +4992,25 @@ def test_max( result0 = ar.max(keep_attrs=True) assert_identical(result0, expected0) + # Default keeps attrs for reduction operations result1 = ar.max() expected1 = expected0.copy() - expected1.attrs = {} assert_identical(result1, expected1) result2 = ar.max(skipna=False) if nanindex is not None and ar.dtype.kind != "O": expected2 = ar.isel(x=nanindex, drop=True) - expected2.attrs = {} else: expected2 = expected1 assert_identical(result2, expected2) + # Test explicitly dropping attrs + result3 = ar.max(keep_attrs=False) + expected3 = expected0.copy() + expected3.attrs = {} + assert_identical(result3, expected3) + @pytest.mark.filterwarnings( "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" ) @@ -5017,6 +5032,7 @@ def test_argmin( return expected0 = indarr[minindex] + expected0.attrs = self.attrs # argmin should preserve attrs from input result0 = ar.argmin() assert_identical(result0, expected0) @@ -5028,7 +5044,7 @@ def test_argmin( result2 = ar.argmin(skipna=False) if nanindex is not None and ar.dtype.kind != "O": expected2 = indarr.isel(x=nanindex, drop=True) - expected2.attrs = {} + expected2.attrs = self.attrs # Default keeps attrs for reduction operations else: expected2 = expected0 @@ -5055,6 +5071,7 @@ def test_argmax( return expected0 = indarr[maxindex] + expected0.attrs = self.attrs # Default keeps attrs for reduction operations result0 = ar.argmax() assert_identical(result0, expected0) @@ -5066,7 +5083,7 @@ def test_argmax( result2 = ar.argmax(skipna=False) if nanindex is not None and ar.dtype.kind != "O": expected2 = indarr.isel(x=nanindex, drop=True) - expected2.attrs = {} + expected2.attrs = self.attrs # Default keeps attrs for reduction operations else: expected2 = expected0 @@ -5124,6 +5141,7 @@ def test_idxmin( (coordarr1 * fill_value_0).isel(x=minindex, drop=True).astype("float") ) expected0.name = "x" + expected0.attrs = self.attrs # Default keeps attrs for reduction operations # Default fill value (NaN) result0 = ar0.idxmin() @@ -5143,7 +5161,7 @@ def test_idxmin( if nanindex is not None and ar0.dtype.kind != "O": expected3 = coordarr0.isel(x=nanindex, drop=True).astype("float") expected3.name = "x" - expected3.attrs = {} + expected3.attrs = self.attrs # Default keeps attrs for reduction operations else: expected3 = expected0.copy() @@ -5162,6 +5180,7 @@ def test_idxmin( expected5 = (coordarr1 * fill_value_5).isel(x=minindex, drop=True) expected5.name = "x" + expected5.attrs = self.attrs # Default keeps attrs for reduction operations result5 = ar0.idxmin(fill_value=-1.1) assert_identical(result5, expected5) @@ -5174,6 +5193,7 @@ def test_idxmin( expected6 = (coordarr1 * fill_value_6).isel(x=minindex, drop=True) expected6.name = "x" + expected6.attrs = self.attrs # Default keeps attrs for reduction operations result6 = ar0.idxmin(fill_value=-1) assert_identical(result6, expected6) @@ -5186,6 +5206,7 @@ def test_idxmin( expected7 = (coordarr1 * fill_value_7).isel(x=minindex, drop=True) expected7.name = "x" + expected7.attrs = self.attrs # Default keeps attrs for reduction operations result7 = ar0.idxmin(fill_value=-1j) assert_identical(result7, expected7) @@ -5239,6 +5260,7 @@ def test_idxmax( (coordarr1 * fill_value_0).isel(x=maxindex, drop=True).astype("float") ) expected0.name = "x" + expected0.attrs = self.attrs # Default keeps attrs for reduction operations # Default fill value (NaN) result0 = ar0.idxmax() @@ -5258,7 +5280,7 @@ def test_idxmax( if nanindex is not None and ar0.dtype.kind != "O": expected3 = coordarr0.isel(x=nanindex, drop=True).astype("float") expected3.name = "x" - expected3.attrs = {} + expected3.attrs = self.attrs # Default keeps attrs for reduction operations else: expected3 = expected0.copy() @@ -5277,6 +5299,7 @@ def test_idxmax( expected5 = (coordarr1 * fill_value_5).isel(x=maxindex, drop=True) expected5.name = "x" + expected5.attrs = self.attrs # Default keeps attrs for reduction operations result5 = ar0.idxmax(fill_value=-1.1) assert_identical(result5, expected5) @@ -5289,6 +5312,7 @@ def test_idxmax( expected6 = (coordarr1 * fill_value_6).isel(x=maxindex, drop=True) expected6.name = "x" + expected6.attrs = self.attrs # Default keeps attrs for reduction operations result6 = ar0.idxmax(fill_value=-1) assert_identical(result6, expected6) @@ -5301,6 +5325,7 @@ def test_idxmax( expected7 = (coordarr1 * fill_value_7).isel(x=maxindex, drop=True) expected7.name = "x" + expected7.attrs = self.attrs # Default keeps attrs for reduction operations result7 = ar0.idxmax(fill_value=-1j) assert_identical(result7, expected7) @@ -5326,6 +5351,8 @@ def test_argmin_dim( return expected0 = {"x": indarr[minindex]} + for da in expected0.values(): + da.attrs = self.attrs # Default keeps attrs for reduction operations result0 = ar.argmin(...) for key in expected0: assert_identical(result0[key], expected0[key]) @@ -5340,7 +5367,9 @@ def test_argmin_dim( result2 = ar.argmin(..., skipna=False) if nanindex is not None and ar.dtype.kind != "O": expected2 = {"x": indarr.isel(x=nanindex, drop=True)} - expected2["x"].attrs = {} + expected2[ + "x" + ].attrs = self.attrs # Default keeps attrs for reduction operations else: expected2 = expected0 @@ -5368,6 +5397,8 @@ def test_argmax_dim( return expected0 = {"x": indarr[maxindex]} + for da in expected0.values(): + da.attrs = self.attrs # Default keeps attrs for reduction operations result0 = ar.argmax(...) for key in expected0: assert_identical(result0[key], expected0[key]) @@ -5382,7 +5413,9 @@ def test_argmax_dim( result2 = ar.argmax(..., skipna=False) if nanindex is not None and ar.dtype.kind != "O": expected2 = {"x": indarr.isel(x=nanindex, drop=True)} - expected2["x"].attrs = {} + expected2[ + "x" + ].attrs = self.attrs # Default keeps attrs for reduction operations else: expected2 = expected0 @@ -5475,13 +5508,18 @@ def test_min( result0 = ar.min(dim="x", keep_attrs=True) assert_identical(result0, expected0) + # Default keeps attrs for reduction operations result1 = ar.min(dim="x") - expected1 = expected0 + assert_identical(result1, expected0) + + # Test explicitly dropping attrs + result1_no_attrs = ar.min(dim="x", keep_attrs=False) + expected1 = expected0.copy() expected1.attrs = {} - assert_identical(result1, expected1) + assert_identical(result1_no_attrs, expected1) result2 = ar.min(axis=1) - assert_identical(result2, expected1) + assert_identical(result2, expected0) # Default keeps attrs minindex = [ x if y is None or ar.dtype.kind == "O" else y @@ -5491,7 +5529,7 @@ def test_min( ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex) ] expected2 = xr.concat(expected2list, dim="y") - expected2.attrs = {} + expected2.attrs = self.attrs # Default keeps attrs for reduction operations result3 = ar.min(dim="x", skipna=False) @@ -5520,13 +5558,18 @@ def test_max( result0 = ar.max(dim="x", keep_attrs=True) assert_identical(result0, expected0) + # Default keeps attrs for reduction operations result1 = ar.max(dim="x") + assert_identical(result1, expected0) + + # Test explicitly dropping attrs + result1_no_attrs = ar.max(dim="x", keep_attrs=False) expected1 = expected0.copy() expected1.attrs = {} - assert_identical(result1, expected1) + assert_identical(result1_no_attrs, expected1) result2 = ar.max(axis=1) - assert_identical(result2, expected1) + assert_identical(result2, expected0) # Default keeps attrs maxindex = [ x if y is None or ar.dtype.kind == "O" else y @@ -5536,7 +5579,7 @@ def test_max( ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex) ] expected2 = xr.concat(expected2list, dim="y") - expected2.attrs = {} + expected2.attrs = self.attrs # Default keeps attrs for reduction operations result3 = ar.max(dim="x", skipna=False) @@ -5568,6 +5611,7 @@ def test_argmin( for yi, indi in enumerate(minindex) ] expected0 = xr.concat(expected0list, dim="y") + expected0.attrs = self.attrs # Default keeps attrs for reduction operations result0 = ar.argmin(dim="x") assert_identical(result0, expected0) @@ -5589,7 +5633,7 @@ def test_argmin( for yi, indi in enumerate(minindex) ] expected2 = xr.concat(expected2list, dim="y") - expected2.attrs = {} + expected2.attrs = self.attrs # Default keeps attrs for reduction operations result3 = ar.argmin(dim="x", skipna=False) @@ -5621,6 +5665,7 @@ def test_argmax( for yi, indi in enumerate(maxindex) ] expected0 = xr.concat(expected0list, dim="y") + expected0.attrs = self.attrs # Default keeps attrs for reduction operations result0 = ar.argmax(dim="x") assert_identical(result0, expected0) @@ -5642,7 +5687,7 @@ def test_argmax( for yi, indi in enumerate(maxindex) ] expected2 = xr.concat(expected2list, dim="y") - expected2.attrs = {} + expected2.attrs = self.attrs # Default keeps attrs for reduction operations result3 = ar.argmax(dim="x", skipna=False) @@ -5710,6 +5755,7 @@ def test_idxmin( ] expected0 = xr.concat(expected0list, dim="y") expected0.name = "x" + expected0.attrs = self.attrs # Default keeps attrs for reduction operations # Default fill value (NaN) with raise_if_dask_computes(max_computes=max_computes): @@ -5739,7 +5785,7 @@ def test_idxmin( ] expected3 = xr.concat(expected3list, dim="y") expected3.name = "x" - expected3.attrs = {} + expected3.attrs = self.attrs # Default keeps attrs for reduction operations with raise_if_dask_computes(max_computes=max_computes): result3 = ar0.idxmin(dim="x", skipna=False) @@ -5758,6 +5804,7 @@ def test_idxmin( ] expected5 = xr.concat(expected5list, dim="y") expected5.name = "x" + expected5.attrs = self.attrs # Default keeps attrs for reduction operations with raise_if_dask_computes(max_computes=max_computes): result5 = ar0.idxmin(dim="x", fill_value=-1.1) @@ -5771,6 +5818,7 @@ def test_idxmin( ] expected6 = xr.concat(expected6list, dim="y") expected6.name = "x" + expected6.attrs = self.attrs # Default keeps attrs for reduction operations with raise_if_dask_computes(max_computes=max_computes): result6 = ar0.idxmin(dim="x", fill_value=-1) @@ -5784,6 +5832,7 @@ def test_idxmin( ] expected7 = xr.concat(expected7list, dim="y") expected7.name = "x" + expected7.attrs = self.attrs # Default keeps attrs for reduction operations with raise_if_dask_computes(max_computes=max_computes): result7 = ar0.idxmin(dim="x", fill_value=-5j) @@ -5852,6 +5901,7 @@ def test_idxmax( ] expected0 = xr.concat(expected0list, dim="y") expected0.name = "x" + expected0.attrs = self.attrs # Default keeps attrs for reduction operations # Default fill value (NaN) with raise_if_dask_computes(max_computes=max_computes): @@ -5881,7 +5931,7 @@ def test_idxmax( ] expected3 = xr.concat(expected3list, dim="y") expected3.name = "x" - expected3.attrs = {} + expected3.attrs = self.attrs # Default keeps attrs for reduction operations with raise_if_dask_computes(max_computes=max_computes): result3 = ar0.idxmax(dim="x", skipna=False) @@ -5900,6 +5950,7 @@ def test_idxmax( ] expected5 = xr.concat(expected5list, dim="y") expected5.name = "x" + expected5.attrs = self.attrs # Default keeps attrs for reduction operations with raise_if_dask_computes(max_computes=max_computes): result5 = ar0.idxmax(dim="x", fill_value=-1.1) @@ -5913,6 +5964,7 @@ def test_idxmax( ] expected6 = xr.concat(expected6list, dim="y") expected6.name = "x" + expected6.attrs = self.attrs # Default keeps attrs for reduction operations with raise_if_dask_computes(max_computes=max_computes): result6 = ar0.idxmax(dim="x", fill_value=-1) @@ -5926,6 +5978,7 @@ def test_idxmax( ] expected7 = xr.concat(expected7list, dim="y") expected7.name = "x" + expected7.attrs = self.attrs # Default keeps attrs for reduction operations with raise_if_dask_computes(max_computes=max_computes): result7 = ar0.idxmax(dim="x", fill_value=-5j) @@ -5960,6 +6013,9 @@ def test_argmin_dim( for yi, indi in enumerate(minindex) ] expected0 = {"x": xr.concat(expected0list, dim="y")} + expected0[ + "x" + ].attrs = self.attrs # Default keeps attrs for reduction operations result0 = ar.argmin(dim=["x"]) for key in expected0: @@ -5980,7 +6036,9 @@ def test_argmin_dim( for yi, indi in enumerate(minindex) ] expected2 = {"x": xr.concat(expected2list, dim="y")} - expected2["x"].attrs = {} + expected2[ + "x" + ].attrs = self.attrs # Default keeps attrs for reduction operations result2 = ar.argmin(dim=["x"], skipna=False) @@ -5991,8 +6049,8 @@ def test_argmin_dim( # TODO: remove cast once argmin typing is overloaded min_xind = cast(DataArray, ar.isel(expected0).argmin()) expected3 = { - "y": DataArray(min_xind), - "x": DataArray(minindex[min_xind.item()]), + "y": DataArray(min_xind, attrs=self.attrs), + "x": DataArray(minindex[min_xind.item()], attrs=self.attrs), } for key in expected3: @@ -6027,6 +6085,9 @@ def test_argmax_dim( for yi, indi in enumerate(maxindex) ] expected0 = {"x": xr.concat(expected0list, dim="y")} + expected0[ + "x" + ].attrs = self.attrs # Default keeps attrs for reduction operations result0 = ar.argmax(dim=["x"]) for key in expected0: @@ -6047,7 +6108,9 @@ def test_argmax_dim( for yi, indi in enumerate(maxindex) ] expected2 = {"x": xr.concat(expected2list, dim="y")} - expected2["x"].attrs = {} + expected2[ + "x" + ].attrs = self.attrs # Default keeps attrs for reduction operations result2 = ar.argmax(dim=["x"], skipna=False) @@ -6058,8 +6121,8 @@ def test_argmax_dim( # TODO: remove cast once argmax typing is overloaded max_xind = cast(DataArray, ar.isel(expected0).argmax()) expected3 = { - "y": DataArray(max_xind), - "x": DataArray(maxindex[max_xind.item()]), + "y": DataArray(max_xind, attrs=self.attrs), + "x": DataArray(maxindex[max_xind.item()], attrs=self.attrs), } for key in expected3: @@ -6289,7 +6352,7 @@ def test_argmin_dim( result0 = ar.argmin(dim=["x"]) assert isinstance(result0, dict) expected0 = { - key: xr.DataArray(value, dims=("y", "z")) + key: xr.DataArray(value, dims=("y", "z"), attrs=self.attrs) for key, value in minindices_x.items() } for key in expected0: @@ -6298,7 +6361,7 @@ def test_argmin_dim( result1 = ar.argmin(dim=["y"]) assert isinstance(result1, dict) expected1 = { - key: xr.DataArray(value, dims=("x", "z")) + key: xr.DataArray(value, dims=("x", "z"), attrs=self.attrs) for key, value in minindices_y.items() } for key in expected1: @@ -6307,7 +6370,7 @@ def test_argmin_dim( result2 = ar.argmin(dim=["z"]) assert isinstance(result2, dict) expected2 = { - key: xr.DataArray(value, dims=("x", "y")) + key: xr.DataArray(value, dims=("x", "y"), attrs=self.attrs) for key, value in minindices_z.items() } for key in expected2: @@ -6316,7 +6379,8 @@ def test_argmin_dim( result3 = ar.argmin(dim=("x", "y")) assert isinstance(result3, dict) expected3 = { - key: xr.DataArray(value, dims=("z")) for key, value in minindices_xy.items() + key: xr.DataArray(value, dims=("z"), attrs=self.attrs) + for key, value in minindices_xy.items() } for key in expected3: assert_identical(result3[key].drop_vars("z"), expected3[key]) @@ -6324,7 +6388,8 @@ def test_argmin_dim( result4 = ar.argmin(dim=("x", "z")) assert isinstance(result4, dict) expected4 = { - key: xr.DataArray(value, dims=("y")) for key, value in minindices_xz.items() + key: xr.DataArray(value, dims=("y"), attrs=self.attrs) + for key, value in minindices_xz.items() } for key in expected4: assert_identical(result4[key].drop_vars("y"), expected4[key]) @@ -6332,14 +6397,18 @@ def test_argmin_dim( result5 = ar.argmin(dim=("y", "z")) assert isinstance(result5, dict) expected5 = { - key: xr.DataArray(value, dims=("x")) for key, value in minindices_yz.items() + key: xr.DataArray(value, dims=("x"), attrs=self.attrs) + for key, value in minindices_yz.items() } for key in expected5: assert_identical(result5[key].drop_vars("x"), expected5[key]) result6 = ar.argmin(...) assert isinstance(result6, dict) - expected6 = {key: xr.DataArray(value) for key, value in minindices_xyz.items()} + expected6 = { + key: xr.DataArray(value, attrs=self.attrs) + for key, value in minindices_xyz.items() + } for key in expected6: assert_identical(result6[key], expected6[key]) @@ -6352,7 +6421,7 @@ def test_argmin_dim( for key in minindices_x } expected7 = { - key: xr.DataArray(value, dims=("y", "z")) + key: xr.DataArray(value, dims=("y", "z"), attrs=self.attrs) for key, value in minindices_x.items() } @@ -6370,7 +6439,7 @@ def test_argmin_dim( for key in minindices_y } expected8 = { - key: xr.DataArray(value, dims=("x", "z")) + key: xr.DataArray(value, dims=("x", "z"), attrs=self.attrs) for key, value in minindices_y.items() } @@ -6388,7 +6457,7 @@ def test_argmin_dim( for key in minindices_z } expected9 = { - key: xr.DataArray(value, dims=("x", "y")) + key: xr.DataArray(value, dims=("x", "y"), attrs=self.attrs) for key, value in minindices_z.items() } @@ -6406,7 +6475,8 @@ def test_argmin_dim( for key in minindices_xy } expected10 = { - key: xr.DataArray(value, dims="z") for key, value in minindices_xy.items() + key: xr.DataArray(value, dims="z", attrs=self.attrs) + for key, value in minindices_xy.items() } result10 = ar.argmin(dim=("x", "y"), skipna=False) @@ -6423,7 +6493,8 @@ def test_argmin_dim( for key in minindices_xz } expected11 = { - key: xr.DataArray(value, dims="y") for key, value in minindices_xz.items() + key: xr.DataArray(value, dims="y", attrs=self.attrs) + for key, value in minindices_xz.items() } result11 = ar.argmin(dim=("x", "z"), skipna=False) @@ -6440,7 +6511,8 @@ def test_argmin_dim( for key in minindices_yz } expected12 = { - key: xr.DataArray(value, dims="x") for key, value in minindices_yz.items() + key: xr.DataArray(value, dims="x", attrs=self.attrs) + for key, value in minindices_yz.items() } result12 = ar.argmin(dim=("y", "z"), skipna=False) @@ -6456,7 +6528,10 @@ def test_argmin_dim( ) for key in minindices_xyz } - expected13 = {key: xr.DataArray(value) for key, value in minindices_xyz.items()} + expected13 = { + key: xr.DataArray(value, attrs=self.attrs) + for key, value in minindices_xyz.items() + } result13 = ar.argmin(..., skipna=False) assert isinstance(result13, dict) @@ -6516,7 +6591,7 @@ def test_argmax_dim( result0 = ar.argmax(dim=["x"]) assert isinstance(result0, dict) expected0 = { - key: xr.DataArray(value, dims=("y", "z")) + key: xr.DataArray(value, dims=("y", "z"), attrs=self.attrs) for key, value in maxindices_x.items() } for key in expected0: @@ -6525,7 +6600,7 @@ def test_argmax_dim( result1 = ar.argmax(dim=["y"]) assert isinstance(result1, dict) expected1 = { - key: xr.DataArray(value, dims=("x", "z")) + key: xr.DataArray(value, dims=("x", "z"), attrs=self.attrs) for key, value in maxindices_y.items() } for key in expected1: @@ -6534,7 +6609,7 @@ def test_argmax_dim( result2 = ar.argmax(dim=["z"]) assert isinstance(result2, dict) expected2 = { - key: xr.DataArray(value, dims=("x", "y")) + key: xr.DataArray(value, dims=("x", "y"), attrs=self.attrs) for key, value in maxindices_z.items() } for key in expected2: @@ -6543,7 +6618,8 @@ def test_argmax_dim( result3 = ar.argmax(dim=("x", "y")) assert isinstance(result3, dict) expected3 = { - key: xr.DataArray(value, dims=("z")) for key, value in maxindices_xy.items() + key: xr.DataArray(value, dims=("z"), attrs=self.attrs) + for key, value in maxindices_xy.items() } for key in expected3: assert_identical(result3[key].drop_vars("z"), expected3[key]) @@ -6551,7 +6627,8 @@ def test_argmax_dim( result4 = ar.argmax(dim=("x", "z")) assert isinstance(result4, dict) expected4 = { - key: xr.DataArray(value, dims=("y")) for key, value in maxindices_xz.items() + key: xr.DataArray(value, dims=("y"), attrs=self.attrs) + for key, value in maxindices_xz.items() } for key in expected4: assert_identical(result4[key].drop_vars("y"), expected4[key]) @@ -6559,14 +6636,18 @@ def test_argmax_dim( result5 = ar.argmax(dim=("y", "z")) assert isinstance(result5, dict) expected5 = { - key: xr.DataArray(value, dims=("x")) for key, value in maxindices_yz.items() + key: xr.DataArray(value, dims=("x"), attrs=self.attrs) + for key, value in maxindices_yz.items() } for key in expected5: assert_identical(result5[key].drop_vars("x"), expected5[key]) result6 = ar.argmax(...) assert isinstance(result6, dict) - expected6 = {key: xr.DataArray(value) for key, value in maxindices_xyz.items()} + expected6 = { + key: xr.DataArray(value, attrs=self.attrs) + for key, value in maxindices_xyz.items() + } for key in expected6: assert_identical(result6[key], expected6[key]) @@ -6579,7 +6660,7 @@ def test_argmax_dim( for key in maxindices_x } expected7 = { - key: xr.DataArray(value, dims=("y", "z")) + key: xr.DataArray(value, dims=("y", "z"), attrs=self.attrs) for key, value in maxindices_x.items() } @@ -6597,7 +6678,7 @@ def test_argmax_dim( for key in maxindices_y } expected8 = { - key: xr.DataArray(value, dims=("x", "z")) + key: xr.DataArray(value, dims=("x", "z"), attrs=self.attrs) for key, value in maxindices_y.items() } @@ -6615,7 +6696,7 @@ def test_argmax_dim( for key in maxindices_z } expected9 = { - key: xr.DataArray(value, dims=("x", "y")) + key: xr.DataArray(value, dims=("x", "y"), attrs=self.attrs) for key, value in maxindices_z.items() } @@ -6633,7 +6714,8 @@ def test_argmax_dim( for key in maxindices_xy } expected10 = { - key: xr.DataArray(value, dims="z") for key, value in maxindices_xy.items() + key: xr.DataArray(value, dims="z", attrs=self.attrs) + for key, value in maxindices_xy.items() } result10 = ar.argmax(dim=("x", "y"), skipna=False) @@ -6650,7 +6732,8 @@ def test_argmax_dim( for key in maxindices_xz } expected11 = { - key: xr.DataArray(value, dims="y") for key, value in maxindices_xz.items() + key: xr.DataArray(value, dims="y", attrs=self.attrs) + for key, value in maxindices_xz.items() } result11 = ar.argmax(dim=("x", "z"), skipna=False) @@ -6667,7 +6750,8 @@ def test_argmax_dim( for key in maxindices_yz } expected12 = { - key: xr.DataArray(value, dims="x") for key, value in maxindices_yz.items() + key: xr.DataArray(value, dims="x", attrs=self.attrs) + for key, value in maxindices_yz.items() } result12 = ar.argmax(dim=("y", "z"), skipna=False) @@ -6683,7 +6767,10 @@ def test_argmax_dim( ) for key in maxindices_xyz } - expected13 = {key: xr.DataArray(value) for key, value in maxindices_xyz.items()} + expected13 = { + key: xr.DataArray(value, attrs=self.attrs) + for key, value in maxindices_xyz.items() + } result13 = ar.argmax(..., skipna=False) assert isinstance(result13, dict) @@ -6735,6 +6822,33 @@ def test_raise_no_warning_for_nan_in_binary_ops() -> None: _ = xr.DataArray([1, 2, np.nan]) > 0 +def test_binary_ops_attrs_drop_conflicts() -> None: + # Test that binary operations combine attrs with drop_conflicts behavior + attrs_a = {"units": "meters", "long_name": "distance", "source": "sensor_a"} + attrs_b = {"units": "feet", "resolution": "high", "source": "sensor_b"} + da1 = xr.DataArray([1, 2, 3], attrs=attrs_a) + da2 = xr.DataArray([4, 5, 6], attrs=attrs_b) + + # With keep_attrs=True (default), should combine attrs dropping conflicts + result = da1 + da2 + # "units" and "source" conflict, so they're dropped + # "long_name" only in da1, "resolution" only in da2, so they're kept + assert result.attrs == {"long_name": "distance", "resolution": "high"} + + # Test with identical values for some attrs + attrs_c = {"units": "meters", "type": "data", "source": "sensor_c"} + da3 = xr.DataArray([7, 8, 9], attrs=attrs_c) + result2 = da1 + da3 + # "units" has same value, so kept; "source" conflicts, so dropped + # "long_name" from da1, "type" from da3 + assert result2.attrs == {"units": "meters", "long_name": "distance", "type": "data"} + + # With keep_attrs=False, attrs should be empty + with xr.set_options(keep_attrs=False): + result3 = da1 + da2 + assert result3.attrs == {} + + @pytest.mark.filterwarnings("error") def test_no_warning_for_all_nan() -> None: _ = xr.DataArray([np.nan, np.nan]).mean() diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 4af250a0fde..a8e2eb56633 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6023,18 +6023,24 @@ def test_reduce_keep_attrs(self) -> None: attrs = dict(_attrs) data.attrs = attrs - # Test dropped attrs + # Test default behavior (keeps attrs for reduction operations) ds = data.mean() - assert ds.attrs == {} - for v in ds.data_vars.values(): - assert v.attrs == {} + assert ds.attrs == attrs + for k, v in ds.data_vars.items(): + assert v.attrs == data[k].attrs - # Test kept attrs + # Test explicitly keeping attrs ds = data.mean(keep_attrs=True) assert ds.attrs == attrs for k, v in ds.data_vars.items(): assert v.attrs == data[k].attrs + # Test explicitly dropping attrs + ds = data.mean(keep_attrs=False) + assert ds.attrs == {} + for v in ds.data_vars.values(): + assert v.attrs == {} + @pytest.mark.filterwarnings( "ignore:Once the behaviour of DataArray:DeprecationWarning" ) @@ -6217,6 +6223,7 @@ def test_map(self) -> None: data = create_test_data() data.attrs["foo"] = "bar" + # data.map keeps all attrs by default assert_identical(data.map(np.mean), data.mean()) expected = data.mean(keep_attrs=True) @@ -6273,6 +6280,7 @@ def test_apply_pending_deprecated_map(self) -> None: data.attrs["foo"] = "bar" with pytest.warns(PendingDeprecationWarning): + # data.apply keeps all attrs by default assert_identical(data.apply(np.mean), data.mean()) def make_example_math_dataset(self): @@ -6752,7 +6760,9 @@ def test_binary_op_join_setting(self) -> None: ["keep_attrs", "expected"], ( pytest.param(False, {}, id="False"), - pytest.param(True, {"foo": "a", "bar": "b"}, id="True"), + pytest.param( + True, {"foo": "a", "bar": "b", "baz": "c"}, id="True" + ), # drop_conflicts combines non-conflicting attrs ), ) def test_binary_ops_keep_attrs(self, keep_attrs, expected) -> None: @@ -6763,6 +6773,36 @@ def test_binary_ops_keep_attrs(self, keep_attrs, expected) -> None: assert ds_result.attrs == expected + def test_binary_ops_attrs_drop_conflicts(self) -> None: + # Test that binary operations combine attrs with drop_conflicts behavior + attrs1 = {"units": "meters", "long_name": "distance", "source": "sensor_a"} + attrs2 = {"units": "feet", "resolution": "high", "source": "sensor_b"} + ds1 = xr.Dataset({"a": 1}, attrs=attrs1) + ds2 = xr.Dataset({"a": 2}, attrs=attrs2) + + # With keep_attrs=True (default), should combine attrs dropping conflicts + result = ds1 + ds2 + # "units" and "source" conflict, so they're dropped + # "long_name" only in ds1, "resolution" only in ds2, so they're kept + assert result.attrs == {"long_name": "distance", "resolution": "high"} + + # Test with identical values for some attrs + attrs3 = {"units": "meters", "type": "data", "source": "sensor_c"} + ds3 = xr.Dataset({"a": 3}, attrs=attrs3) + result2 = ds1 + ds3 + # "units" has same value, so kept; "source" conflicts, so dropped + # "long_name" from ds1, "type" from ds3 + assert result2.attrs == { + "units": "meters", + "long_name": "distance", + "type": "data", + } + + # With keep_attrs=False, attrs should be empty + with xr.set_options(keep_attrs=False): + result3 = ds1 + ds2 + assert result3.attrs == {} + def test_full_like(self) -> None: # For more thorough tests, see test_variable.py # Note: testing data_vars with mismatched dtypes diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index b54fd3cb959..4fb4f9aeea7 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -1029,14 +1029,14 @@ def func_keep(ds): expected = dt.mean(keep_attrs=True) xr.testing.assert_identical(result, expected) - # per default DatasetView.map does not keep attrs + # DatasetView.map keeps attrs by default def func(ds): - # x.mean() removes the attrs of the data_vars + # ds.map and x.mean() both keep attrs by default return ds.map(lambda x: x.mean()) result = xr.map_over_datasets(func, dt) expected = dt.mean() - xr.testing.assert_identical(result, expected.mean()) + xr.testing.assert_identical(result, expected) class TestAccess: diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 336a5e6c91c..1d7b118a2c5 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -955,8 +955,11 @@ def reorder_dims(x): actual = grouped + ds.coords["dim1"] assert_identical(expected, reorder_dims(actual)) + # Order matters for attrs - coord + grouped will not have attrs + # since coord has no attrs and binary ops keep attrs from first operand + expected_reversed = reorder_dims(ds.coords["dim1"] + ds) actual = ds.coords["dim1"] + grouped - assert_identical(expected, reorder_dims(actual)) + assert_identical(expected_reversed, reorder_dims(actual)) ds2 = 2 * ds expected = reorder_dims(ds + ds2) diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index 8ad1cbe11be..1be87ebcdae 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -97,12 +97,14 @@ def test_dataset_attr_retention(self) -> None: ds = create_test_dataset_attrs() original_attrs = ds.attrs - # Test default behaviour + # Test default behaviour (keeps attrs for reduction operations) result = ds.mean() - assert result.attrs == {} + assert result.attrs == original_attrs with xarray.set_options(keep_attrs="default"): result = ds.mean() - assert result.attrs == {} + assert ( + result.attrs == original_attrs + ) # "default" uses operation's default which is True for reduce with xarray.set_options(keep_attrs=True): result = ds.mean() @@ -117,12 +119,14 @@ def test_dataarray_attr_retention(self) -> None: da = create_test_dataarray_attrs() original_attrs = da.attrs - # Test default behaviour + # Test default behaviour (keeps attrs for reduction operations) result = da.mean() - assert result.attrs == {} + assert result.attrs == original_attrs with xarray.set_options(keep_attrs="default"): result = da.mean() - assert result.attrs == {} + assert ( + result.attrs == original_attrs + ) # "default" uses operation's default which is True for reduce with xarray.set_options(keep_attrs=True): result = da.mean() diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 4c72c336b88..2cc7e319037 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -1481,7 +1481,9 @@ def test_coord_strings(self) -> None: def test_non_linked_coords(self) -> None: # plot with coordinate names that are not dimensions - self.darray.coords["newy"] = self.darray.y + 150 + newy = self.darray.y + 150 + newy.attrs = {} # Clear attrs since binary ops keep them by default + self.darray.coords["newy"] = newy # Normal case, without transpose self.plotfunc(self.darray, x="x", y="newy") ax = plt.gca() @@ -1496,7 +1498,9 @@ def test_non_linked_coords_transpose(self) -> None: # and with transposed y and x axes # This used to raise an error with pcolormesh and contour # https://github.com/pydata/xarray/issues/788 - self.darray.coords["newy"] = self.darray.y + 150 + newy = self.darray.y + 150 + newy.attrs = {} # Clear attrs since binary ops keep them by default + self.darray.coords["newy"] = newy self.plotfunc(self.darray, x="newy", y="x") ax = plt.gca() assert "newy" == ax.get_xlabel() diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index d93216a3ccc..fed62c87502 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -391,7 +391,7 @@ def test_rolling_keep_attrs(self, funcname, argument) -> None: data, dims=("coord"), coords={"coord": coords}, attrs=attrs_da, name="name" ) - # attrs are now kept per default + # attrs are kept by default func = getattr(da.rolling(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == attrs_da @@ -550,7 +550,7 @@ def test_rolling_keep_attrs(self, funcname, argument) -> None: ds.da.attrs = da_attrs ds.da_not_rolled.attrs = da_not_rolled_attrs - # attrs are now kept per default + # attrs are kept by default func = getattr(ds.rolling(dim={"coord": 5}), funcname) result = func(*argument) assert result.attrs == global_attrs diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 8b3d43fb3d6..1ee07cca77a 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -372,7 +372,10 @@ def test_1d_math(self, dtype: np.typing.DTypeLike | None) -> None: # binary ops with all variables assert_array_equal(v + v, 2 * v) w = self.cls(["x"], y, {"foo": "bar"}) - assert_identical(v + w, self.cls(["x"], x + y).to_base_variable()) + # With drop_conflicts, v (no attrs) + w (has attrs) should keep w's attrs + # Note: IndexVariable ops return Variable, not IndexVariable + expected = self.cls(["x"], x + y, {"foo": "bar"}).to_base_variable() + assert_identical(v + w, expected) assert_array_equal((v * w).values, x * y) # something complicated @@ -1850,15 +1853,24 @@ def test_inplace_math_error(self): def test_reduce(self): v = Variable(["x", "y"], self.d, {"ignored": "attributes"}) - assert_identical(v.reduce(np.std, "x"), Variable(["y"], self.d.std(axis=0))) + # Reduce keeps attrs by default + expected = Variable(["y"], self.d.std(axis=0), {"ignored": "attributes"}) + assert_identical(v.reduce(np.std, "x"), expected) assert_identical(v.reduce(np.std, axis=0), v.reduce(np.std, dim="x")) assert_identical( - v.reduce(np.std, ["y", "x"]), Variable([], self.d.std(axis=(0, 1))) + v.reduce(np.std, ["y", "x"]), + Variable([], self.d.std(axis=(0, 1)), {"ignored": "attributes"}), + ) + assert_identical( + v.reduce(np.std), Variable([], self.d.std(), {"ignored": "attributes"}) + ) + # Chained reductions both keep attrs + expected_chained = Variable( + [], self.d.mean(axis=0).std(), {"ignored": "attributes"} ) - assert_identical(v.reduce(np.std), Variable([], self.d.std())) assert_identical( v.reduce(np.mean, "x").reduce(np.std, "y"), - Variable([], self.d.mean(axis=0).std()), + expected_chained, ) assert_allclose(v.mean("x"), v.reduce(np.mean, "x")) @@ -2105,27 +2117,62 @@ def test_reduce_keep_attrs(self): v = Variable(["x", "y"], self.d, _attrs) - # Test dropped attrs + # Test default behavior (keeps attrs for reduction operations) vm = v.mean() - assert len(vm.attrs) == 0 - assert vm.attrs == {} + assert len(vm.attrs) == len(_attrs) + assert vm.attrs == _attrs - # Test kept attrs + # Test explicitly keeping attrs vm = v.mean(keep_attrs=True) assert len(vm.attrs) == len(_attrs) assert vm.attrs == _attrs + # Test explicitly dropping attrs + vm = v.mean(keep_attrs=False) + assert len(vm.attrs) == 0 + assert vm.attrs == {} + def test_binary_ops_keep_attrs(self): _attrs = {"units": "test", "long_name": "testing"} a = Variable(["x", "y"], np.random.randn(3, 3), _attrs) b = Variable(["x", "y"], np.random.randn(3, 3), _attrs) - # Test dropped attrs + # Test kept attrs (now default) d = a - b # just one operation - assert d.attrs == {} - # Test kept attrs - with set_options(keep_attrs=True): - d = a - b assert d.attrs == _attrs + # Test dropped attrs + with set_options(keep_attrs=False): + d = a - b + assert d.attrs == {} + + def test_binary_ops_attrs_drop_conflicts(self): + # Test that binary operations combine attrs with drop_conflicts behavior + attrs_a = {"units": "meters", "long_name": "distance", "source": "sensor_a"} + attrs_b = {"units": "feet", "resolution": "high", "source": "sensor_b"} + a = Variable(["x"], [1, 2, 3], attrs_a) + b = Variable(["x"], [4, 5, 6], attrs_b) + + # With keep_attrs=True (default), should combine attrs dropping conflicts + result = a + b + # "units" and "source" conflict, so they're dropped + # "long_name" only in a, "resolution" only in b, so they're kept + assert result.attrs == {"long_name": "distance", "resolution": "high"} + + # Test with identical values for some attrs + attrs_c = {"units": "meters", "type": "data", "source": "sensor_c"} + c = Variable(["x"], [7, 8, 9], attrs_c) + result2 = a + c + # "units" has same value, so kept; "source" conflicts, so dropped + # "long_name" from a, "type" from c + assert result2.attrs == { + "units": "meters", + "long_name": "distance", + "type": "data", + } + + # With keep_attrs=False, attrs should be empty + with set_options(keep_attrs=False): + result3 = a + b + assert result3.attrs == {} def test_count(self): expected = Variable([], 3) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 5d27794cc8d..33b6b6fcab8 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -742,12 +742,13 @@ def test_weighted_operations_keep_attr(operation, as_dataset, keep_attrs): result = getattr(data.weighted(weights), operation)(**kwargs) + # When keep_attrs is None, it defaults to True + expected_keep = keep_attrs if keep_attrs is not None else True + if operation == "sum_of_weights": - assert result.attrs == (weights.attrs if keep_attrs else {}) - assert result.attrs == (weights.attrs if keep_attrs else {}) + assert result.attrs == (weights.attrs if expected_keep else {}) else: - assert result.attrs == (weights.attrs if keep_attrs else {}) - assert result.attrs == (data.attrs if keep_attrs else {}) + assert result.attrs == (data.attrs if expected_keep else {}) @pytest.mark.parametrize(