def _query_slice(index, label, coord_name="", method=None, tolerance=None):
    """Translate a label-based slice into a positional slice over ``index``.

    Parameters
    ----------
    index
        Index object exposing ``slice_indexer``, ``get_indexer`` and
        ``has_duplicates`` (a ``pandas.Index`` in practice).
    label : slice
        Label-based slice; ``start``, ``stop`` and ``step`` are each passed
        through ``_sanitize_slice_element`` before use.
    coord_name : str, optional
        Coordinate name, used only in the ``KeyError`` message.
    method : str, optional
        Inexact-matching method forwarded to ``index.get_indexer``.
    tolerance : optional
        Matching tolerance forwarded to ``index.get_indexer``.

    Returns
    -------
    slice
        Positional slice. The stop label is included in the selection. An
        empty ``slice(0, 0)`` is returned when a bound has no match.

    Raises
    ------
    NotImplementedError
        If ``method``/``tolerance`` is given and the index has duplicates.
    KeyError
        If ``index.slice_indexer`` does not return a slice.
    """
    start_label = _sanitize_slice_element(label.start)
    stop_label = _sanitize_slice_element(label.stop)
    step = _sanitize_slice_element(label.step)

    if method is None and tolerance is None:
        indexer = index.slice_indexer(start_label, stop_label, step)
        if not isinstance(indexer, slice):
            # unlike pandas, in xarray we never want to silently convert a
            # slice indexer into an array indexer
            raise KeyError(
                "cannot represent labeled-based slice indexer for coordinate "
                f"{coord_name!r} with a slice over integer positions; the index is "
                "unsorted or non-unique"
            )
        return indexer

    # `pandas.Index.slice_indexer` doesn't support method or tolerance
    # (see https://github.com/pydata/xarray/issues/10710), so each bound is
    # located with `get_indexer` instead.
    if index.has_duplicates:
        # `pandas.Index.get_indexer` disallows this, see
        # https://github.com/pydata/xarray/pull/10711#discussion_r2331297608
        raise NotImplementedError(
            "cannot use ``method`` argument with a slice object as an indexer and an index with non-unique values"
        )

    def _locate(bound, how):
        # An open bound (None) must stay open: passing None to `get_indexer`
        # would be treated as a label to look up rather than "no bound".
        if bound is None:
            return None
        return index.get_indexer([bound], method=how, tolerance=tolerance)[0].item()

    if method is not None and start_label is not None and stop_label is not None:
        # minor optimization: a single `get_indexer` call resolves both bounds
        start_pos, stop_pos = index.get_indexer(
            [start_label, stop_label],
            method=method,
            tolerance=tolerance,
        ).tolist()
    elif method is None:
        # Only a tolerance was given: take the next position for the start
        # ("backfill") and the previous position for the stop ("pad").
        start_pos = _locate(start_label, "backfill")
        stop_pos = _locate(stop_label, "pad")
    else:
        start_pos = _locate(start_label, method)
        stop_pos = _locate(stop_label, method)

    if start_pos == -1 or stop_pos == -1:
        # -1 is how pandas indicates the "no match" case - return empty slice
        return slice(0, 0)

    # +1 emulates xarray's ``sel`` with a slice and no ``method`` kwarg, which
    # is inclusive of the point at the stop label. This assumes no duplicates,
    # which are rejected above.
    stop_bound = None if stop_pos is None else stop_pos + 1
    return slice(start_pos, stop_bound, step)
def test_sel_method_with_slice(self) -> None:
    """Check ``.sel`` with a slice indexer combined with ``method``/``tolerance``.

    Regression test for https://github.com/pydata/xarray/issues/10710.
    """
    data_int_coords = xr.Dataset(coords={"lat": ("lat", [20, 21, 22, 23])})
    expected = xr.Dataset(coords={"lat": ("lat", [21, 22])})
    actual = data_int_coords.sel(lat=slice(21, 22), method="nearest")
    assert_identical(expected, actual)

    # check non-unit step
    expected = xr.Dataset(coords={"lat": ("lat", [21])})
    actual = data_int_coords.sel(lat=slice(21, 22, 2), method="nearest")
    assert_identical(expected, actual)

    # check consistency with not passing method kwarg, for case of ints,
    # where method kwarg should be irrelevant
    expected = data_int_coords.sel(lat=slice(21, 22))
    actual = data_int_coords.sel(lat=slice(21, 22), method="nearest")
    assert_identical(expected, actual)

    data_float_coords = xr.Dataset(
        coords={"lat": ("lat", [20.1, 21.1, 22.1, 23.1])}
    )
    expected = xr.Dataset(coords={"lat": ("lat", [21.1, 22.1])})
    actual = data_float_coords.sel(lat=slice(21, 22), method="nearest")
    assert_identical(expected, actual)

    # "no match" case - should return zero-size slice
    expected = xr.Dataset(coords={"lat": ("lat", [])})
    actual = data_float_coords.sel(
        lat=slice(21.5, 21.6), method="nearest", tolerance=1e-3
    )
    assert_identical(expected, actual)

    # test supposed default behaviour
    expected = xr.Dataset(coords={"lat": ("lat", [21.1, 22.1])})
    actual = data_float_coords.sel(lat=slice(21.0, 22.2))
    assert_identical(expected, actual)

    # tolerance specified but method not specified
    expected = xr.Dataset(coords={"lat": ("lat", [21.1, 22.1])})
    actual = data_float_coords.sel(
        lat=slice(21.0, 22.2),
        tolerance=1.0,
    )
    assert_identical(expected, actual)
    # test this matches default behaviour without tolerance specified
    default = data_float_coords.sel(lat=slice(21.0, 22.2))
    assert_identical(default, actual)

    # non-unique coordinate values are rejected when a method is given
    data_non_unique = xr.Dataset(
        coords={"lat": ("lat", [20.1, 21.1, 21.1, 22.1, 22.1, 23.1])}
    )
    with pytest.raises(
        NotImplementedError,
        match="slice object as an indexer and an index with non-unique values",
    ):
        data_non_unique.sel(lat=slice(21.0, 22.2), method="nearest")

    # check non-unit step
    data_float_coords = xr.Dataset(
        coords={"lat": ("lat", [20.1, 21.1, 22.1, 23.1])}
    )
    expected = xr.Dataset(coords={"lat": ("lat", [21.1])})
    actual = data_float_coords.sel(lat=slice(21, 22, 2), method="nearest")
    assert_identical(expected, actual)

    # backwards slices (coordinate stored in decreasing order)
    data_int_coords = xr.Dataset(coords={"lat": ("lat", [23, 22, 21, 20])})
    expected = xr.Dataset(coords={"lat": ("lat", [22, 21])})
    actual = data_int_coords.sel(lat=slice(22, 21), method="nearest")
    assert_identical(expected, actual)

    data_float_coords = xr.Dataset(
        coords={"lat": ("lat", [23.1, 22.1, 21.1, 20.1])}
    )
    expected = xr.Dataset(coords={"lat": ("lat", [22.1, 21.1])})
    actual = data_float_coords.sel(lat=slice(22, 21), method="nearest")
    assert_identical(expected, actual)


def test_sel_negative_slices(self) -> None:
    """Check slice selection over negative coordinate values."""
    data_int_coords = xr.Dataset(coords={"lat": ("lat", [-23, -22, -21, -20])})
    expected = xr.Dataset(coords={"lat": ("lat", [-22, -21])})
    actual = data_int_coords.sel(lat=slice(-22, -21))
    assert_identical(expected, actual)

    # same selection with a method given
    expected = xr.Dataset(coords={"lat": ("lat", [-22, -21])})
    actual = data_int_coords.sel(lat=slice(-22, -21), method="nearest")
    assert_identical(expected, actual)

    data_float_coords = xr.Dataset(
        coords={"lat": ("lat", [-23.1, -22.1, -21.1, -20.1])}
    )
    expected = xr.Dataset(coords={"lat": ("lat", [-22.1, -21.1])})
    actual = data_float_coords.sel(lat=slice(-22, -21), method="nearest")
    assert_identical(expected, actual)