Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ New Features
- ``compute=False`` is now supported by :py:meth:`DataTree.to_netcdf` and
:py:meth:`DataTree.to_zarr`.
By `Stephan Hoyer <https://github.com/shoyer>`_.
- ``.sel`` operations now support the ``method`` and ``tolerance`` keyword arguments,
for the case of indexing with a slice.
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- ``open_dataset`` will now correctly infer a path ending in ``.zarr/`` as zarr
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.

Expand Down
44 changes: 30 additions & 14 deletions xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,23 +560,39 @@ def _sanitize_slice_element(x):


def _query_slice(index, label, coord_name="", method=None, tolerance=None):
slice_label_start = _sanitize_slice_element(label.start)
slice_label_stop = _sanitize_slice_element(label.stop)
slice_index_step = _sanitize_slice_element(label.step)

if method is not None or tolerance is not None:
raise NotImplementedError(
"cannot use ``method`` argument if any indexers are slice objects"
# likely slower because it requires two lookups, but pandas.Index.slice_indexer doesn't support method or tolerance
# see https://github.com/pydata/xarray/issues/10710
slice_index_start = index.get_indexer(
[slice_label_start], method=method, tolerance=tolerance
)
indexer = index.slice_indexer(
_sanitize_slice_element(label.start),
_sanitize_slice_element(label.stop),
_sanitize_slice_element(label.step),
)
if not isinstance(indexer, slice):
# unlike pandas, in xarray we never want to silently convert a
# slice indexer into an array indexer
raise KeyError(
"cannot represent labeled-based slice indexer for coordinate "
f"{coord_name!r} with a slice over integer positions; the index is "
"unsorted or non-unique"
slice_index_stop = index.get_indexer(
[slice_label_stop], method=method, tolerance=tolerance
)

# +1 needed to emulate behaviour of xarray sel with slice without method kwarg, which is inclusive of point at stop label
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if the index is non-unique?

Copy link
Member Author

@TomNicholas TomNicholas Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The behaviour of the existing implementation is confusing. This passes:

data_non_unique = xr.Dataset(
    coords={"lat": ("lat", [20.1, 21.1, 21.1, 22.1, 22.1, 23.1])}
)
expected = xr.Dataset(coords={"lat": ("lat", [21.1, 21.1, 22.1, 22.1])})
actual = data_non_unique.sel(lat=slice(21.1, 22.1))
assert_identical(expected, actual)

but it relies upon calling pandas.Index.slice_indexer on a non-unique index, despite the docstring of that method saying "Index needs to be ordered and unique."!

Also, triggering my implementation (using actual = data_non_unique.sel(lat=slice(21.0, 22.2), method="nearest")) currently fails at .get_indexer with pandas.errors.InvalidIndexError: Reindexing only valid with uniquely valued Index objects. This restriction seems reasonable, but is not documented in the docstring of .get_indexer.

So to get the current (intuitive) behaviour we are relying on undefined behaviour in pandas, and we can't support method/tolerance for slices on non-unique indexers using .get_indexer because of undocumented restrictions in pandas 🙃

Copy link
Member Author

@TomNicholas TomNicholas Sep 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least I can clearly catch this case, which I've done in 199765e. That means this PR now reduces the NotImplementedError surface from "any Index passed a slice with method" to "any non-unique Index passed a slice with method".

indexer = slice(
slice_index_start.item(), slice_index_stop.item() + 1, slice_index_step
)
else:
indexer = index.slice_indexer(
slice_label_start,
slice_label_stop,
slice_index_step,
)
if not isinstance(indexer, slice):
# unlike pandas, in xarray we never want to silently convert a
# slice indexer into an array indexer
raise KeyError(
"cannot represent labeled-based slice indexer for coordinate "
f"{coord_name!r} with a slice over integer positions; the index is "
"unsorted or non-unique"
)

return indexer


Expand Down
69 changes: 67 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2164,8 +2164,13 @@ def test_sel_method(self) -> None:
actual = data.sel(dim2=[1.45], method="backfill")
assert_identical(expected, actual)

with pytest.raises(NotImplementedError, match=r"slice objects"):
data.sel(dim2=slice(1, 3), method="ffill")
expected = data.isel(dim2=slice(2, 7))
actual = data.sel(dim2=slice(1, 3), method="ffill")
assert_identical(expected, actual)

expected = data.isel(dim2=slice(2, 7, 2))
actual = data.sel(dim2=slice(1, 3, 2), method="ffill")
assert_identical(expected, actual)
Comment on lines +2167 to +2173
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a test for method='backfill'?


with pytest.raises(TypeError, match=r"``method``"):
# this should not pass silently
Expand All @@ -2175,6 +2180,66 @@ def test_sel_method(self) -> None:
with pytest.raises(ValueError, match=r"cannot supply"):
data.sel(dim1=0, method="nearest")

def test_sel_method_with_slice(self) -> None:
# regression test for https://github.com/pydata/xarray/issues/10710

data_int_coords = xr.Dataset(coords={"lat": ("lat", [20, 21, 22, 23])})
expected = xr.Dataset(coords={"lat": ("lat", [21, 22])})
actual = data_int_coords.sel(lat=slice(21, 22), method="nearest")
assert_identical(expected, actual)

# check non-zero step
expected = xr.Dataset(coords={"lat": ("lat", [21])})
actual = data_int_coords.sel(lat=slice(21, 22, 2), method="nearest")
assert_identical(expected, actual)

# check consistency with not passing method kwarg, for case of ints, where method kwarg should be irrelevant
expected = data_int_coords.sel(lat=slice(21, 22))
actual = data_int_coords.sel(lat=slice(21, 22), method="nearest")
assert_identical(expected, actual)

data_float_coords = xr.Dataset(
coords={"lat": ("lat", [20.1, 21.1, 22.1, 23.1])}
)
expected = xr.Dataset(coords={"lat": ("lat", [21.1, 22.1])})
actual = data_float_coords.sel(lat=slice(21, 22), method="nearest")
assert_identical(expected, actual)

# check non-zero step
expected = xr.Dataset(coords={"lat": ("lat", [21.1])})
actual = data_float_coords.sel(lat=slice(21, 22, 2), method="nearest")
assert_identical(expected, actual)

# backwards slices
data_int_coords = xr.Dataset(coords={"lat": ("lat", [23, 22, 21, 20])})
expected = xr.Dataset(coords={"lat": ("lat", [22, 21])})
actual = data_int_coords.sel(lat=slice(22, 21), method="nearest")
assert_identical(expected, actual)

data_float_coords = xr.Dataset(
coords={"lat": ("lat", [23.1, 22.1, 21.1, 20.1])}
)
expected = xr.Dataset(coords={"lat": ("lat", [22.1, 21.1])})
actual = data_float_coords.sel(lat=slice(22, 21), method="nearest")
assert_identical(expected, actual)

def test_sel_negative_slices(self) -> None:
data_int_coords = xr.Dataset(coords={"lat": ("lat", [-23, -22, -21, -20])})
expected = xr.Dataset(coords={"lat": ("lat", [-22, -21])})
actual = data_int_coords.sel(lat=slice(-22, -21))
assert_identical(expected, actual)

expected = xr.Dataset(coords={"lat": ("lat", [-22, -21])})
actual = data_int_coords.sel(lat=slice(-22, -21), method="nearest")
assert_identical(expected, actual)

data_float_coords = xr.Dataset(
coords={"lat": ("lat", [-23.1, -22.1, -21.1, -20.1])}
)
expected = xr.Dataset(coords={"lat": ("lat", [-22.1, -21.1])})
actual = data_float_coords.sel(lat=slice(-22, -21), method="nearest")
assert_identical(expected, actual)

def test_loc(self) -> None:
data = create_test_data()
expected = data.sel(dim3="a")
Expand Down
Loading