-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Open
Labels
Description
What is your issue?
I was playing around with manually creating zarr stores and feeding them into xarray. I noticed some weird edge cases when the name of a coordinate variable and the name of its dimension are not the same. I'm not sure that either of these cases is necessarily a bug, but the resulting behavior doesn't "feel right".
cc @TomNicholas
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "xarray[complete]@git+https://github.com/pydata/xarray.git@main",
# "zarr",
# "numpy",
# ]
# ///
#
import xarray as xr
import zarr
import numpy as np
# Case 1: every array in the store declares "coord_1" as its dimension name,
# but no array in the store is itself named "coord_1".
fname = "mismatch_name_1.zarr"
z = zarr.open(fname)
# "blah" holds 0..9 along dimension "coord_1"; its array name differs from
# the dimension name it declares.
z.create_array(
"blah",
data=np.arange(10, dtype=int),
dimension_names=["coord_1"],
overwrite=True,
)
# "data" holds 0, 10, ..., 90 along the same "coord_1" dimension.
z.create_array(
"data", data=np.arange(100, step=10), dimension_names=["coord_1"], overwrite=True
)
# Open the store with xarray (consolidated=False is passed explicitly) and
# show the resulting Dataset.
ds = xr.open_zarr(fname, consolidated=False)
print(ds)
# Output recorded by the reporter: "blah" is listed as a plain data variable
# and "coord_1" is a dimension without coordinates.
# <xarray.Dataset> Size: 160B
# Dimensions: (coord_1: 10)
# Dimensions without coordinates: coord_1
# Data variables:
# blah (coord_1) int64 80B ...
# data (coord_1) int64 80B ...
#############################
# Case 2: an array named "coord_1" declares "blah" as its dimension name,
# while "data" declares "coord_1" as its dimension name.
fname = "mismatch_name_2.zarr"
z = zarr.open(fname)
# "coord_1" holds 0..9 but lies along dimension "blah", not along "coord_1".
z.create_array(
"coord_1",
data=np.arange(10, dtype=int),
dimension_names=["blah"],
overwrite=True,
)
# "data" holds 0, 10, ..., 90 along dimension "coord_1".
z.create_array(
"data", data=np.arange(100, step=10), dimension_names=["coord_1"], overwrite=True
)
# Unlike case 1, this uses xr.load_dataset rather than xr.open_zarr; the
# recorded output below shows the values instead of "...".
ds = xr.load_dataset(fname, consolidated=False)
print(ds)
# Output recorded by the reporter: "coord_1" becomes a coordinate along "blah",
# and the Dataset has two separate dimensions, "coord_1" and "blah".
# <xarray.Dataset> Size: 160B
# Dimensions: (coord_1: 10, blah: 10)
# Coordinates:
# coord_1 (blah) int64 80B 0 1 2 3 4 5 6 7 8 9
# Dimensions without coordinates: blah
# Data variables:
# data (coord_1) int64 80B 0 10 20 30 40 50 60 70 80 90
# Selecting on "blah" does not fail, but the result looks wrong to the
# reporter: the "coord_1" coordinate collapses to the scalar 4 while "data"
# still has all 10 values along the "coord_1" dimension.
print(ds.sel(blah=4))
# <xarray.Dataset> Size: 88B
# Dimensions: (coord_1: 10)
# Coordinates:
# coord_1 int64 8B 4
# Data variables:
# data (coord_1) int64 80B 0 10 20 30 40 50 60 70 80 90
# Selecting on "coord_1" fails with the error below, as recorded by the
# reporter. This statement is expected to raise, so the script stops here.
# KeyError: "no index found for coordinate 'coord_1'"
ds.sel(coord_1=4)