Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ Version NEXTVERSION

**2026-??-??**

* Read Kerchunk datasets with `cfdm.read`
(https://github.com/NCAS-CMS/cfdm/issues/385)
* Read open file handle datasets with `cfdm.read`
(https://github.com/NCAS-CMS/cfdm/issues/401)
* Write UGRID datasets with `cfdm.write`
(https://github.com/NCAS-CMS/cfdm/issues/271)
* New keyword to `cfdm.read`: ``filesystem``
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ inspecting it:

The ``cfdm`` package can:

* read field and domain constructs from netCDF, CDL, and Zarr datasets
with a choice of netCDF backends, and in local, http, and s3
locations,
* read field and domain constructs from netCDF, CDL, Zarr, and
Kerchunk datasets with a choice of netCDF backends, and in local,
http, and s3 locations,
* be fully flexible with respect to dataset storage chunking,
* create new field and domain constructs in memory,
* write and append field and domain constructs to netCDF and Zarr v3
Expand Down
2 changes: 1 addition & 1 deletion cfdm/cfdmimplementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3661,7 +3661,7 @@ def set_original_filenames(self, parent, filename):
if data is not None:
filenames += tuple(data._original_filenames())

parent._original_filenames(define=set(filenames))
parent._original_filenames(define=filenames)

def set_parameter(self, parent, parameter, value, copy=True):
"""Set a parameter on a component.
Expand Down
41 changes: 23 additions & 18 deletions cfdm/data/abstract/filearray.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,12 @@ def get_filename(self, normalise=False, default=AttributeError()):
)

if normalise and not self.has_remote_storage_protocol():
filename = abspath(filename)
try:
filename = abspath(filename)
except TypeError:
# filename is not a string (e.g. file handle, kerchunk
# mapper, etc.)
pass

return filename

Expand Down Expand Up @@ -485,26 +490,26 @@ def open(self, func, *args, **kwargs):

"""
filename = self.get_filename(normalise=True)
if isinstance(filename, str):
if self.has_remote_storage_protocol():
from urllib.parse import urlparse

if self.has_remote_storage_protocol():
from urllib.parse import urlparse

import fsspec
import fsspec

url = urlparse(filename)
if url.scheme == "s3":
filename = url.path[1:]
url = urlparse(filename)
if url.scheme == "s3":
filename = url.path[1:]

fs = fsspec.filesystem(
protocol=self.get_storage_protocol(),
**self.get_storage_options(),
)
filename = fs.open(filename, "rb")
else:
try:
filename = abspath(filename, uri=False)
except ValueError:
filename = abspath(filename)
fs = fsspec.filesystem(
protocol=self.get_storage_protocol(),
**self.get_storage_options(),
)
filename = fs.open(filename, "rb")
else:
try:
filename = abspath(filename, uri=False)
except ValueError:
filename = abspath(filename)

try:
dataset = func(filename, *args, **kwargs)
Expand Down
19 changes: 15 additions & 4 deletions cfdm/docstring/docstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,21 @@
original file names, then the returned files will be the
collection of original files from all contributing sources.""",
# read datasets
"{{read datasets: (arbitrarily nested sequence of) `str`}}": """dataset: (arbitrarily nested sequence of) `str`
A string, or arbitrarily nested sequence of strings,
giving the dataset names, or directory names, from which
to read field or domain constructs.
"{{read datasets:}}": """datasets:
The dataset, or datasets, from which to read field or
domain constructs.

May be a string-valued path, a file-like object (such as
`io.BufferedReader`), or a directory-like object (such as
`fsspec.mapping.FSMap`); or a sequence of any combination
of these types.

Note that a Kerchunk dataset may only be read from a
directory-like object. For instance::

>>> fs = fsspec.filesystem('reference', fo='kerchunk.json')
>>> kerchunk = fs.get_mapper()
>>> f = {{package}}.read(kerchunk)

Local names may be relative paths and will have tilde and
shell environment variables expansions applied to them,
Expand Down
27 changes: 20 additions & 7 deletions cfdm/mixin/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,12 @@ def _original_filenames(self, define=None, update=None, clear=False):
# Replace the existing collection of original file names
if isinstance(define, str):
define = (define,)
elif not isinstance(define, (list, tuple, set)):
define = ()

filenames = tuple([abspath(name) for name in define])
filenames = [
abspath(name) for name in define if isinstance(name, str)
]

if update:
# Add new original file names to the existing collection
Expand All @@ -107,14 +111,21 @@ def _original_filenames(self, define=None, update=None, clear=False):
"at the same time"
)

filenames = self._get_component("original_filenames", ())
filenames = list(self._get_component("original_filenames", ()))

if isinstance(update, str):
update = (update,)
elif not isinstance(update, (list, tuple, set)):
update = ()

filenames += tuple([abspath(name) for name in update])
filenames += [
abspath(name) for name in update if isinstance(name, str)
]

if filenames:
if len(filenames) > 1:
if filenames is not None:
if len(filenames) <= 1:
filenames = tuple(filenames)
else:
filenames = tuple(set(filenames))

self._set_component("original_filenames", filenames, copy=False)
Expand All @@ -131,9 +142,11 @@ def _original_filenames(self, define=None, update=None, clear=False):

# Still here? Then return the existing original file names
if clear:
return set(self._del_component("original_filenames", ()))
filenames = self._del_component("original_filenames", ())
else:
filenames = self._get_component("original_filenames", ())

return set(self._get_component("original_filenames", ()))
return set(filenames)

def get_original_filenames(self):
"""The names of files containing the original data and metadata.
Expand Down
150 changes: 150 additions & 0 deletions cfdm/read_write/abstract/abstractio.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,156 @@ def dataset_open(self, *args, **kwargs):
class IORead(IO, metaclass=abc.ABCMeta):
"""Abstract base class for instantiating Fields from a dataset."""

@classmethod
def create_filesystem(cls, path, storage_options=None):
"""Create a file system for a path.

.. versionadded:: (cfdm) NEXTVERSION

:Parameters:

path: `str`
The path of the directory or file to be opened.

The protocol of the created file system is taken as the
URI schema of the *path*.

storage_options: `None` or dict`, optional
`fsspec.filesystem` keyword arguments to be used
during file system creation.

For a local path (e.g. ``'/homa/data/x.nc'``), `None`
will prevent a file system from being created.

For a remote path (e.g. ``'http://home/data/x.nc'``),
`None` is equivalent to an empty `dict`.

For a remote S3 path
(e.g. ``'s3://authority/data/x.nc'``), the
"endpoint_url" key is automatically added to the
storage options.

:Returns:

(path, file system) or (path, `None`)
The path of the directory or file, and its file
system.

The file system will be `None` if one wasn't created
(see *storage_options*).

For an input remote S3 path, the schema and authority
are removed from the output path (e.g. for a *path* of
``'s3://authority/data/x.nc'``, ``'data/x.nc'`` is
returned).

"""
from uritools import urisplit

u = urisplit(path)
scheme = u.scheme

if scheme in (None, "file"):
# --------------------------------------------------------
# Path is, e.g. ' file://...' or '/data/...'
# --------------------------------------------------------
if storage_options is None:
filesystem = None
else:
import fsspec

filesystem = fsspec.filesystem(
protocol="local", **storage_options
)

elif scheme == "s3":
# --------------------------------------------------------
# Path is 's3://...'
# --------------------------------------------------------
import fsspec

if storage_options is None:
storage_options = {}

client_kwargs = storage_options.get("client_kwargs", {})
if (
"endpoint_url" not in storage_options
and "endpoint_url" not in client_kwargs
):
authority = u.authority
if not authority:
authority = ""

storage_options = storage_options.copy()
storage_options["endpoint_url"] = f"https://{authority}"

filesystem = fsspec.filesystem(protocol=scheme, **storage_options)

path = u.path[1:]

else:
# --------------------------------------------------------
# Path is, e.g. 'http://...', 'myschema://...'
# --------------------------------------------------------
import fsspec

if storage_options is None:
storage_options = {}

filesystem = fsspec.filesystem(protocol=scheme, **storage_options)

return path, filesystem

@classmethod
def filesystem_open(cls, filesystem, dataset, open_options=None):
"""Open a dataset on a file system.

.. versionadded:: (cfdm) NEXTVERSION

:Parameters:

filesystem: file system
A pre-authenticated file system, such as
`fsspec.filesystem`.

dataset: `str`
The file system path to be opened.

open_options: `dict` or `None`, optional
The *filesystem* `open` method keyword
arguments. `None` is equivalent to an empty `dict`.
If the "mode" key is not set, then it defaults to
``'rb'``.

:Returns:

file-like object
The open file handle for the dataset.

"""
if open_options is None:
open_options = {"mode": "rb"}

if "mode" not in open_options:
open_options = open_options.copy()
open_options["mode"] = "rb"

try:
fh = filesystem.open(dataset, **open_options)
except AttributeError:
raise AttributeError(
f"The file system object {filesystem!r} does not have "
"an 'open' method. Please provide a valid file system "
"object (e.g. an fsspec.filesystem instance)."
)
except Exception as error:
raise RuntimeError(
f"Failed to open {dataset!r} using the file system "
f" object {filesystem!r}: {error}"
) from error

return fh

@abc.abstractmethod
def read(self, *args, **kwargs):
"""Read fields from a netCDF dataset."""
Expand Down
Loading