Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ Version NEXTVERSION

**2026-??-??**

* Read Kerchunk datasets with `cfdm.read`
(https://github.com/NCAS-CMS/cfdm/issues/385)
* Read open file handle datasets with `cfdm.read`
(https://github.com/NCAS-CMS/cfdm/issues/401)
* Write UGRID datasets with `cfdm.write`
(https://github.com/NCAS-CMS/cfdm/issues/271)
* New keyword to `cfdm.read`: ``filesystem``
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ inspecting it:

The ``cfdm`` package can:

* read field and domain constructs from netCDF, CDL, and Zarr datasets
with a choice of netCDF backends, and in local, http, and s3
locations,
* read field and domain constructs from netCDF, CDL, Zarr, and
Kerchunk datasets with a choice of netCDF backends, and in local,
http, and s3 locations,
* be fully flexible with respect to dataset storage chunking,
* create new field and domain constructs in memory,
* write and append field and domain constructs to netCDF and Zarr v3
Expand Down
2 changes: 1 addition & 1 deletion cfdm/cfdmimplementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3661,7 +3661,7 @@ def set_original_filenames(self, parent, filename):
if data is not None:
filenames += tuple(data._original_filenames())

parent._original_filenames(define=set(filenames))
parent._original_filenames(define=filenames)

def set_parameter(self, parent, parameter, value, copy=True):
"""Set a parameter on a component.
Expand Down
41 changes: 23 additions & 18 deletions cfdm/data/abstract/filearray.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,12 @@ def get_filename(self, normalise=False, default=AttributeError()):
)

if normalise and not self.has_remote_storage_protocol():
filename = abspath(filename)
try:
filename = abspath(filename)
except TypeError:
# filename is not a string (e.g. file handle, kerchunk
# mapper, etc.)
pass

return filename

Expand Down Expand Up @@ -485,26 +490,26 @@ def open(self, func, *args, **kwargs):

"""
filename = self.get_filename(normalise=True)
if isinstance(filename, str):
if self.has_remote_storage_protocol():
from urllib.parse import urlparse

if self.has_remote_storage_protocol():
from urllib.parse import urlparse

import fsspec
import fsspec

url = urlparse(filename)
if url.scheme == "s3":
filename = url.path[1:]
url = urlparse(filename)
if url.scheme == "s3":
filename = url.path[1:]

fs = fsspec.filesystem(
protocol=self.get_storage_protocol(),
**self.get_storage_options(),
)
filename = fs.open(filename, "rb")
else:
try:
filename = abspath(filename, uri=False)
except ValueError:
filename = abspath(filename)
fs = fsspec.filesystem(
protocol=self.get_storage_protocol(),
**self.get_storage_options(),
)
filename = fs.open(filename, "rb")
else:
try:
filename = abspath(filename, uri=False)
except ValueError:
filename = abspath(filename)

try:
dataset = func(filename, *args, **kwargs)
Expand Down
19 changes: 15 additions & 4 deletions cfdm/docstring/docstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,21 @@
original file names, then the returned files will be the
collection of original files from all contributing sources.""",
# read datasets
"{{read datasets: (arbitrarily nested sequence of) `str`}}": """dataset: (arbitrarily nested sequence of) `str`
A string, or arbitrarily nested sequence of strings,
giving the dataset names, or directory names, from which
to read field or domain constructs.
"{{read datasets:}}": """datasets:
The dataset, or datasets, from which to read field or
domain constructs.

May be a string-valued path, a file-like object (such as
`io.BufferedReader`), or a directory-like object (such as
`fsspec.mapping.FSMap`); or a sequence of any combination
of these types.

Note that a Kerchunk dataset may only be read from a
directory-like object. For instance::

>>> fs = fsspec.filesystem('reference', fo='kerchunk.json')
>>> kerchunk = fs.get_mapper()
>>> f = {{package}}.read(kerchunk)

Local names may be relative paths and will have tilde and
shell environment variables expansions applied to them,
Expand Down
27 changes: 20 additions & 7 deletions cfdm/mixin/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,12 @@ def _original_filenames(self, define=None, update=None, clear=False):
# Replace the existing collection of original file names
if isinstance(define, str):
define = (define,)
elif not isinstance(define, (list, tuple, set)):
define = ()

filenames = tuple([abspath(name) for name in define])
filenames = [
abspath(name) for name in define if isinstance(name, str)
]

if update:
# Add new original file names to the existing collection
Expand All @@ -107,14 +111,21 @@ def _original_filenames(self, define=None, update=None, clear=False):
"at the same time"
)

filenames = self._get_component("original_filenames", ())
filenames = list(self._get_component("original_filenames", ()))

if isinstance(update, str):
update = (update,)
elif not isinstance(update, (list, tuple, set)):
update = ()

filenames += tuple([abspath(name) for name in update])
filenames += [
abspath(name) for name in update if isinstance(name, str)
]

if filenames:
if len(filenames) > 1:
if filenames is not None:
if len(filenames) <= 1:
filenames = tuple(filenames)
else:
filenames = tuple(set(filenames))

self._set_component("original_filenames", filenames, copy=False)
Expand All @@ -131,9 +142,11 @@ def _original_filenames(self, define=None, update=None, clear=False):

# Still here? Then return the existing original file names
if clear:
return set(self._del_component("original_filenames", ()))
filenames = self._del_component("original_filenames", ())
else:
filenames = self._get_component("original_filenames", ())

return set(self._get_component("original_filenames", ()))
return set(filenames)

def get_original_filenames(self):
"""The names of files containing the original data and metadata.
Expand Down
150 changes: 150 additions & 0 deletions cfdm/read_write/abstract/abstractio.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,156 @@ def dataset_open(self, *args, **kwargs):
class IORead(IO, metaclass=abc.ABCMeta):
"""Abstract base class for instantiating Fields from a dataset."""

@classmethod
def create_filesystem(cls, path, storage_options=None):
"""Create a file system for a path.

.. versionadded:: (cfdm) NEXTVERSION

:Parameters:

path: `str`
The path of the directory or file to be opened.

The protocol of the created file system is taken as the
URI schema of the *path*.

storage_options: `None` or dict`, optional
`fsspec.filesystem` keyword arguments to be used
during file system creation.

For a local path (e.g. ``'/homa/data/x.nc'``), `None`
will prevent a file system from being created.

For a remote path (e.g. ``'http://home/data/x.nc'``),
`None` is equivalent to an empty `dict`.

For a remote S3 path
(e.g. ``'s3://authority/data/x.nc'``), the
"endpoint_url" key is automatically added to the
storage options.

:Returns:

(path, file system) or (path, `None`)
The path of the directory or file, and its file
system.

The file system will be `None` if one wasn't created
(see *storage_options*).

For an input remote S3 path, the schema and authority
are removed from the output path (e.g. for a *path* of
``'s3://authority/data/x.nc'``, ``'data/x.nc'`` is
returned).

"""
from uritools import urisplit

u = urisplit(path)
scheme = u.scheme

if scheme in (None, "file"):
# --------------------------------------------------------
# Path is, e.g. ' file://...' or '/data/...'
# --------------------------------------------------------
if storage_options is None:
filesystem = None
else:
import fsspec

filesystem = fsspec.filesystem(
protocol="local", **storage_options
)

elif scheme == "s3":
# --------------------------------------------------------
# Path is 's3://...'
# --------------------------------------------------------
import fsspec

if storage_options is None:
storage_options = {}

client_kwargs = storage_options.get("client_kwargs", {})
if (
"endpoint_url" not in storage_options
and "endpoint_url" not in client_kwargs
):
authority = u.authority
if not authority:
authority = ""

storage_options = storage_options.copy()
storage_options["endpoint_url"] = f"https://{authority}"

filesystem = fsspec.filesystem(protocol=scheme, **storage_options)

path = u.path[1:]

else:
# --------------------------------------------------------
# Path is, e.g. 'http://...', 'myschema://...'
# --------------------------------------------------------
import fsspec

if storage_options is None:
storage_options = {}

filesystem = fsspec.filesystem(protocol=scheme, **storage_options)

return path, filesystem

@classmethod
def filesystem_open(cls, filesystem, dataset, open_options=None):
"""Open a dataset on a file system.

.. versionadded:: (cfdm) NEXTVERSION

:Parameters:

filesystem: file system
A pre-authenticated file system, such as
`fsspec.filesystem`.

dataset: `str`
The file system path to be opened.

open_options: `dict` or `None`, optional
The *filesystem* `open` method keyword
arguments. `None` is equivalent to an empty `dict`.
If the "mode" key is not set, then it defaults to
``'rb'``.

:Returns:

file-like object
The open file handle for the dataset.

"""
if open_options is None:
open_options = {"mode": "rb"}

if "mode" not in open_options:
open_options = open_options.copy()
open_options["mode"] = "rb"

try:
fh = filesystem.open(dataset, **open_options)
except AttributeError:
raise AttributeError(
f"The file system object {filesystem!r} does not have "
"an 'open' method. Please provide a valid file system "
"object (e.g. an fsspec.filesystem instance)."
)
except Exception as error:
raise RuntimeError(
f"Failed to open {dataset!r} using the file system "
f" object {filesystem!r}: {error}"
) from error

return fh

@abc.abstractmethod
def read(self, *args, **kwargs):
"""Read fields from a netCDF dataset."""
Expand Down
Loading