
Allow HDF Groups #424

Merged
merged 10 commits into from
Feb 29, 2024
18 changes: 15 additions & 3 deletions kerchunk/hdf.py
@@ -1,4 +1,5 @@
import base64
import io
import logging
from typing import Union, BinaryIO

@@ -47,7 +48,7 @@ class SingleHdf5ToZarr:
to BinaryIO is optional), in which case must also provide url. If a str,
file will be opened using fsspec and storage_options.
url : string
URI of the HDF5 file, if passing a file-like object
URI of the HDF5 file, if passing a file-like object or h5py File/dataset
Contributor
hi @martindurant many thanks for looking into this! Great minds think alike - this is how I made it ingest my subset of the multi-variate file myself, earlier today, on a scratch dev version of Kerchunk in my env: I passed an already extracted h5py.Group object. The only hitch with this approach is that if one passes an h5py.Dataset instead, Kerchunk (well, h5py in reality) will complain, since visititems is not a valid method of a Dataset but only of File or Group objects. So in my case I constructed an empty Group and plopped the Dataset of interest into it. The snag with that is that the new Group needs a name different from the Dataset's, hence introducing some extra unwanted overhead
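A minimal sketch of that wrapping trick (names here are made up for illustration; assumes h5py and numpy are available) - a lone Dataset has no visititems, so it is hard-linked into a fresh Group, which does:

```python
import h5py
import numpy as np

# in-memory HDF5 file so the sketch is self-contained
f = h5py.File("demo", mode="w", driver="core", backing_store=False)
f["temp"] = np.arange(4)                # the lone Dataset of interest
dset = f["temp"]
assert not hasattr(dset, "visititems")  # only File/Group objects have it

grp = f.create_group("temp ")           # name must differ from the Dataset's
grp["temp"] = dset                      # hard-link the Dataset into the Group

visited = []
grp.visititems(lambda name, obj: visited.append(name))
assert visited == ["temp"]
```

The trailing space in the Group name is exactly the naming overhead mentioned above.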

Member Author

Do you want to put it in a separate PR?

Contributor

the changes I made to Kerchunk are literally the ones you did here (including passing the variable name and looking for it), so it's not much done on the Kerchunk side, most of the other stuff (creating the new Group etc) I did at our end, but if you think that's useful, I'll plop in Kerchunk, no problemo. I still think it's a bit ugly TBF 😁

Member Author

Right, so we need something to cope with Dataset vs File - maybe just put the diff in here? Yes, I think it's useful.

Contributor

really, just a peasanty workaround to get Kerchunk to be able to run visititems(callback)

Contributor

good man! That's exactly the thing. I'll post them up tomorrow, have not committed them off my work machine yet, and am home now, dinner time 🍕

Contributor (@valeriupredoi, Feb 23, 2024)

hi @martindurant here is my approach in my package (PyActiveStorage):

    elif storage_type == "s3" and storage_options is not None:
        storage_options = storage_options.copy()
        storage_options['default_fill_cache'] = False
        # storage_options['default_cache_type'] = "none"  # big time drain this one
        fs = s3fs.S3FileSystem(**storage_options)
        fs2 = fsspec.filesystem('')
        with fs.open(file_url, 'rb') as s3file:
            s3file = h5py.File(s3file, mode="w")
            if isinstance(s3file[varname], h5py.Dataset):
                print("Looking only at a single Dataset", s3file[varname])
                s3file.create_group(varname + " ")
                s3file[varname + " "][varname] = s3file[varname]
            elif isinstance(s3file[varname], h5py.Group):
                print("Looking only at a single Group", s3file[varname])
                s3file = s3file[varname]
            h5chunks = SingleHdf5ToZarr(s3file, file_url, var=varname,
                                        inline_threshold=0)

and the bit changed in kerchunk/hdf.py is pretty much all you did here, with the added bit that the object becomes just the Group I want kerchunked, so in translate() I plopped in this hacky bit:

        if self.var and self._h5f[self.var + " "]:
            self._h5f = self._h5f[self.var + " "]
        print("Visiting the following object", self._h5f)
        self._h5f.visititems(self._translator)

Cheers 🍺

Contributor

a couple more details: I am using kerchunk==0.2.0 in my conda/mamba env (installed from conda-forge), so I can bypass the dependency issue with pinned numcodecs. Here are some timing results of this approach (changed Kerchunk + conversion to Group, limiting kerchunking to it) vs bog-standard kerchunking of my entire file (which has some 100 variables, with all manner of dimensions, the bigger ones of shape (30, 30, 350, 420)):

With changed Kerchunk + conversion Dataset to Group
---------------------------------------------------
Visititems took: 2.5403971672058105
Time to Translate and Dump Kerchunks to json file 4.393939018249512
Visititems took: 1.9200255870819092
Time to Translate and Dump Kerchunks to json file 2.7312347888946533
Visititems took: 2.005722761154175
Time to Translate and Dump Kerchunks to json file 2.588365316390991
Visititems took: 1.9823436737060547
Time to Translate and Dump Kerchunks to json file 2.7559237480163574
Visititems took: 1.9835329055786133
Time to Translate and Dump Kerchunks to json file 2.5909011363983154

With regular Kerchunk
---------------------
Visititems took: 4.841791152954102
Time to Translate and Dump Kerchunks to json file 5.548096656799316
Visititems took: 4.454912900924683
Time to Translate and Dump Kerchunks to json file 5.720059156417847
Visititems took: 3.8621530532836914
Time to Translate and Dump Kerchunks to json file 4.593475580215454
Visititems took: 4.457882881164551
Time to Translate and Dump Kerchunks to json file 5.079823732376099
Visititems took: 4.275482177734375
Time to Translate and Dump Kerchunks to json file 4.894218444824219

Kerchunking on a restricted space does indeed improve timings, by roughly a factor of 2 in my particular test case 👍

Contributor

the JSON file containing the Kerchunk indices/Zarr ref data drops from 300k (normal) to 8k with the restricted approach (this would matter if we were in 1992, though 🤣 )

spec : int
The version of output to produce (see README of this repo)
inline_threshold : int
@@ -69,11 +70,15 @@ class SingleHdf5ToZarr:
This allows you to supply an fsspec.implementations.reference.LazyReferenceMapper
to write out parquet as the references get filled, or some other dictionary-like class
to customise how references get stored
var_pattern: str or None
If set, only variables with names matching this pattern (as regex) will be scanned
and included in this output. It is on the caller to ensure that all the coordinates
needed to represent a data variable are included.
"""

def __init__(
self,
h5f: "BinaryIO | str",
h5f: "BinaryIO | str | h5py.File",
url: str = None,
spec=1,
inline_threshold=500,
@@ -89,8 +94,15 @@ def __init__(
fs, path = fsspec.core.url_to_fs(h5f, **(storage_options or {}))
self.input_file = fs.open(path, "rb")
url = h5f
else:
self._h5f = h5py.File(self.input_file, mode="r")
elif isinstance(h5f, io.IOBase):
self.input_file = h5f
self._h5f = h5py.File(self.input_file, mode="r")
else:
# assume h5py object (File or group/dataset)
self._h5f = h5f
fs, path = fsspec.core.url_to_fs(url, **(storage_options or {}))
self.input_file = fs.open(path, "rb")
Contributor

I don't think you need these two lines anymore (they certainly mess up my use case, where the file is an S3 object), since the file is loaded as a File object up in the first branch of the conditional; if h5f is an h5py.Group then it should be kept that way, with self._h5f set to it

Member Author

_h5f is indeed set to the input two lines above. This exists for any inlining that might happen, which requires getting bytes directly from the original file, not going via h5py.

mess up my use case

What happens? I think providing the URL/options will certainly be required.

Contributor

in my case it's looking for a local file even if I pass valid S3 storage_options - leave it like this for now, I'll need to do a wee bit more testing to understand what's going on, and will get back to you if Kerchunk needs changing 👍

Member Author

The URL starts with "s3://"?

Contributor

yes and no 🤣 It's a very peculiar bucket; the storage_options dict that s3fs recognizes is

{'key': 'xxxx', 'secret': "xxxx", 'client_kwargs': {'endpoint_url': 'https://uor-aces-o.s3-ext.jc.rl.ac.uk'}, 'default_fill_cache': False, 'default_cache_type': 'first'}

and the s3fs call that is able to read such a strange bucket is as follows:

    fs = s3fs.S3FileSystem(**storage_options)
    with fs.open(file_url, 'rb') as s3file:
        ...

but file_url needs to be the truncated form (bucket + file name), ie bnl/da193a_25_day__198807-198807.nc in this case, and s3fs assembles its full URL from the endpoint URL plus that truncated bucket + filename. It's odd - not 100% sure why this type of s3 storage wants that configuration - but the bottom line is that when Kerchunk tries to open it as a regular s3 file it doesn't work: even if I prepend a correct full s3:// path to the file, I get Forbidden access since the storage identification is done wrongly

Member Author

s3://uor-aces-o.s3-ext.jc.rl.ac.uk/bnl/da193a_25_day__198807-198807.nc

This is definitely not the right URL: the first part should be the bucket, not a server name (I'm surprised it even attempts to connect). The URL should be "s3://bnl/da193a_25_day__198807-198807.nc", as the server/endpoint is already included in the storage options.
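The convention can be sketched with a small stdlib-only helper (hypothetical, for illustration): the endpoint lives in storage_options, so the s3:// URL carries only bucket and key:

```python
def split_s3_url(url: str) -> tuple[str, str]:
    """Split "s3://bucket/path/to/key" into (bucket, key)."""
    prefix = "s3://"
    if not url.startswith(prefix):
        raise ValueError(f"not an s3 URL: {url}")
    bucket, _, key = url[len(prefix):].partition("/")
    return bucket, key

bucket, key = split_s3_url("s3://bnl/da193a_25_day__198807-198807.nc")
assert bucket == "bnl"  # first path component is the bucket, never the server
```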

Contributor

blast! That worked! I knew I'm not doing something right 😆

Contributor

though am getting fairly long times from visititems() - very much comparable to the times where kerchunking is done not on a single Group but on the entire file

Contributor

ah that's because this self._h5f = h5py.File(self.input_file, mode="r") is a few lines down 😁

Member Author

(oops, fixed)

self.spec = spec
self.inline = inline_threshold
if vlen_encode not in ["embed", "null", "leave", "encode"]: