diff --git a/docs/source/api.rst b/docs/source/api.rst index 29b1f5f92..6b0135532 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -124,6 +124,7 @@ Built-in Implementations fsspec.implementations.local.LocalFileSystem fsspec.implementations.memory.MemoryFileSystem fsspec.implementations.reference.ReferenceFileSystem + fsspec.implementations.reference.LazyReferenceMapper fsspec.implementations.sftp.SFTPFileSystem fsspec.implementations.smb.SMBFileSystem fsspec.implementations.tar.TarFileSystem @@ -181,6 +182,9 @@ Built-in Implementations .. autoclass:: fsspec.implementations.reference.ReferenceFileSystem :members: __init__ +.. autoclass:: fsspec.implementations.reference.LazyReferenceMapper + :members: __init__ + .. autoclass:: fsspec.implementations.sftp.SFTPFileSystem :members: __init__ diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index ac20bdee2..25ec3febd 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -82,8 +82,12 @@ def ravel_multi_index(idx, sizes): class LazyReferenceMapper(collections.abc.MutableMapping): - """Interface to read parquet store as if it were a standard kerchunk - references dict.""" + """This interface can be used to read/write references from Parquet stores. + It is not intended for other types of references. + It can be used with Kerchunk's MultiZarrToZarr method to combine + references into a parquet store. + Examples of this use-case can be found here: + https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage""" # import is class level to prevent numpy dep requirement for fsspec @property @@ -108,9 +112,15 @@ def __init__( Root of parquet store fs : fsspec.AbstractFileSystem fsspec filesystem object, default is local filesystem. - cache_size : int + cache_size : int, default=128 Maximum size of LRU cache, where cache_size*record_size denotes the total number of references that can be loaded in memory at once. 
+ categorical_threshold : int, default=10 + Encode urls as pandas.Categorical to reduce memory footprint if the ratio + of the total number of refs to the number of unique urls for each variable + is greater than or equal to this number. + + """ self.root = root self.chunk_sizes = {}