Skip to content

Commit

Permalink
Adds LazyReferenceMapper to api.rst (#1378)
Browse files Browse the repository at this point in the history
  • Loading branch information
norlandrhagen authored Oct 21, 2023
1 parent ac290c8 commit 4593cf0
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
4 changes: 4 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ Built-in Implementations
fsspec.implementations.local.LocalFileSystem
fsspec.implementations.memory.MemoryFileSystem
fsspec.implementations.reference.ReferenceFileSystem
fsspec.implementations.reference.LazyReferenceMapper
fsspec.implementations.sftp.SFTPFileSystem
fsspec.implementations.smb.SMBFileSystem
fsspec.implementations.tar.TarFileSystem
Expand Down Expand Up @@ -181,6 +182,9 @@ Built-in Implementations
.. autoclass:: fsspec.implementations.reference.ReferenceFileSystem
:members: __init__

.. autoclass:: fsspec.implementations.reference.LazyReferenceMapper
:members: __init__

.. autoclass:: fsspec.implementations.sftp.SFTPFileSystem
:members: __init__

Expand Down
16 changes: 13 additions & 3 deletions fsspec/implementations/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,12 @@ def ravel_multi_index(idx, sizes):


class LazyReferenceMapper(collections.abc.MutableMapping):
"""Interface to read parquet store as if it were a standard kerchunk
references dict."""
"""This interface can be used to read/write references from Parquet stores.
It is not intended for other types of references.
It can be used with Kerchunk's MultiZarrToZarr method to combine
references into a Parquet store.
Examples of this use case can be found here:
https://fsspec.github.io/kerchunk/advanced.html?highlight=parquet#parquet-storage"""

# import is class level to prevent numpy dep requirement for fsspec
@property
Expand All @@ -108,9 +112,15 @@ def __init__(
Root of parquet store
fs : fsspec.AbstractFileSystem
fsspec filesystem object, default is local filesystem.
cache_size : int
cache_size : int, default=128
Maximum size of LRU cache, where cache_size*record_size denotes
the total number of references that can be loaded in memory at once.
categorical_threshold : int, default=10
Encode URLs as pandas.Categorical to reduce memory footprint if the ratio
of the number of unique URLs to total number of refs for each variable
is greater than or equal to this number.
"""
self.root = root
self.chunk_sizes = {}
Expand Down

0 comments on commit 4593cf0

Please sign in to comment.