From b14939b1b8fa15a210dc3ba67e4bf9bd3cb3908a Mon Sep 17 00:00:00 2001 From: Jeremy Maitin-Shepard Date: Thu, 15 Jul 2021 13:37:01 -0700 Subject: [PATCH] Add support for zarr dimension_separator metadata field Historically zarr has named chunk files using dots by default, e.g. "1.2.3", but has also supported slashes, e.g. "1/2/3". However, the choice was not indicated in the .zarray metadata file, and instead needed to be specified out-of-band. In TensorStore, that was accomplished using the "key_encoding" spec member. Zarr recently added a dimension_separator field to the metadata format: https://github.com/zarr-developers/zarr-python/pull/715 This commit adds support for that field. Note that previously TensorStore did not accept .zarray files that contained unknown members, meaning that reading arrays with the dimension_separator field would fail. The existing "key_encoding" spec member is still supported, but deprecated in favor of specifying "dimension_separator" as part of the "metadata" field. When creating a new array, TensorStore will always write the "dimension_separator" field, in order to allow the array to be re-opened without specifying the "dimension_separator". Additionally, this change makes TensorStore accept (and preserve) extra .zarray members, as the zarr spec now requires that. 
PiperOrigin-RevId: 384998911 Change-Id: I485141b7786084d57c28005e3076f70462cd5b9e --- python/tensorstore/tensorstore_class.cc | 10 ++ tensorstore/driver/zarr/BUILD | 1 + tensorstore/driver/zarr/driver.cc | 69 ++++++--- tensorstore/driver/zarr/driver_impl.h | 2 +- tensorstore/driver/zarr/driver_impl_test.cc | 6 +- tensorstore/driver/zarr/driver_test.cc | 159 ++++++++++++++++++-- tensorstore/driver/zarr/metadata.cc | 38 ++++- tensorstore/driver/zarr/metadata.h | 23 +++ tensorstore/driver/zarr/metadata_test.cc | 20 +++ tensorstore/driver/zarr/schema.yml | 31 ++-- tensorstore/driver/zarr/spec.cc | 23 ++- tensorstore/driver/zarr/spec.h | 16 -- tensorstore/driver/zarr/spec_test.cc | 40 ++--- 13 files changed, 327 insertions(+), 111 deletions(-) diff --git a/python/tensorstore/tensorstore_class.cc b/python/tensorstore/tensorstore_class.cc index 8e1835b33..e674a18f6 100644 --- a/python/tensorstore/tensorstore_class.cc +++ b/python/tensorstore/tensorstore_class.cc @@ -175,6 +175,7 @@ Asynchronous multi-dimensional array handle. 
'id': 'blosc', 'shuffle': -1, }, + 'dimension_separator': '.', 'dtype': ' 'id': 'blosc', 'shuffle': -1, }, + 'dimension_separator': '.', 'dtype': 'f4', 'fill_value': None, 'filters': None, diff --git a/tensorstore/driver/zarr/BUILD b/tensorstore/driver/zarr/BUILD index 3e52a5740..8ab72f90e 100644 --- a/tensorstore/driver/zarr/BUILD +++ b/tensorstore/driver/zarr/BUILD @@ -262,6 +262,7 @@ tensorstore_cc_library( "//tensorstore:schema", "//tensorstore/index_space:index_transform_builder", "//tensorstore/internal:json", + "//tensorstore/internal:json_metadata_matching", "//tensorstore/util:quote_string", "//tensorstore/util:result", "@com_github_nlohmann_json//:nlohmann_json", diff --git a/tensorstore/driver/zarr/driver.cc b/tensorstore/driver/zarr/driver.cc index 9994e80a2..3875c71d4 100644 --- a/tensorstore/driver/zarr/driver.cc +++ b/tensorstore/driver/zarr/driver.cc @@ -41,8 +41,18 @@ namespace internal_zarr { namespace { constexpr const char kZarrMetadataKey[] = ".zarray"; -inline char GetChunkKeyEncodingSeparator(ChunkKeyEncoding key_encoding) { - return key_encoding == ChunkKeyEncoding::kDotSeparated ? '.' : '/'; +inline char GetDimensionSeparatorChar(DimensionSeparator dimension_separator) { + return dimension_separator == DimensionSeparator::kDotSeparated ? '.' : '/'; +} + +DimensionSeparator GetDimensionSeparator( + const ZarrPartialMetadata& partial_metadata, const ZarrMetadata& metadata) { + if (metadata.dimension_separator) { + return *metadata.dimension_separator; + } else if (partial_metadata.dimension_separator) { + return *partial_metadata.dimension_separator; + } + return DimensionSeparator::kDotSeparated; } Result ParseEncodedMetadata(std::string_view encoded_value) { @@ -95,15 +105,13 @@ class ZarrDriver template