From 9250136bf2c7ed6c5b64417a4884ebdae72095d2 Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 28 Oct 2021 16:24:21 +0100 Subject: [PATCH 01/16] Add new current version 0.4 --- ome_zarr/format.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ome_zarr/format.py b/ome_zarr/format.py index d8c3a77b..99e8402c 100644 --- a/ome_zarr/format.py +++ b/ome_zarr/format.py @@ -136,4 +136,15 @@ def version(self) -> str: return "0.3" -CurrentFormat = FormatV03 +class FormatV04(FormatV03): + """ + Changelog: axes is list of dicts, + introduce transformations in multiscales (Nov 2021) + """ + + @property + def version(self) -> str: + return "0.4" + + +CurrentFormat = FormatV04 From 6557b265764fe6bc04935d5c19a66dc0a2abeaab Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 17 Dec 2021 16:10:28 +0000 Subject: [PATCH 02/16] writer validates axes for 0.4 version --- ome_zarr/writer.py | 117 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 94 insertions(+), 23 deletions(-) diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index 0e94a467..54dbf66a 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -2,7 +2,7 @@ """ import logging -from typing import Any, List, Tuple, Union +from typing import Any, Dict, List, Tuple, Union import numpy as np import zarr @@ -13,18 +13,75 @@ LOGGER = logging.getLogger("ome_zarr.writer") +KNOWN_AXES = {"x": "space", "y": "space", "z": "space", "c": "channel", "t": "time"} -def _validate_axes_names( - ndim: int, axes: Union[str, List[str]] = None, fmt: Format = CurrentFormat() -) -> Union[None, List[str]]: - """Returns validated list of axes names or raise exception if invalid""" + +def _axes_to_dicts( + axes: Union[List[str], List[Dict[str, str]]] +) -> List[Dict[str, str]]: + """Returns a list of axis dicts with name and type""" + axes_dicts = [] + for axis in axes: + if isinstance(axis, str): + axis_dict = {"name": axis} + if axis in KNOWN_AXES: + axis_dict["type"] = KNOWN_AXES[axis] + axes_dicts.append(axis_dict) + else: + axes_dicts.append(axis) + return axes_dicts + + +def _axes_to_names(axes: List[Dict[str, str]]) -> List[str]: + """Returns a list of axis names""" + axes_names = [] + for axis in axes: + if "name" not in axis: + raise ValueError("Axis Dict %s has no 'name'" % axis) + axes_names.append(axis["name"]) + return axes_names + + +def _validate_axes_types(axes_dicts: List[Dict[str, str]]) -> None: + """ + Validate the axes types according to the spec, version 0.4+ + """ + axes_types = [axis.get("type") for axis in axes_dicts] + known_types = list(KNOWN_AXES.values()) + unknown_types = [atype for atype in axes_types if atype not in known_types] + if len(unknown_types) > 1: + raise ValueError( + "Too many unknown axes types. 1 allowed, found: %s" % unknown_types + ) + + def _last_index(item: str, item_list: List[Any]) -> int: + return max(loc for loc, val in enumerate(item_list) if val == item) + + if "time" in axes_types and _last_index("time", axes_types) > 0: + raise ValueError("'time' axis must be first dimension only") + + if axes_types.count("channel") > 1: + raise ValueError("Only 1 axis can be type 'channel'") + + if "channel" in axes_types and _last_index( + "channel", axes_types + ) > axes_types.index("space"): + raise ValueError("'space' axes must come after 'channel'") + + +def _validate_axes( + ndim: int = None, + axes: Union[str, List[str], List[Dict[str, str]]] = None, + fmt: Format = CurrentFormat(), +) -> Union[None, List[str], List[Dict[str, str]]]: + """Returns list of axes valid for fmt.version or raise exception if invalid""" if fmt.version in ("0.1", "0.2"): if axes is not None: LOGGER.info("axes ignored for version 0.1 or 0.2") return None - # handle version 0.3... + # We can guess axes for 2D and 5D data if axes is None: if ndim == 2: axes = ["y", "x"] @@ -37,16 +94,30 @@ def _validate_axes_names( "axes must be provided. Can't be guessed for 3D or 4D data" ) + # axes may be string e.g. "tczyx" if isinstance(axes, str): axes = list(axes) - if len(axes) != ndim: - raise ValueError("axes length must match number of dimensions") - _validate_axes(axes) - return axes + if ndim is not None and len(axes) != ndim: + raise ValueError( + f"axes length ({len(axes)}) must match number of dimensions ({ndim})" + ) + + # axes may be list of 'x', 'y' or list of {'name': 'x'} + axes_dicts = _axes_to_dicts(axes) + axes_names = _axes_to_names(axes_dicts) + + # check names (only enforced for version 0.3) + if fmt.version == "0.3": + _validate_axes_03(axes_names) + return axes_names + + _validate_axes_types(axes_dicts) + + return axes_dicts -def _validate_axes(axes: List[str], fmt: Format = CurrentFormat()) -> None: +def _validate_axes_03(axes: List[str]) -> None: val_axes = tuple(axes) if len(val_axes) == 2: @@ -75,7 +146,7 @@ def write_multiscale( group: zarr.Group, chunks: Union[Tuple[Any, ...], int] = None, fmt: Format = CurrentFormat(), - axes: Union[str, List[str]] = None, + axes: Union[str, List[str], List[Dict[str, str]]] = None, ) -> None: """ Write a pyramid with multiscale metadata to disk. @@ -93,13 +164,13 @@ def write_multiscale( fmt: Format The format of the ome_zarr data which should be used. Defaults to the most current. - axes: str or list of str - the names of the axes. e.g. "tczyx". Not needed for v0.1 or v0.2 - or for v0.3 if 2D or 5D. Otherwise this must be provided + axes: str or list of str or list of dict + List of axes dicts, or names. Not needed for v0.1 or v0.2 + or if 2D. Otherwise this must be provided """ dims = len(pyramid[0].shape) - axes = _validate_axes_names(dims, axes, fmt) + axes = _validate_axes(dims, axes, fmt) paths = [] for path, dataset in enumerate(pyramid): @@ -113,7 +184,7 @@ def write_multiscales_metadata( group: zarr.Group, paths: List[str], fmt: Format = CurrentFormat(), - axes: List[str] = None, + axes: Union[str, List[str], List[Dict[str, str]]] = None, ) -> None: """ Write the multiscales metadata in the group. @@ -142,7 +213,7 @@ def write_multiscales_metadata( if fmt.version in ("0.1", "0.2"): LOGGER.info("axes ignored for version 0.1 or 0.2") else: - _validate_axes(axes, fmt) + axes = _validate_axes(axes=axes, fmt=fmt) multiscales[0]["axes"] = axes group.attrs["multiscales"] = multiscales @@ -154,7 +225,7 @@ def write_image( byte_order: Union[str, List[str]] = "tczyx", scaler: Scaler = Scaler(), fmt: Format = CurrentFormat(), - axes: Union[str, List[str]] = None, + axes: Union[str, List[str], List[Dict[str, str]]] = None, **metadata: JSONDict, ) -> None: """Writes an image to the zarr store according to ome-zarr specification @@ -179,9 +250,9 @@ def write_image( fmt: Format The format of the ome_zarr data which should be used. Defaults to the most current. - axes: str or list of str - the names of the axes. e.g. "tczyx". Not needed for v0.1 or v0.2 - or for v0.3 if 2D or 5D. Otherwise this must be provided + axes: str or list of str or list of dict + List of axes dicts, or names. Not needed for v0.1 or v0.2 + or if 2D. Otherwise this must be provided """ if image.ndim > 5: @@ -195,7 +266,7 @@ def write_image( axes = None # check axes before trying to scale - _validate_axes_names(image.ndim, axes, fmt) + _validate_axes(image.ndim, axes, fmt) if chunks is not None: chunks = _retuple(chunks, image.shape) From 19a320c6c0cec3aad71bc9f8678d582df5de97f3 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 17 Dec 2021 16:20:00 +0000 Subject: [PATCH 03/16] reader handles axes as None, List of str or List of dict --- ome_zarr/reader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 13993479..f582e36b 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -282,8 +282,7 @@ def __init__(self, node: Node) -> None: "version", "0.1" ) # should this be matched with Format.version? datasets = multiscales[0]["datasets"] - # axes field was introduced in 0.3, before all data was 5d - axes = tuple(multiscales[0].get("axes", ["t", "c", "z", "y", "x"])) + axes = multiscales[0].get("axes") if len(set(axes) - axes_values) > 0: raise RuntimeError(f"Invalid axes names: {set(axes) - axes_values}") node.metadata["axes"] = axes @@ -301,6 +300,8 @@ def __init__(self, node: Node) -> None: for c in data.chunks ] LOGGER.info("resolution: %s", resolution) + if axes is not None: + axes = tuple(str(axis) for axis in axes) LOGGER.info(" - shape %s = %s", axes, data.shape) LOGGER.info(" - chunks = %s", chunk_sizes) LOGGER.info(" - dtype = %s", data.dtype) From a2b3b3e98b9c4a4b3115264344b6441704aef7a4 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 17 Dec 2021 22:53:58 +0000 Subject: [PATCH 04/16] Add tests --- ome_zarr/reader.py | 3 +- tests/test_writer.py | 102 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 88 insertions(+), 17 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index f582e36b..6e4f2ce2 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -283,7 +283,8 @@ def __init__(self, node: Node) -> None: ) # should this be matched with Format.version? datasets = multiscales[0]["datasets"] axes = multiscales[0].get("axes") - if len(set(axes) - axes_values) > 0: + if version == "0.3" and len(set(axes) - axes_values) > 0: + # TODO: validate axis for > V0.3 ? raise RuntimeError(f"Invalid axes names: {set(axes) - axes_values}") node.metadata["axes"] = axes datasets = [d["path"] for d in datasets] diff --git a/tests/test_writer.py b/tests/test_writer.py index 5110b3d4..1e3a27ac 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -4,12 +4,13 @@ import pytest import zarr -from ome_zarr.format import FormatV01, FormatV02, FormatV03 +from ome_zarr.format import FormatV01, FormatV02, FormatV03, FormatV04 from ome_zarr.io import parse_url from ome_zarr.reader import Multiscales, Reader from ome_zarr.scale import Scaler from ome_zarr.writer import ( - _validate_axes_names, + KNOWN_AXES, + _validate_axes, write_image, write_multiscales_metadata, ) @@ -55,6 +56,7 @@ def scaler(self, request): ), pytest.param(FormatV02, id="V02"), pytest.param(FormatV03, id="V03"), + pytest.param(FormatV04, id="V04"), ), ) def test_writer(self, shape, scaler, format_version): @@ -75,11 +77,11 @@ def test_writer(self, shape, scaler, format_version): reader = Reader(parse_url(f"{self.path}/test")) node = list(reader())[0] assert Multiscales.matches(node.zarr) - if version.version not in ("0.1", "0.2"): + if version.version in ("0.1", "0.2"): # v0.1 and v0.2 MUST be 5D - assert node.data[0].shape == shape - else: assert node.data[0].ndim == 5 + else: + assert node.data[0].shape == shape assert np.allclose(data, node.data[0][...].compute()) def test_dim_names(self): @@ -88,23 +90,23 @@ def test_dim_names(self): # v0.3 MUST specify axes for 3D or 4D data with pytest.raises(ValueError): - _validate_axes_names(3, axes=None, fmt=v03) + _validate_axes(3, axes=None, fmt=v03) # ndims must match axes length with pytest.raises(ValueError): - _validate_axes_names(3, axes="yx", fmt=v03) + _validate_axes(3, axes="yx", fmt=v03) # axes must be ordered tczyx with pytest.raises(ValueError): - _validate_axes_names(3, axes="yxt", fmt=v03) + _validate_axes(3, axes="yxt", fmt=v03) with pytest.raises(ValueError): - _validate_axes_names(2, axes=["x", "y"], fmt=v03) + _validate_axes(2, axes=["x", "y"], fmt=v03) with pytest.raises(ValueError): - _validate_axes_names(5, axes="xyzct", fmt=v03) + _validate_axes(5, axes="xyzct", fmt=v03) # valid axes - no change, converted to list - assert _validate_axes_names(2, axes=["y", "x"], fmt=v03) == ["y", "x"] - assert _validate_axes_names(5, axes="tczyx", fmt=v03) == [ + assert _validate_axes(2, axes=["y", "x"], fmt=v03) == ["y", "x"] + assert _validate_axes(5, axes="tczyx", fmt=v03) == [ "t", "c", "z", @@ -113,12 +115,12 @@ def test_dim_names(self): ] # if 2D or 5D, axes can be assigned automatically - assert _validate_axes_names(2, axes=None, fmt=v03) == ["y", "x"] - assert _validate_axes_names(5, axes=None, fmt=v03) == ["t", "c", "z", "y", "x"] + assert _validate_axes(2, axes=None, fmt=v03) == ["y", "x"] + assert _validate_axes(5, axes=None, fmt=v03) == ["t", "c", "z", "y", "x"] # for v0.1 or v0.2, axes should be None - assert _validate_axes_names(2, axes=["y", "x"], fmt=FormatV01()) is None - assert _validate_axes_names(2, axes=["y", "x"], fmt=FormatV02()) is None + assert _validate_axes(2, axes=["y", "x"], fmt=FormatV01()) is None + assert _validate_axes(2, axes=["y", "x"], fmt=FormatV02()) is None # check that write_image is checking axes data = self.create_data((125, 125)) @@ -130,6 +132,73 @@ def test_dim_names(self): axes="xyz", ) + def test_axes_dicts(self): + + v04 = FormatV04() + + # ALL axes must specify 'name' + with pytest.raises(ValueError): + _validate_axes(2, axes=[{"name": "y"}, {}], fmt=v04) + + all_dims = [ + {"name": "t", "type": "time"}, + {"name": "c", "type": "channel"}, + {"name": "z", "type": "space"}, + {"name": "y", "type": "space"}, + {"name": "x", "type": "space"}, + ] + + # auto axes for 2D, 5D, converted to dict for v0.4 + assert _validate_axes(2, axes=None, fmt=v04) == all_dims[-2:] + assert _validate_axes(5, axes=None, fmt=v04) == all_dims + + # convert from list or string + assert _validate_axes(3, axes=["z", "y", "x"], fmt=v04) == all_dims[-3:] + assert _validate_axes(4, axes="czyx", fmt=v04) == all_dims[-4:] + + # invalid based on ordering of types + with pytest.raises(ValueError): + assert _validate_axes(3, axes=["y", "c", "x"], fmt=v04) + with pytest.raises(ValueError): + assert _validate_axes(4, axes="ctyx", fmt=v04) + + # custom types + assert _validate_axes(3, axes=["foo", "y", "x"], fmt=v04) == [ + {"name": "foo"}, + all_dims[-2], + all_dims[-1], + ] + + # space types can be in ANY order + assert _validate_axes(3, axes=["x", "z", "y"], fmt=v04) == [ + all_dims[-1], + all_dims[-3], + all_dims[-2], + ] + + # Not allowed multiple custom types + with pytest.raises(ValueError): + _validate_axes(4, axes=["foo", "bar", "y", "x"], fmt=v04) + + # unconventional naming is allowed + strange_axes = [ + {"name": "duration", "type": "time"}, + {"name": "rotation", "type": "angle"}, + {"name": "dz", "type": "space"}, + {"name": "WIDTH", "type": "space"}, + ] + assert _validate_axes(4, axes=strange_axes, fmt=v04) == strange_axes + + # check that write_image is checking axes + data = self.create_data((125, 125)) + with pytest.raises(ValueError): + write_image( + image=data, + group=self.group, + fmt=v04, + axes="xt", + ) + class TestMultiscalesMetadata: @pytest.fixture(autouse=True) @@ -177,6 +246,7 @@ def test_version(self, fmt): def test_axes(self, axes): write_multiscales_metadata(self.root, ["0"], axes=axes) assert "multiscales" in self.root.attrs + axes = [{"name": name, "type": KNOWN_AXES[name]} for name in axes] assert self.root.attrs["multiscales"][0]["axes"] == axes @pytest.mark.parametrize("fmt", (FormatV01(), FormatV02())) From 02342b630c33d6adc54634c46e198724778cff1a Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 5 Jan 2022 10:17:28 +0000 Subject: [PATCH 05/16] Fix logging of axes names --- ome_zarr/reader.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 6e4f2ce2..e748e2f4 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -301,9 +301,10 @@ def __init__(self, node: Node) -> None: for c in data.chunks ] LOGGER.info("resolution: %s", resolution) + axes_names = None if axes is not None: - axes = tuple(str(axis) for axis in axes) - LOGGER.info(" - shape %s = %s", axes, data.shape) + axes_names = tuple(axis["name"] for axis in axes) + LOGGER.info(" - shape %s = %s", axes_names, data.shape) LOGGER.info(" - chunks = %s", chunk_sizes) LOGGER.info(" - dtype = %s", data.dtype) node.data.append(data) From 939295c82a4a0cf8f4ebad2e5fb31e662e8f70f7 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 5 Jan 2022 10:23:57 +0000 Subject: [PATCH 06/16] Don't reassign axes - fix mypy --- ome_zarr/writer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index 54dbf66a..c970a5ff 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -213,8 +213,7 @@ def write_multiscales_metadata( if fmt.version in ("0.1", "0.2"): LOGGER.info("axes ignored for version 0.1 or 0.2") else: - axes = _validate_axes(axes=axes, fmt=fmt) - multiscales[0]["axes"] = axes + multiscales[0]["axes"] = _validate_axes(axes=axes, fmt=fmt) group.attrs["multiscales"] = multiscales From 789b95b098444c6f75dcfc9e4dc46fae8418f785 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 5 Jan 2022 10:27:24 +0000 Subject: [PATCH 07/16] Check for axes is None - fix mypy --- ome_zarr/writer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index c970a5ff..68e16439 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -213,7 +213,9 @@ def write_multiscales_metadata( if fmt.version in ("0.1", "0.2"): LOGGER.info("axes ignored for version 0.1 or 0.2") else: - multiscales[0]["axes"] = _validate_axes(axes=axes, fmt=fmt) + axes = _validate_axes(axes=axes, fmt=fmt) + if axes is not None: + multiscales[0]["axes"] = axes group.attrs["multiscales"] = multiscales From 4097eceb4ff4f4d18102ea08448147cbb28ff7e9 Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 5 Jan 2022 10:38:06 +0000 Subject: [PATCH 08/16] Fix logging of axes names for v0.4 and v0.3 --- ome_zarr/reader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index e748e2f4..720f6046 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -303,7 +303,9 @@ def __init__(self, node: Node) -> None: LOGGER.info("resolution: %s", resolution) axes_names = None if axes is not None: - axes_names = tuple(axis["name"] for axis in axes) + axes_names = tuple( + axis if isinstance(axis, str) else axis["name"] for axis in axes + ) LOGGER.info(" - shape %s = %s", axes_names, data.shape) LOGGER.info(" - chunks = %s", chunk_sizes) LOGGER.info(" - dtype = %s", data.dtype) From bd16dfde754668c4092274206082e29fdb1090d0 Mon Sep 17 00:00:00 2001 From: William Moore Date: Thu, 6 Jan 2022 17:04:51 +0000 Subject: [PATCH 09/16] Use validate_axes() in reader.py --- ome_zarr/format.py | 9 +++++++++ ome_zarr/reader.py | 9 +++++---- ome_zarr/writer.py | 8 ++++---- tests/test_writer.py | 46 ++++++++++++++++++++++---------------------- 4 files changed, 41 insertions(+), 31 deletions(-) diff --git a/ome_zarr/format.py b/ome_zarr/format.py index 99e8402c..fb22c43b 100644 --- a/ome_zarr/format.py +++ b/ome_zarr/format.py @@ -9,10 +9,19 @@ LOGGER = logging.getLogger("ome_zarr.format") +def format_from_version(version: str) -> "Format": + + for fmt in format_implementations(): + if fmt.version == version: + return fmt + raise ValueError(f"Version {version} not recognized") + + def format_implementations() -> Iterator["Format"]: """ Return an instance of each format implementation, newest to oldest. """ + yield FormatV04() yield FormatV03() yield FormatV02() yield FormatV01() diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 720f6046..2310eb61 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -9,8 +9,10 @@ import numpy as np from dask import delayed +from .format import format_from_version from .io import ZarrLocation from .types import JSONDict +from .writer import validate_axes LOGGER = logging.getLogger("ome_zarr.reader") @@ -275,7 +277,6 @@ def matches(zarr: ZarrLocation) -> bool: def __init__(self, node: Node) -> None: super().__init__(node) - axes_values = {"t", "c", "z", "y", "x"} try: multiscales = self.lookup("multiscales", []) version = multiscales[0].get( @@ -283,9 +284,9 @@ def __init__(self, node: Node) -> None: ) # should this be matched with Format.version? datasets = multiscales[0]["datasets"] axes = multiscales[0].get("axes") - if version == "0.3" and len(set(axes) - axes_values) > 0: - # TODO: validate axis for > V0.3 ? - raise RuntimeError(f"Invalid axes names: {set(axes) - axes_values}") + fmt = format_from_version(version) + # Raises ValueError if not valid + validate_axes(None, axes, fmt) node.metadata["axes"] = axes datasets = [d["path"] for d in datasets] self.datasets: List[str] = datasets diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index 68e16439..cd411152 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -69,7 +69,7 @@ def _last_index(item: str, item_list: List[Any]) -> int: raise ValueError("'space' axes must come after 'channel'") -def _validate_axes( +def validate_axes( ndim: int = None, axes: Union[str, List[str], List[Dict[str, str]]] = None, fmt: Format = CurrentFormat(), @@ -170,7 +170,7 @@ def write_multiscale( """ dims = len(pyramid[0].shape) - axes = _validate_axes(dims, axes, fmt) + axes = validate_axes(dims, axes, fmt) paths = [] for path, dataset in enumerate(pyramid): @@ -213,7 +213,7 @@ def write_multiscales_metadata( if fmt.version in ("0.1", "0.2"): LOGGER.info("axes ignored for version 0.1 or 0.2") else: - axes = _validate_axes(axes=axes, fmt=fmt) + axes = validate_axes(axes=axes, fmt=fmt) if axes is not None: multiscales[0]["axes"] = axes group.attrs["multiscales"] = multiscales @@ -267,7 +267,7 @@ def write_image( axes = None # check axes before trying to scale - _validate_axes(image.ndim, axes, fmt) + validate_axes(image.ndim, axes, fmt) if chunks is not None: chunks = _retuple(chunks, image.shape) diff --git a/tests/test_writer.py b/tests/test_writer.py index 1e3a27ac..ccec6fe8 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -10,7 +10,7 @@ from ome_zarr.scale import Scaler from ome_zarr.writer import ( KNOWN_AXES, - _validate_axes, + validate_axes, write_image, write_multiscales_metadata, ) @@ -90,23 +90,23 @@ def test_dim_names(self): # v0.3 MUST specify axes for 3D or 4D data with pytest.raises(ValueError): - _validate_axes(3, axes=None, fmt=v03) + validate_axes(3, axes=None, fmt=v03) # ndims must match axes length with pytest.raises(ValueError): - _validate_axes(3, axes="yx", fmt=v03) + validate_axes(3, axes="yx", fmt=v03) # axes must be ordered tczyx with pytest.raises(ValueError): - _validate_axes(3, axes="yxt", fmt=v03) + validate_axes(3, axes="yxt", fmt=v03) with pytest.raises(ValueError): - _validate_axes(2, axes=["x", "y"], fmt=v03) + validate_axes(2, axes=["x", "y"], fmt=v03) with pytest.raises(ValueError): - _validate_axes(5, axes="xyzct", fmt=v03) + validate_axes(5, axes="xyzct", fmt=v03) # valid axes - no change, converted to list - assert _validate_axes(2, axes=["y", "x"], fmt=v03) == ["y", "x"] - assert _validate_axes(5, axes="tczyx", fmt=v03) == [ + assert validate_axes(2, axes=["y", "x"], fmt=v03) == ["y", "x"] + assert validate_axes(5, axes="tczyx", fmt=v03) == [ "t", "c", "z", @@ -115,12 +115,12 @@ def test_dim_names(self): ] # if 2D or 5D, axes can be assigned automatically - assert _validate_axes(2, axes=None, fmt=v03) == ["y", "x"] - assert _validate_axes(5, axes=None, fmt=v03) == ["t", "c", "z", "y", "x"] + assert validate_axes(2, axes=None, fmt=v03) == ["y", "x"] + assert validate_axes(5, axes=None, fmt=v03) == ["t", "c", "z", "y", "x"] # for v0.1 or v0.2, axes should be None - assert _validate_axes(2, axes=["y", "x"], fmt=FormatV01()) is None - assert _validate_axes(2, axes=["y", "x"], fmt=FormatV02()) is None + assert validate_axes(2, axes=["y", "x"], fmt=FormatV01()) is None + assert validate_axes(2, axes=["y", "x"], fmt=FormatV02()) is None # check that write_image is checking axes data = self.create_data((125, 125)) @@ -138,7 +138,7 @@ def test_axes_dicts(self): # ALL axes must specify 'name' with pytest.raises(ValueError): - _validate_axes(2, axes=[{"name": "y"}, {}], fmt=v04) + validate_axes(2, axes=[{"name": "y"}, {}], fmt=v04) all_dims = [ {"name": "t", "type": "time"}, @@ -149,28 +149,28 @@ def test_axes_dicts(self): ] # auto axes for 2D, 5D, converted to dict for v0.4 - assert _validate_axes(2, axes=None, fmt=v04) == all_dims[-2:] - assert _validate_axes(5, axes=None, fmt=v04) == all_dims + assert validate_axes(2, axes=None, fmt=v04) == all_dims[-2:] + assert validate_axes(5, axes=None, fmt=v04) == all_dims # convert from list or string - assert _validate_axes(3, axes=["z", "y", "x"], fmt=v04) == all_dims[-3:] - assert _validate_axes(4, axes="czyx", fmt=v04) == all_dims[-4:] + assert validate_axes(3, axes=["z", "y", "x"], fmt=v04) == all_dims[-3:] + assert validate_axes(4, axes="czyx", fmt=v04) == all_dims[-4:] # invalid based on ordering of types with pytest.raises(ValueError): - assert _validate_axes(3, axes=["y", "c", "x"], fmt=v04) + assert validate_axes(3, axes=["y", "c", "x"], fmt=v04) with pytest.raises(ValueError): - assert _validate_axes(4, axes="ctyx", fmt=v04) + assert validate_axes(4, axes="ctyx", fmt=v04) # custom types - assert _validate_axes(3, axes=["foo", "y", "x"], fmt=v04) == [ + assert validate_axes(3, axes=["foo", "y", "x"], fmt=v04) == [ {"name": "foo"}, all_dims[-2], all_dims[-1], ] # space types can be in ANY order - assert _validate_axes(3, axes=["x", "z", "y"], fmt=v04) == [ + assert validate_axes(3, axes=["x", "z", "y"], fmt=v04) == [ all_dims[-1], all_dims[-3], all_dims[-2], @@ -178,7 +178,7 @@ def test_axes_dicts(self): # Not allowed multiple custom types with pytest.raises(ValueError): - _validate_axes(4, axes=["foo", "bar", "y", "x"], fmt=v04) + validate_axes(4, axes=["foo", "bar", "y", "x"], fmt=v04) # unconventional naming is allowed strange_axes = [ @@ -187,7 +187,7 @@ def test_axes_dicts(self): {"name": "dz", "type": "space"}, {"name": "WIDTH", "type": "space"}, ] - assert _validate_axes(4, axes=strange_axes, fmt=v04) == strange_axes + assert validate_axes(4, axes=strange_axes, fmt=v04) == strange_axes # check that write_image is checking axes data = self.create_data((125, 125)) From 9f75ae350efd6ad2dfcb56997aa1f3b7861c066c Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 7 Jan 2022 11:31:22 +0000 Subject: [PATCH 10/16] Move axes logic to new Axes class --- ome_zarr/axes.py | 100 +++++++++++++++++++++++++++++++++++++++++++ ome_zarr/writer.py | 94 ++-------------------------------------- tests/test_writer.py | 8 +--- 3 files changed, 106 insertions(+), 96 deletions(-) create mode 100644 ome_zarr/axes.py diff --git a/ome_zarr/axes.py b/ome_zarr/axes.py new file mode 100644 index 00000000..ed767358 --- /dev/null +++ b/ome_zarr/axes.py @@ -0,0 +1,100 @@ +"""Axes class for validating and transforming axes +""" +from typing import Any, Dict, List, Union + +from .format import Format + +KNOWN_AXES = {"x": "space", "y": "space", "z": "space", "c": "channel", "t": "time"} + + +class Axes: + def __init__(self, axes: Union[List[str], List[Dict[str, str]]]) -> None: + self.axes = self._axes_to_dicts(axes) + + def validate(self, fmt: Format) -> None: + + # check names (only enforced for version 0.3) + if fmt.version == "0.3": + self._validate_axes_03() + return + + self._validate_axes_types() + + def get_axes(self, fmt: Format) -> Union[List[str], List[Dict[str, str]]]: + if fmt.version == "0.3": + return self._get_names() + return self.axes + + @staticmethod + def _axes_to_dicts( + axes: Union[List[str], List[Dict[str, str]]] + ) -> List[Dict[str, str]]: + """Returns a list of axis dicts with name and type""" + axes_dicts = [] + for axis in axes: + if isinstance(axis, str): + axis_dict = {"name": axis} + if axis in KNOWN_AXES: + axis_dict["type"] = KNOWN_AXES[axis] + axes_dicts.append(axis_dict) + else: + axes_dicts.append(axis) + return axes_dicts + + def _validate_axes_types(self) -> None: + """ + Validate the axes types according to the spec, version 0.4+ + """ + axes_types = [axis.get("type") for axis in self.axes] + known_types = list(KNOWN_AXES.values()) + unknown_types = [atype for atype in axes_types if atype not in known_types] + if len(unknown_types) > 1: + raise ValueError( + "Too many unknown axes types. 1 allowed, found: %s" % unknown_types + ) + + def _last_index(item: str, item_list: List[Any]) -> int: + return max(loc for loc, val in enumerate(item_list) if val == item) + + if "time" in axes_types and _last_index("time", axes_types) > 0: + raise ValueError("'time' axis must be first dimension only") + + if axes_types.count("channel") > 1: + raise ValueError("Only 1 axis can be type 'channel'") + + if "channel" in axes_types and _last_index( + "channel", axes_types + ) > axes_types.index("space"): + raise ValueError("'space' axes must come after 'channel'") + + def _get_names(self) -> List[str]: + """Returns a list of axis names""" + axes_names = [] + for axis in self.axes: + if "name" not in axis: + raise ValueError("Axis Dict %s has no 'name'" % axis) + axes_names.append(axis["name"]) + return axes_names + + def _validate_axes_03(self) -> None: + + val_axes = tuple(self._get_names()) + if len(val_axes) == 2: + if val_axes != ("y", "x"): + raise ValueError(f"2D data must have axes ('y', 'x') {val_axes}") + elif len(val_axes) == 3: + if val_axes not in [("z", "y", "x"), ("c", "y", "x"), ("t", "y", "x")]: + raise ValueError( + "3D data must have axes ('z', 'y', 'x') or ('c', 'y', 'x')" + " or ('t', 'y', 'x'), not %s" % (val_axes,) + ) + elif len(val_axes) == 4: + if val_axes not in [ + ("t", "z", "y", "x"), + ("c", "z", "y", "x"), + ("t", "c", "y", "x"), + ]: + raise ValueError("4D data must have axes tzyx or czyx or tcyx") + else: + if val_axes != ("t", "c", "z", "y", "x"): + raise ValueError("5D data must have axes ('t', 'c', 'z', 'y', 'x')") diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index cd411152..554dcc7e 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -7,67 +7,13 @@ import numpy as np import zarr +from .axes import Axes from .format import CurrentFormat, Format from .scale import Scaler from .types import JSONDict LOGGER = logging.getLogger("ome_zarr.writer") -KNOWN_AXES = {"x": "space", "y": "space", "z": "space", "c": "channel", "t": "time"} - - -def _axes_to_dicts( - axes: Union[List[str], List[Dict[str, str]]] -) -> List[Dict[str, str]]: - """Returns a list of axis dicts with name and type""" - axes_dicts = [] - for axis in axes: - if isinstance(axis, str): - axis_dict = {"name": axis} - if axis in KNOWN_AXES: - axis_dict["type"] = KNOWN_AXES[axis] - axes_dicts.append(axis_dict) - else: - axes_dicts.append(axis) - return axes_dicts - - -def _axes_to_names(axes: List[Dict[str, str]]) -> List[str]: - """Returns a list of axis names""" - axes_names = [] - for axis in axes: - if "name" not in axis: - raise ValueError("Axis Dict %s has no 'name'" % axis) - axes_names.append(axis["name"]) - return axes_names - - -def _validate_axes_types(axes_dicts: List[Dict[str, str]]) -> None: - """ - Validate the axes types according to the spec, version 0.4+ - """ - axes_types = [axis.get("type") for axis in axes_dicts] - known_types = list(KNOWN_AXES.values()) - unknown_types = [atype for atype in axes_types if atype not in known_types] - if len(unknown_types) > 1: - raise ValueError( - "Too many unknown axes types. 1 allowed, found: %s" % unknown_types - ) - - def _last_index(item: str, item_list: List[Any]) -> int: - return max(loc for loc, val in enumerate(item_list) if val == item) - - if "time" in axes_types and _last_index("time", axes_types) > 0: - raise ValueError("'time' axis must be first dimension only") - - if axes_types.count("channel") > 1: - raise ValueError("Only 1 axis can be type 'channel'") - - if "channel" in axes_types and _last_index( - "channel", axes_types - ) > axes_types.index("space"): - raise ValueError("'space' axes must come after 'channel'") - def validate_axes( ndim: int = None, @@ -103,42 +49,10 @@ def validate_axes( f"axes length ({len(axes)}) must match number of dimensions ({ndim})" ) - # axes may be list of 'x', 'y' or list of {'name': 'x'} - axes_dicts = _axes_to_dicts(axes) - axes_names = _axes_to_names(axes_dicts) - - # check names (only enforced for version 0.3) - if fmt.version == "0.3": - _validate_axes_03(axes_names) - return axes_names - - _validate_axes_types(axes_dicts) + axes_obj = Axes(axes) + axes_obj.validate(fmt) - return axes_dicts - - -def _validate_axes_03(axes: List[str]) -> None: - - val_axes = tuple(axes) - if len(val_axes) == 2: - if val_axes != ("y", "x"): - raise ValueError(f"2D data must have axes ('y', 'x') {val_axes}") - elif len(val_axes) == 3: - if val_axes not in [("z", "y", "x"), ("c", "y", "x"), ("t", "y", "x")]: - raise ValueError( - "3D data must have axes ('z', 'y', 'x') or ('c', 'y', 'x')" - " or ('t', 'y', 'x'), not %s" % (val_axes,) - ) - elif len(val_axes) == 4: - if val_axes not in [ - ("t", "z", "y", "x"), - ("c", "z", "y", "x"), - ("t", "c", "y", "x"), - ]: - raise ValueError("4D data must have axes tzyx or czyx or tcyx") - else: - if val_axes != ("t", "c", "z", "y", "x"): - raise ValueError("5D data must have axes ('t', 'c', 'z', 'y', 'x')") + return axes_obj.get_axes(fmt) def write_multiscale( diff --git a/tests/test_writer.py b/tests/test_writer.py index ccec6fe8..2de15e7b 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -4,16 +4,12 @@ import pytest import zarr +from ome_zarr.axes import KNOWN_AXES from ome_zarr.format import FormatV01, FormatV02, FormatV03, FormatV04 from ome_zarr.io import parse_url from ome_zarr.reader import Multiscales, Reader from ome_zarr.scale import Scaler -from ome_zarr.writer import ( - KNOWN_AXES, - validate_axes, - write_image, - write_multiscales_metadata, -) +from ome_zarr.writer import validate_axes, write_image, write_multiscales_metadata class TestWriter: From 18608d91f2e82238e649ac47b3cd9e68150cbb4e Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 12 Jan 2022 10:05:09 +0000 Subject: [PATCH 11/16] Fix various points from Seb --- ome_zarr/axes.py | 26 ++++++++++++++++++-------- ome_zarr/reader.py | 4 ++-- ome_zarr/writer.py | 4 ++-- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/ome_zarr/axes.py b/ome_zarr/axes.py index ed767358..02288682 100644 --- a/ome_zarr/axes.py +++ b/ome_zarr/axes.py @@ -2,20 +2,30 @@ """ from typing import Any, Dict, List, Union -from .format import Format +from .format import CurrentFormat, Format KNOWN_AXES = {"x": "space", "y": "space", "z": "space", "c": "channel", "t": "time"} class Axes: - def __init__(self, axes: Union[List[str], List[Dict[str, str]]]) -> None: - self.axes = self._axes_to_dicts(axes) - - def validate(self, fmt: Format) -> None: + def __init__( + self, + axes: Union[List[str], List[Dict[str, str]]], + fmt: Format = CurrentFormat(), + ) -> None: + + if axes is not None: + self.axes = self._axes_to_dicts(axes) + self.fmt = fmt + + def validate(self) -> None: + """Raises ValueError if not valid""" + if self.fmt.version in ("0.1", "0.2"): + return # check names (only enforced for version 0.3) - if fmt.version == "0.3": - self._validate_axes_03() + if self.fmt.version == "0.3": + self._validate_03() return self._validate_axes_types() @@ -76,7 +86,7 @@ def _get_names(self) -> List[str]: axes_names.append(axis["name"]) return axes_names - def _validate_axes_03(self) -> None: + def _validate_03(self) -> None: val_axes = tuple(self._get_names()) if len(val_axes) == 2: diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 2310eb61..647c5922 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -9,10 +9,10 @@ import numpy as np from dask import delayed +from .axes import Axes from .format import format_from_version from .io import ZarrLocation from .types import JSONDict -from .writer import validate_axes LOGGER = logging.getLogger("ome_zarr.reader") @@ -286,7 +286,7 @@ def __init__(self, node: Node) -> None: axes = multiscales[0].get("axes") fmt = format_from_version(version) # Raises ValueError if not valid - validate_axes(None, axes, fmt) + Axes(axes, fmt).validate() node.metadata["axes"] = axes datasets = [d["path"] for d in datasets] self.datasets: List[str] = datasets diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index 554dcc7e..a63b678c 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -49,8 +49,8 @@ def validate_axes( f"axes length ({len(axes)}) must match number of dimensions ({ndim})" ) - axes_obj = Axes(axes) - axes_obj.validate(fmt) + axes_obj = Axes(axes, fmt) + axes_obj.validate() return axes_obj.get_axes(fmt) From 7147bbd8ed2ab634d4b0d80254e703d3888c366a Mon Sep 17 00:00:00 2001 From: William Moore Date: Wed, 12 Jan 2022 10:28:35 +0000 Subject: [PATCH 12/16] Rename get_axes() -> to_list() --- ome_zarr/axes.py | 2 +- ome_zarr/writer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ome_zarr/axes.py b/ome_zarr/axes.py index 02288682..787f490e 100644 --- a/ome_zarr/axes.py +++ b/ome_zarr/axes.py @@ -30,7 +30,7 @@ def validate(self) -> None: self._validate_axes_types() - def get_axes(self, fmt: Format) -> Union[List[str], List[Dict[str, str]]]: + def to_list(self, fmt: Format) -> Union[List[str], List[Dict[str, str]]]: if fmt.version == "0.3": return self._get_names() return self.axes diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index 9e62e689..0b9e5f36 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -52,7 +52,7 @@ def validate_axes( axes_obj = Axes(axes, fmt) axes_obj.validate() - return axes_obj.get_axes(fmt) + return axes_obj.to_list(fmt) def _validate_well_images(images: List, fmt: Format = CurrentFormat()) -> None: From 8e37f31f90dea6cd165a6e93542c3df774018d4f Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 14 Jan 2022 09:58:09 +0000 Subject: [PATCH 13/16] Rename writer.validate_axes() to _get_valid_axes() --- ome_zarr/writer.py | 8 ++++---- tests/test_writer.py | 46 ++++++++++++++++++++++---------------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index 0b9e5f36..78abbcbb 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -15,7 +15,7 @@ LOGGER = logging.getLogger("ome_zarr.writer") -def validate_axes( +def _get_valid_axes( ndim: int = None, axes: Union[str, List[str], List[Dict[str, str]]] = None, fmt: Format = CurrentFormat(), @@ -131,7 +131,7 @@ def write_multiscale( """ dims = len(pyramid[0].shape) - axes = validate_axes(dims, axes, fmt) + axes = _get_valid_axes(dims, axes, fmt) paths = [] for path, dataset in enumerate(pyramid): @@ -174,7 +174,7 @@ def write_multiscales_metadata( if fmt.version in ("0.1", "0.2"): LOGGER.info("axes ignored for version 0.1 or 0.2") else: - axes = validate_axes(axes=axes, fmt=fmt) + axes = _get_valid_axes(axes=axes, fmt=fmt) if axes is not None: multiscales[0]["axes"] = axes group.attrs["multiscales"] = multiscales @@ -307,7 +307,7 @@ def write_image( axes = None # check axes before trying to scale - validate_axes(image.ndim, axes, fmt) + _get_valid_axes(image.ndim, axes, fmt) if chunks is not None: chunks = _retuple(chunks, image.shape) diff --git a/tests/test_writer.py b/tests/test_writer.py index 6c41d2cc..b9028968 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -10,7 +10,7 @@ from ome_zarr.reader import Multiscales, Reader from ome_zarr.scale import Scaler from ome_zarr.writer import ( - validate_axes, + _get_valid_axes, write_image, write_multiscales_metadata, write_plate_metadata, @@ -92,23 +92,23 @@ def test_dim_names(self): # v0.3 MUST specify axes for 3D or 4D data with pytest.raises(ValueError): - validate_axes(3, axes=None, fmt=v03) + _get_valid_axes(3, axes=None, fmt=v03) # ndims must match axes length with pytest.raises(ValueError): - validate_axes(3, axes="yx", fmt=v03) + _get_valid_axes(3, axes="yx", fmt=v03) # axes must be ordered tczyx with pytest.raises(ValueError): - validate_axes(3, axes="yxt", fmt=v03) + _get_valid_axes(3, axes="yxt", fmt=v03) with pytest.raises(ValueError): - validate_axes(2, axes=["x", "y"], fmt=v03) + _get_valid_axes(2, axes=["x", "y"], fmt=v03) with pytest.raises(ValueError): - validate_axes(5, axes="xyzct", fmt=v03) + _get_valid_axes(5, axes="xyzct", fmt=v03) # valid axes - no change, converted to list - assert validate_axes(2, axes=["y", "x"], fmt=v03) == ["y", "x"] - assert validate_axes(5, axes="tczyx", fmt=v03) == [ + assert _get_valid_axes(2, axes=["y", "x"], fmt=v03) == ["y", "x"] + assert _get_valid_axes(5, axes="tczyx", fmt=v03) == [ "t", "c", "z", @@ -117,12 +117,12 @@ def test_dim_names(self): ] # if 2D or 5D, axes can be assigned automatically - assert validate_axes(2, axes=None, fmt=v03) == ["y", "x"] - assert validate_axes(5, axes=None, fmt=v03) == ["t", "c", "z", "y", "x"] + assert _get_valid_axes(2, axes=None, fmt=v03) == ["y", "x"] + assert _get_valid_axes(5, axes=None, fmt=v03) == ["t", "c", "z", "y", "x"] # for v0.1 or v0.2, axes should be None - assert validate_axes(2, axes=["y", "x"], fmt=FormatV01()) is None - assert validate_axes(2, axes=["y", "x"], fmt=FormatV02()) is None + assert _get_valid_axes(2, axes=["y", "x"], fmt=FormatV01()) is None + assert _get_valid_axes(2, axes=["y", "x"], fmt=FormatV02()) is None # check that write_image is checking axes data = self.create_data((125, 125)) @@ -140,7 +140,7 @@ def test_axes_dicts(self): # ALL axes must specify 'name' with pytest.raises(ValueError): - validate_axes(2, axes=[{"name": "y"}, {}], fmt=v04) + _get_valid_axes(2, axes=[{"name": "y"}, {}], fmt=v04) all_dims = [ {"name": "t", "type": "time"}, @@ -151,28 +151,28 @@ def test_axes_dicts(self): ] # auto axes for 2D, 5D, converted to dict for v0.4 - assert validate_axes(2, axes=None, fmt=v04) == all_dims[-2:] - assert validate_axes(5, axes=None, fmt=v04) == all_dims + assert _get_valid_axes(2, axes=None, fmt=v04) == all_dims[-2:] + assert _get_valid_axes(5, axes=None, fmt=v04) == all_dims # convert from list or string - assert validate_axes(3, axes=["z", "y", "x"], fmt=v04) == all_dims[-3:] - assert validate_axes(4, axes="czyx", fmt=v04) == all_dims[-4:] + assert _get_valid_axes(3, axes=["z", "y", "x"], fmt=v04) == all_dims[-3:] + assert _get_valid_axes(4, axes="czyx", fmt=v04) == all_dims[-4:] # invalid based on ordering of types with pytest.raises(ValueError): - assert validate_axes(3, axes=["y", "c", "x"], fmt=v04) + assert _get_valid_axes(3, axes=["y", "c", "x"], fmt=v04) with pytest.raises(ValueError): - assert validate_axes(4, axes="ctyx", fmt=v04) + assert _get_valid_axes(4, axes="ctyx", fmt=v04) # custom types - assert validate_axes(3, axes=["foo", "y", "x"], fmt=v04) == [ + assert _get_valid_axes(3, axes=["foo", "y", "x"], fmt=v04) == [ {"name": "foo"}, all_dims[-2], all_dims[-1], ] # space types can be in ANY order - assert validate_axes(3, axes=["x", "z", "y"], fmt=v04) == [ + assert _get_valid_axes(3, axes=["x", "z", "y"], fmt=v04) == [ all_dims[-1], all_dims[-3], all_dims[-2], @@ -180,7 +180,7 @@ def test_axes_dicts(self): # Not allowed multiple custom types with pytest.raises(ValueError): - validate_axes(4, axes=["foo", "bar", "y", "x"], fmt=v04) + _get_valid_axes(4, axes=["foo", "bar", "y", "x"], fmt=v04) # unconventional naming is allowed strange_axes = [ @@ -189,7 +189,7 @@ def test_axes_dicts(self): {"name": "dz", "type": "space"}, {"name": "WIDTH", "type": "space"}, ] - assert validate_axes(4, axes=strange_axes, fmt=v04) == strange_axes + assert _get_valid_axes(4, axes=strange_axes, fmt=v04) == strange_axes # check that write_image is checking axes data = self.create_data((125, 125)) From 3037b33ab7012d8fa2ed504c40f837cdcf23f3d9 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 14 Jan 2022 10:02:01 +0000 Subject: [PATCH 14/16] Axes() constructor also calls self.validate() --- ome_zarr/axes.py | 5 +++++ ome_zarr/reader.py | 2 +- ome_zarr/writer.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/ome_zarr/axes.py b/ome_zarr/axes.py index 787f490e..cfca96a2 100644 --- a/ome_zarr/axes.py +++ b/ome_zarr/axes.py @@ -13,10 +13,15 @@ def __init__( axes: Union[List[str], List[Dict[str, str]]], fmt: Format = CurrentFormat(), ) -> None: + """ + Constructor, transforms axes and validates + Raises ValueError if not valid + """ if axes is not None: self.axes = self._axes_to_dicts(axes) self.fmt = fmt + self.validate() def validate(self) -> None: """Raises ValueError if not valid""" diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 647c5922..711b4870 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -286,7 +286,7 @@ def __init__(self, node: Node) -> None: axes = multiscales[0].get("axes") fmt = format_from_version(version) # Raises ValueError if not valid - Axes(axes, fmt).validate() + Axes(axes, fmt) node.metadata["axes"] = axes datasets = [d["path"] for d in datasets] self.datasets: List[str] = datasets diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index 78abbcbb..de2d9b1d 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -49,8 +49,8 @@ def _get_valid_axes( f"axes length ({len(axes)}) must match number of dimensions ({ndim})" ) + # valiates on init axes_obj = Axes(axes, fmt) - axes_obj.validate() return axes_obj.to_list(fmt) From 5607786c1cc647517ab50586cfdd39b7ce0a3892 Mon Sep 17 00:00:00 2001 From: William Moore Date: Fri, 14 Jan 2022 11:18:37 +0000 Subject: [PATCH 15/16] Handle transformations in reader and writer. Add test --- ome_zarr/reader.py | 7 +++++-- ome_zarr/writer.py | 27 +++++++++++++++++++++++---- tests/test_writer.py | 11 +++++++++++ 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index 711b4870..a92b9dda 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -288,8 +288,11 @@ def __init__(self, node: Node) -> None: # Raises ValueError if not valid Axes(axes, fmt) node.metadata["axes"] = axes - datasets = [d["path"] for d in datasets] - self.datasets: List[str] = datasets + paths = [d["path"] for d in datasets] + self.datasets: List[str] = paths + transformations = [d.get("transformations") for d in datasets] + if any(trans is not None for trans in transformations): + node.metadata["transformations"] = transformations LOGGER.info("datasets %s", datasets) except Exception as e: LOGGER.error(f"failed to parse multiscale metadata: {e}") diff --git a/ome_zarr/writer.py b/ome_zarr/writer.py index de2d9b1d..9317f0d9 100644 --- a/ome_zarr/writer.py +++ b/ome_zarr/writer.py @@ -108,6 +108,7 @@ def write_multiscale( chunks: Union[Tuple[Any, ...], int] = None, fmt: Format = CurrentFormat(), axes: Union[str, List[str], List[Dict[str, str]]] = None, + transformations: List[List[Dict[str, Any]]] = None, ) -> None: """ Write a pyramid with multiscale metadata to disk. @@ -128,6 +129,9 @@ def write_multiscale( axes: str or list of str or list of dict List of axes dicts, or names. Not needed for v0.1 or v0.2 or if 2D. Otherwise this must be provided + transformations: 2Dlist of dict + For each path, we have a List of transformation Dicts (not validated). + Each list of dicts are added to each datasets in order. """ dims = len(pyramid[0].shape) @@ -138,7 +142,7 @@ def write_multiscale( # TODO: chunks here could be different per layer group.create_dataset(str(path), data=dataset, chunks=chunks) paths.append(str(path)) - write_multiscales_metadata(group, paths, fmt, axes) + write_multiscales_metadata(group, paths, fmt, axes, transformations) def write_multiscales_metadata( @@ -146,6 +150,7 @@ def write_multiscales_metadata( paths: List[str], fmt: Format = CurrentFormat(), axes: Union[str, List[str], List[Dict[str, str]]] = None, + transformations: List[List[Dict[str, Any]]] = None, ) -> None: """ Write the multiscales metadata in the group. @@ -159,15 +164,23 @@ def write_multiscales_metadata( fmt: Format The format of the ome_zarr data which should be used. Defaults to the most current. - axes: list of str + axes: list of str or list of dicts the names of the axes. e.g. ["t", "c", "z", "y", "x"]. Ignored for versions 0.1 and 0.2. Required for version 0.3 or greater. + transformations: 2Dlist of dict + For each path, we have a List of transformation Dicts (not validated). + Each list of dicts are added to each datasets in order. """ + datasets: List[Dict[str, Any]] = [{"path": path} for path in paths] + if transformations is not None: + for dataset, transform in zip(datasets, transformations): + dataset["transformations"] = transform + multiscales = [ { "version": fmt.version, - "datasets": [{"path": str(p)} for p in paths], + "datasets": datasets, } ] if axes is not None: @@ -267,6 +280,7 @@ def write_image( scaler: Scaler = Scaler(), fmt: Format = CurrentFormat(), axes: Union[str, List[str], List[Dict[str, str]]] = None, + transformations: List[List[Dict[str, Any]]] = None, **metadata: JSONDict, ) -> None: """Writes an image to the zarr store according to ome-zarr specification @@ -294,6 +308,9 @@ def write_image( axes: str or list of str or list of dict List of axes dicts, or names. Not needed for v0.1 or v0.2 or if 2D. Otherwise this must be provided + transformations: 2Dlist of dict + For each resolution, we have a List of transformation Dicts (not validated). + Each list of dicts are added to each datasets in order. """ if image.ndim > 5: @@ -323,7 +340,9 @@ def write_image( LOGGER.debug("disabling pyramid") image = [image] - write_multiscale(image, group, chunks=chunks, fmt=fmt, axes=axes) + write_multiscale( + image, group, chunks=chunks, fmt=fmt, axes=axes, transformations=transformations + ) group.attrs.update(metadata) diff --git a/tests/test_writer.py b/tests/test_writer.py index b9028968..8a215395 100644 --- a/tests/test_writer.py +++ b/tests/test_writer.py @@ -17,6 +17,12 @@ write_well_metadata, ) +TRANSFORMATIONS = [ + {"axisIndices": [1, 2, 3], "scale": [0.50, 0.36, 0.36], "type": "scale"}, + {"axisIndices": [1, 2, 3], "scale": [0.50, 0.72, 0.72], "type": "scale"}, + {"axisIndices": [1, 2, 3], "scale": [0.50, 1.44, 1.44], "type": "scale"}, +] + class TestWriter: @pytest.fixture(autouse=True) @@ -73,6 +79,7 @@ def test_writer(self, shape, scaler, format_version): scaler=scaler, fmt=version, axes=axes, + transformations=TRANSFORMATIONS, ) # Verify @@ -84,6 +91,10 @@ def test_writer(self, shape, scaler, format_version): assert node.data[0].ndim == 5 else: assert node.data[0].shape == shape + print("node.metadata", node.metadata) + for transf, expected in zip(node.metadata["transformations"], TRANSFORMATIONS): + assert transf == expected + assert len(node.metadata["transformations"]) == len(node.data) assert np.allclose(data, node.data[0][...].compute()) def test_dim_names(self): From d9a44f75b5ab5e5c6d25ce8c28b85acdfe47cd7c Mon Sep 17 00:00:00 2001 From: William Moore Date: Mon, 17 Jan 2022 16:01:19 +0000 Subject: [PATCH 16/16] reader node.metadata['axes'] in latest format --- ome_zarr/axes.py | 7 ++++++- ome_zarr/reader.py | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/ome_zarr/axes.py b/ome_zarr/axes.py index cfca96a2..e761bcbd 100644 --- a/ome_zarr/axes.py +++ b/ome_zarr/axes.py @@ -20,6 +20,9 @@ def __init__( """ if axes is not None: self.axes = self._axes_to_dicts(axes) + elif fmt.version in ("0.1", "0.2"): + # strictly 5D + self.axes = self._axes_to_dicts(["t", "c", "z", "y", "x"]) self.fmt = fmt self.validate() @@ -35,7 +38,9 @@ def validate(self) -> None: self._validate_axes_types() - def to_list(self, fmt: Format) -> Union[List[str], List[Dict[str, str]]]: + def to_list( + self, fmt: Format = CurrentFormat() + ) -> Union[List[str], List[Dict[str, str]]]: if fmt.version == "0.3": return self._get_names() return self.axes diff --git a/ome_zarr/reader.py b/ome_zarr/reader.py index a92b9dda..79886178 100644 --- a/ome_zarr/reader.py +++ b/ome_zarr/reader.py @@ -286,8 +286,8 @@ def __init__(self, node: Node) -> None: axes = multiscales[0].get("axes") fmt = format_from_version(version) # Raises ValueError if not valid - Axes(axes, fmt) - node.metadata["axes"] = axes + axes_obj = Axes(axes, fmt) + node.metadata["axes"] = axes_obj.to_list() paths = [d["path"] for d in datasets] self.datasets: List[str] = paths transformations = [d.get("transformations") for d in datasets]