Skip to content

Commit

Permalink
Merge pull request #1733 from braingram/feature/lazy_tree
Browse files Browse the repository at this point in the history
Introduce `lazy_tree` (super dictionaries)
  • Loading branch information
braingram authored Jul 12, 2024
2 parents c2cce40 + 5843ada commit 9faf968
Show file tree
Hide file tree
Showing 23 changed files with 889 additions and 49 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@

- Deprecate ``ignore_implicit_conversion`` and "implicit conversion" [#1724]

- Add ``lazy_tree`` option to ``asdf.open`` and ``asdf.config``
to allow lazy deserialization of ASDF tagged tree nodes to
custom objects. [#1733]


3.2.0 (2024-04-05)
------------------
Expand Down
33 changes: 30 additions & 3 deletions asdf/_asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
import pathlib
import time
import warnings
import weakref

from packaging.version import Version

from . import _compression as mcompression
from . import _display as display
from . import _node_info as node_info
from . import _version as version
from . import constants, generic_io, reference, schema, treeutil, util, versioning, yamlutil
from . import constants, generic_io, lazy_nodes, reference, schema, treeutil, util, versioning, yamlutil
from ._block.manager import Manager as BlockManager
from ._helpers import validate_version
from .config import config_context, get_config
Expand Down Expand Up @@ -173,6 +174,10 @@ def __init__(
# custom_tree_to_tagged_tree or tagged_tree_to_custom_tree).
self._tree_modification_context = treeutil._TreeModificationContext()

# A cache of tagged objects and their converted custom objects used when
# a file is read with "lazy_tree=True". Used by lazy_nodes.
self._tagged_object_cache = lazy_nodes._TaggedObjectCache()

self._fd = None
self._closed = False
self._external_asdf_by_uri = {}
Expand Down Expand Up @@ -531,6 +536,7 @@ def close(self):
# as we're closing the file, also empty out the
# tree so that references to array data can be released
self._tree = AsdfObject()
self._tagged_object_cache.clear()
for external in self._external_asdf_by_uri.values():
external.close()
self._external_asdf_by_uri.clear()
Expand Down Expand Up @@ -878,6 +884,7 @@ def _open_asdf(
fd,
validate_checksums=False,
extensions=None,
lazy_tree=NotSet,
_get_yaml_content=False,
_force_raw_types=False,
strict_extension_check=False,
Expand All @@ -889,7 +896,7 @@ def _open_asdf(
msg = "'strict_extension_check' and 'ignore_missing_extensions' are incompatible options"
raise ValueError(msg)

with config_context():
with config_context() as cfg:
# validate_checksums (unlike memmap and lazy_load) is provided
# here instead of in __init__
self._blocks._validate_checksums = validate_checksums
Expand Down Expand Up @@ -970,7 +977,14 @@ def _open_asdf(
self.close()
raise

tree = yamlutil.tagged_tree_to_custom_tree(tree, self, _force_raw_types)
if lazy_tree is NotSet:
lazy_tree = cfg.lazy_tree
if lazy_tree and not _force_raw_types:
obj = AsdfObject()
obj.data = lazy_nodes.AsdfDictNode(tree, weakref.ref(self))
tree = obj
else:
tree = yamlutil.tagged_tree_to_custom_tree(tree, self, _force_raw_types)

if not (ignore_missing_extensions or _force_raw_types):
self._check_extensions(tree, strict=strict_extension_check)
Expand All @@ -988,6 +1002,7 @@ def _open_impl(
mode="r",
validate_checksums=False,
extensions=None,
lazy_tree=NotSet,
_get_yaml_content=False,
_force_raw_types=False,
strict_extension_check=False,
Expand All @@ -1002,6 +1017,7 @@ def _open_impl(
generic_file,
validate_checksums=validate_checksums,
extensions=extensions,
lazy_tree=lazy_tree,
_get_yaml_content=_get_yaml_content,
_force_raw_types=_force_raw_types,
strict_extension_check=strict_extension_check,
Expand Down Expand Up @@ -1604,6 +1620,7 @@ def open_asdf(
_force_raw_types=False,
copy_arrays=False,
memmap=NotSet,
lazy_tree=NotSet,
lazy_load=True,
custom_schema=None,
strict_extension_check=False,
Expand Down Expand Up @@ -1665,6 +1682,15 @@ def open_asdf(
Note: even if ``lazy_load`` is `False`, ``memmap`` is still taken
into account.
lazy_tree : bool, optional
When `True` the ASDF tree will not be converted to custom objects
when the file is loaded. Instead, objects will be "lazily" converted
only when they are accessed. Note that containers in the tree will not
be dict and list instances; they will instead be instances of classes
defined in `asdf.lazy_nodes`. Since objects are converted when they
are accessed, traversing the tree (as is done by `AsdfFile.info`
and `AsdfFile.search`) will result in nodes being converted.
custom_schema : str, optional
Path to a custom schema file that will be used for a secondary
validation pass. This can be used to ensure that particular ASDF
Expand Down Expand Up @@ -1718,6 +1744,7 @@ def open_asdf(
mode=mode,
validate_checksums=validate_checksums,
extensions=extensions,
lazy_tree=lazy_tree,
_get_yaml_content=_get_yaml_content,
_force_raw_types=_force_raw_types,
strict_extension_check=strict_extension_check,
Expand Down
1 change: 0 additions & 1 deletion asdf/_core/_converters/complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

class ComplexConverter(Converter):
tags = ["tag:stsci.edu:asdf/core/complex-1.0.0"]

types = [*list(util._iter_subclasses(np.complexfloating)), complex]

def to_yaml_tree(self, obj, tag, ctx):
Expand Down
1 change: 0 additions & 1 deletion asdf/_core/_converters/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ class IntegerConverter(Converter):
"tag:stsci.edu:asdf/core/integer-1.0.0",
"tag:stsci.edu:asdf/core/integer-1.1.0",
]

types = ["asdf.tags.core.integer.IntegerType"]

def to_yaml_tree(self, obj, tag, ctx):
Expand Down
7 changes: 7 additions & 0 deletions asdf/_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,10 @@ def httpserver(request):
@pytest.fixture()
def test_data_path():
return importlib.resources.files("asdf") / "_tests" / "data"


# Parametrized fixture: a test that requests it runs once per parameter,
# with lazy_tree set to True (id "lazy") and to False (id "not-lazy").
@pytest.fixture(params=[True, False], ids=["lazy", "not-lazy"])
def with_lazy_tree(request):
with config.config_context() as cfg:
# apply the current parameter value to the config object for this test
cfg.lazy_tree = request.param
# yield inside the context so the test body runs while the setting is active
yield
14 changes: 8 additions & 6 deletions asdf/_tests/core/_converters/test_complex.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,18 @@ def test_valid_nan_complex(valid):
pass


def test_roundtrip():
def test_roundtrip(tmp_path):
values = {
"a": 0 + 0j,
"b": 1 + 1j,
"c": -1 + 1j,
"d": -1 - 1j,
}

result = helpers.roundtrip_object(values)

assert len(values) == len(result)
for key, value in values.items():
assert result[key] == value
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"values": values}).write_to(fn)
with asdf.open(fn) as af:
result = af["values"]
assert len(values) == len(result)
for key, value in values.items():
assert result[key] == value
9 changes: 5 additions & 4 deletions asdf/_tests/core/_converters/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,17 +60,18 @@ def test_software():
assert result == software


def test_history_entry():
def test_history_entry(tmp_path):
history_entry = HistoryEntry(
description="Some history happened here",
time=datetime.datetime.now(),
software=[Software(name="FooSoft", version="1.5.0")],
extra="property",
)

result = helpers.roundtrip_object(history_entry)

assert result == history_entry
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"obj": history_entry}).write_to(fn)
with asdf.open(fn) as af:
assert af["obj"] == history_entry


def test_subclass_metadata():
Expand Down
5 changes: 3 additions & 2 deletions asdf/_tests/tags/core/tests/test_ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,8 +919,9 @@ def test_inline_shape_mismatch():
"""

buff = helpers.yaml_to_asdf(content)
with pytest.raises(ValueError, match=r"inline data doesn't match the given shape"), asdf.open(buff):
pass
with pytest.raises(ValueError, match=r"inline data doesn't match the given shape"):
with asdf.open(buff) as af:
af["arr"]


def test_broadcasted_array(tmp_path):
Expand Down
6 changes: 4 additions & 2 deletions asdf/_tests/test_block_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,10 @@ def test_block_data_callback_converter(tmp_path):
# id(arr) would change every time
a = BlockDataCallback(lambda: np.zeros(3, dtype="uint8"))

b = helpers.roundtrip_object(a)
assert_array_equal(a.data, b.data)
tfn = tmp_path / "tmp.asdf"
asdf.AsdfFile({"obj": a}).write_to(tfn)
with asdf.open(tfn) as af:
assert_array_equal(a.data, af["obj"].data)

# make a tree without the BlockData instance to avoid
# the initial validate which will trigger block allocation
Expand Down
11 changes: 7 additions & 4 deletions asdf/_tests/test_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from packaging.specifiers import SpecifierSet
from yaml.representer import RepresenterError

import asdf
from asdf import AsdfFile, config_context
from asdf.exceptions import AsdfManifestURIMismatchWarning, AsdfWarning, ValidationError
from asdf.extension import (
Expand Down Expand Up @@ -899,7 +900,7 @@ def from_yaml_tree(self, node, tag, ctx):
config.add_extension(extension)


def test_reference_cycle():
def test_reference_cycle(tmp_path, with_lazy_tree):
class FractionWithInverse(fractions.Fraction):
def __init__(self, *args, **kwargs):
self._inverse = None
Expand Down Expand Up @@ -940,9 +941,11 @@ class FractionWithInverseExtension:
f2 = FractionWithInverse(5, 3)
f1.inverse = f2
f2.inverse = f1

read_f1 = roundtrip_object(f1)
assert read_f1.inverse.inverse is read_f1
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"obj": f1}).write_to(fn)
with asdf.open(fn) as af:
read_f1 = af["obj"]
assert read_f1.inverse.inverse is read_f1


def test_manifest_uri_id_mismatch_warning(tmp_path):
Expand Down
Loading

0 comments on commit 9faf968

Please sign in to comment.