Skip to content

Commit

Permalink
Merge pull request #1508 from braingram/converter_block_storage
Browse files Browse the repository at this point in the history
Converter block storage
  • Loading branch information
braingram authored May 2, 2023
2 parents 3aea2c1 + 2c9b8e1 commit 353d213
Show file tree
Hide file tree
Showing 15 changed files with 777 additions and 64 deletions.
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The ASDF Standard is at v1.6.0
- Add ``all_array_storage``, ``all_array_compression`` and
``all_array_compression_kwargs`` to ``asdf.config.AsdfConfig`` [#1468]
- Move built-in tags to converters (except ndarray and integer). [#1474]
- Add block storage support to Converter [#1508]

2.15.0 (2023-03-28)
-------------------
Expand Down
3 changes: 2 additions & 1 deletion asdf/_tests/commands/tests/test_exploded.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def test_explode_then_implode(tmpdir):
# in internal blocks rather than letting some of them be automatically put
# inline.
ff.write_to(path, all_array_storage="internal")
assert len(ff._blocks) == 2
with asdf.open(path) as af:
assert len(af._blocks._internal_blocks) == 2

result = main.main_from_args(["explode", path])

Expand Down
2 changes: 1 addition & 1 deletion asdf/_tests/tags/core/tests/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ def test_integer_storage_duplication(tmpdir):

with asdf.AsdfFile(tree) as af:
af.write_to(tmpfile)
assert len(af._blocks) == 1

with asdf.open(tmpfile, _force_raw_types=True) as rf:
assert len(af._blocks) == 1
assert rf.tree["integer1"]["words"]["source"] == 0
assert rf.tree["integer2"]["words"]["source"] == 0

Expand Down
17 changes: 17 additions & 0 deletions asdf/_tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import getpass
import io
import os
Expand Down Expand Up @@ -439,6 +440,7 @@ def test_array_inline_threshold_masked_array(array_inline_threshold, inline_bloc

with asdf.AsdfFile(tree) as af:
af.write_to(file_path)
with asdf.open(file_path) as af:
assert len(list(af._blocks.inline_blocks)) == inline_blocks
assert len(list(af._blocks.internal_blocks)) == internal_blocks

Expand Down Expand Up @@ -547,3 +549,18 @@ def test_asdf_standard_version_tag_selection():
content = buff.read()
assert b"!core/asdf-1.0.0" not in content
assert b"!core/asdf-1.1.0" in content


def test_write_to_no_tree_modification(tmp_path):
fn = tmp_path / "test.asdf"
fn2 = tmp_path / "test2.asdf"
tree = {"foo": None}
af = asdf.AsdfFile(tree.copy())
af.write_to(fn)
assert tree == af.tree
with asdf.open(fn) as af:
af["history"]["extensions"][0]["software"]["version"] = "0.0.0.dev+abcdefg"
af["asdf_library"]["author"] = "foo"
tree = copy.deepcopy(af.tree)
af.write_to(fn2)
assert af.tree == tree
133 changes: 100 additions & 33 deletions asdf/_tests/test_array_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import numpy as np
import pytest
import yaml
from numpy.random import random
from numpy.testing import assert_array_equal

Expand All @@ -26,6 +27,15 @@ def test_external_block(tmp_path):
assert "test0000.asdf" in os.listdir(tmp_path)


def test_external_block_url():
uri = "asdf://foo"
my_array = RNG.normal(size=(8, 8))
tree = {"my_array": my_array}
asdf.get_config().all_array_storage = "external"
# this should not raise a ValueError since uri is provided
asdf.AsdfFile(tree, uri=uri)


def test_external_block_non_url():
my_array = RNG.normal(size=(8, 8))
tree = {"my_array": my_array}
Expand Down Expand Up @@ -655,7 +665,8 @@ def test_invalid_block_index_values():
assert len(ff._blocks) == 1


def test_invalid_last_block_index():
@pytest.mark.parametrize("block_index_index", [0, -1])
def test_invalid_block_index_offset(block_index_index):
"""
This adds a value in the block index that points to something
that isn't a block
Expand All @@ -670,13 +681,33 @@ def test_invalid_last_block_index():
tree = {"arrays": arrays}

ff = asdf.AsdfFile(tree)
ff.write_to(buff, include_block_index=False)
ff._blocks._internal_blocks[-1]._offset -= 4
ff._blocks.write_block_index(buff, ff)
ff.write_to(buff)

# now overwrite the block index with the first entry
# incorrectly pointing to a non-block offset
buff.seek(0)
bs = buff.read()
block_index_header_start = bs.index(constants.INDEX_HEADER)
block_index_start = block_index_header_start + len(constants.INDEX_HEADER)
block_index = yaml.load(bs[block_index_start:], yaml.SafeLoader)
block_index[block_index_index] -= 4
yaml_version = tuple(int(x) for x in ff.version_map["YAML_VERSION"].split("."))
buff.seek(block_index_start)
yaml.dump(
block_index,
stream=buff,
explicit_start=True,
explicit_end=True,
version=yaml_version,
allow_unicode=True,
encoding="utf-8",
)

buff.seek(0)
with asdf.open(buff) as ff:
assert len(ff._blocks) == 1
for i, a in enumerate(arrays):
assert_array_equal(ff["arrays"][i], a)


def test_unordered_block_index():
Expand All @@ -702,30 +733,6 @@ def test_unordered_block_index():
assert len(ff._blocks) == 1


def test_invalid_block_index_first_block_value():
"""
This creates a bogus block index where the offset of the first
block doesn't match what we already know it to be. In this
case, we should reject the whole block index.
"""
buff = io.BytesIO()

arrays = []
for i in range(10):
arrays.append(np.ones((8, 8)) * i)

tree = {"arrays": arrays}

ff = asdf.AsdfFile(tree)
ff.write_to(buff, include_block_index=False)
ff._blocks._internal_blocks[0]._offset -= 4
ff._blocks.write_block_index(buff, ff)

buff.seek(0)
with asdf.open(buff) as ff:
assert len(ff._blocks) == 1


def test_invalid_block_id():
ff = asdf.AsdfFile()
with pytest.raises(ValueError, match=r"Invalid source id .*"):
Expand Down Expand Up @@ -859,7 +866,11 @@ def test_write_to_update_storage_options(tmp_path, all_array_storage, all_array_
if all_array_compression == "bzp2" and compression_kwargs is not None:
compression_kwargs = {"compresslevel": 1}

def assert_result(ff, arr):
def assert_result(ff):
if "array" not in ff:
# this was called from _write_to while making an external block
# so don't check the result
return
if all_array_storage == "external":
assert "test0000.asdf" in os.listdir(tmp_path)
else:
Expand All @@ -868,10 +879,12 @@ def assert_result(ff, arr):
assert len(ff._blocks._internal_blocks) == 1
else:
assert len(ff._blocks._internal_blocks) == 0
blk = ff._blocks[arr]
blk = ff._blocks[ff["array"]]

target_compression = all_array_compression or None
assert blk._output_compression == target_compression
if target_compression == "input":
target_compression = None
assert blk.output_compression == target_compression

target_compression_kwargs = compression_kwargs or {}
assert blk._output_compression_kwargs == target_compression_kwargs
Expand All @@ -881,14 +894,28 @@ def assert_result(ff, arr):
fn = tmp_path / "test.asdf"

ff1 = asdf.AsdfFile(tree)

# as a new AsdfFile is used for write_to and we want
# to check blocks here, we patch _write_to to allow us
# to inspect the blocks in the new AsdfFile before
# it falls out of scope
original = asdf.AsdfFile._write_to

def patched(self, *args, **kwargs):
original(self, *args, **kwargs)
assert_result(self)

asdf.AsdfFile._write_to = patched

# first check write_to
ff1.write_to(
fn,
all_array_storage=all_array_storage,
all_array_compression=all_array_compression,
compression_kwargs=compression_kwargs,
)
assert_result(ff1, arr1)

asdf.AsdfFile._write_to = original

# then reuse the file to check update
with asdf.open(fn, mode="rw") as ff2:
Expand All @@ -899,7 +926,7 @@ def assert_result(ff, arr):
all_array_compression=all_array_compression,
compression_kwargs=compression_kwargs,
)
assert_result(ff2, arr2)
assert_result(ff2)


def test_block_key():
Expand Down Expand Up @@ -971,3 +998,43 @@ def __call__(self):
mode="r",
) as f:
rb.read(f, past_magic=False)


def test_remove_blocks(tmp_path):
"""Test that writing to a new file"""
fn1 = tmp_path / "test.asdf"
fn2 = tmp_path / "test2.asdf"

tree = {"a": np.zeros(3), "b": np.ones(1)}
af = asdf.AsdfFile(tree)
af.write_to(fn1)

with asdf.open(fn1, mode="rw") as af:
assert len(af._blocks._internal_blocks) == 2
af["a"] = None
af.write_to(fn2)

with asdf.open(fn1, mode="rw") as af:
assert len(af._blocks._internal_blocks) == 2
af["a"] = None
af.update()

for fn in (fn1, fn2):
with asdf.open(fn) as af:
assert len(af._blocks._internal_blocks) == 1


def test_write_to_before_update(tmp_path):
# this is a regression test for: https://github.com/asdf-format/asdf/issues/1505
fn1 = tmp_path / "test1.asdf"
fn2 = tmp_path / "test2.asdf"

tree = {"a": np.zeros(3), "b": np.ones(3)}
af = asdf.AsdfFile(tree)

af.write_to(fn1)

with asdf.open(fn1, mode="rw") as af:
af["a"] = None
af.write_to(fn2)
af.update()
4 changes: 2 additions & 2 deletions asdf/_tests/test_asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def test_open_asdf_extensions(tmp_path):

def test_serialization_context():
extension_manager = ExtensionManager([])
context = SerializationContext("1.4.0", extension_manager, "file://test.asdf")
context = SerializationContext("1.4.0", extension_manager, "file://test.asdf", None)
assert context.version == "1.4.0"
assert context.extension_manager is extension_manager
assert context._extensions_used == set()
Expand All @@ -215,7 +215,7 @@ def test_serialization_context():
context._mark_extension_used(object())

with pytest.raises(ValueError, match=r"ASDF Standard version .* is not supported by asdf==.*"):
SerializationContext("0.5.4", extension_manager, None)
SerializationContext("0.5.4", extension_manager, None, None)


def test_reading_extension_metadata():
Expand Down
Loading

0 comments on commit 353d213

Please sign in to comment.