Skip to content

Commit

Permalink
Merge pull request #802 from eslavich/AL-304-fix-auto-inline
Browse files Browse the repository at this point in the history
Fix auto_inline when block is already set to inline
  • Loading branch information
perrygreenfield authored May 19, 2020
2 parents 36efc21 + d03b058 commit cabeb12
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 80 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@
- Improve validator performance by skipping unnecessary step of
copying schema objects. [#784]

- Fix bug with ``auto_inline`` option where inline blocks
are not converted to internal when they exceed the threshold. [#802]

2.6.0 (2020-04-22)
------------------

Expand Down
19 changes: 3 additions & 16 deletions asdf/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from urllib import parse as urlparse

import numpy as np
from numpy.ma.core import masked_array

import yaml

Expand Down Expand Up @@ -585,9 +584,11 @@ def _handle_global_block_settings(self, ctx, block):
block.output_compression = all_array_compression

auto_inline = getattr(ctx, '_auto_inline', None)
if auto_inline:
if auto_inline and block.array_storage in ['internal', 'inline']:
if np.product(block.data.shape) < auto_inline:
self.set_array_storage(block, 'inline')
else:
self.set_array_storage(block, 'internal')

def finalize(self, ctx):
"""
Expand Down Expand Up @@ -715,20 +716,6 @@ def get_source(self, block):

raise ValueError("block not found.")

def _should_inline(self, array):

if not np.issubdtype(array.dtype, np.number):
return False

if isinstance(array, masked_array):
return False

# Make sure none of the values are too large to store as literals
if (array[~np.isnan(array)] > 2**52).any():
return False

return array.size <= self._inline_threshold_size

def find_or_create_block_for_array(self, arr, ctx):
"""
For a given array, looks for an existing block containing its
Expand Down
104 changes: 40 additions & 64 deletions asdf/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,116 +321,92 @@ def test_extension_version_check(installed, extension, warns):
af._check_extensions(tree)


@pytest.mark.xfail(reason='Setting auto_inline option modifies AsdfFile state')
def test_auto_inline(tmpdir):

outfile = str(tmpdir.join('test.asdf'))
tree = dict(data=np.arange(6))
tree = {"small_array": np.arange(6), "large_array": np.arange(100)}

# Use the same object for each write in order to make sure that there
# aren't unanticipated side effects
with asdf.AsdfFile(tree) as af:
# By default blocks are written internal.
af.write_to(outfile)
assert len(list(af.blocks.inline_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 1
assert len(list(af.blocks.internal_blocks)) == 2

af.write_to(outfile, auto_inline=10)
assert len(list(af.blocks.inline_blocks)) == 1
assert len(list(af.blocks.internal_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 1

# The previous write modified the small array block's storage
# to inline, and a subsequent write should maintain that setting.
af.write_to(outfile)
assert len(list(af.blocks.inline_blocks)) == 0
assert len(list(af.blocks.inline_blocks)) == 1
assert len(list(af.blocks.internal_blocks)) == 1

af.write_to(outfile, auto_inline=7)
assert len(list(af.blocks.inline_blocks)) == 1
assert len(list(af.blocks.internal_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 1

af.write_to(outfile, auto_inline=5)
assert len(list(af.blocks.inline_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 1

assert len(list(af.blocks.internal_blocks)) == 2

@pytest.mark.skip(reason='Until inline_threshold is added as a write option')
def test_inline_threshold(tmpdir):

tree = {
'small': np.ones(10),
'large': np.ones(100)
}
def test_auto_inline_masked_array(tmpdir):
outfile = str(tmpdir.join('test.asdf'))
arr = np.arange(6)
masked_arr = np.ma.masked_equal(arr, 3)
tree = {"masked_arr": masked_arr}

with asdf.AsdfFile(tree) as af:
assert len(list(af.blocks.inline_blocks)) == 1
assert len(list(af.blocks.internal_blocks)) == 1

with asdf.AsdfFile(tree, inline_threshold=10) as af:
assert len(list(af.blocks.inline_blocks)) == 1
assert len(list(af.blocks.internal_blocks)) == 1

with asdf.AsdfFile(tree, inline_threshold=5) as af:
af.write_to(outfile)
assert len(list(af.blocks.inline_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 2

with asdf.AsdfFile(tree, inline_threshold=100) as af:
af.write_to(outfile, auto_inline=10)
assert len(list(af.blocks.inline_blocks)) == 2
assert len(list(af.blocks.internal_blocks)) == 0


@pytest.mark.skip(reason='Until inline_threshold is added as a write option')
def test_inline_threshold_masked(tmpdir):

mask = np.random.randint(0, 1+1, 20)
masked_array = np.ma.masked_array(np.ones(20), mask=mask)

tree = {
'masked': masked_array
}

# Make sure that masked arrays aren't automatically inlined, even if they
# are small enough
with asdf.AsdfFile(tree) as af:
af.write_to(outfile, auto_inline=5)
assert len(list(af.blocks.inline_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 2

tree = {
'masked': masked_array,
'normal': np.random.random(20)
}

def test_auto_inline_large_value(tmpdir):
outfile = str(tmpdir.join('test.asdf'))
arr = np.array([2**52 + 1, 1, 2, 3, 4, 5])
tree = {"array": arr}

with asdf.AsdfFile(tree) as af:
assert len(list(af.blocks.inline_blocks)) == 1
assert len(list(af.blocks.internal_blocks)) == 2
af.write_to(outfile)
assert len(list(af.blocks.inline_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 1

with pytest.raises(ValidationError):
af.write_to(outfile, auto_inline=10)

@pytest.mark.skip(reason='Until inline_threshold is added as a write option')
def test_inline_threshold_override(tmpdir):
af.write_to(outfile, auto_inline=5)
assert len(list(af.blocks.inline_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 1

tmpfile = str(tmpdir.join('inline.asdf'))

tree = {
'small': np.ones(10),
'large': np.ones(100)
}
def test_auto_inline_string_array(tmpdir):
outfile = str(tmpdir.join('test.asdf'))
arr = np.array(["peach", "plum", "apricot", "nectarine", "cherry", "pluot"])
tree = {"array": arr}

with asdf.AsdfFile(tree) as af:
af.set_array_storage(tree['small'], 'internal')
af.write_to(outfile)
assert len(list(af.blocks.inline_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 2
assert len(list(af.blocks.internal_blocks)) == 1

with asdf.AsdfFile(tree) as af:
af.set_array_storage(tree['large'], 'inline')
assert len(list(af.blocks.inline_blocks)) == 2
af.write_to(outfile, auto_inline=10)
assert len(list(af.blocks.inline_blocks)) == 1
assert len(list(af.blocks.internal_blocks)) == 0

with asdf.AsdfFile(tree) as af:
af.write_to(tmpfile, all_array_storage='internal')
af.write_to(outfile, auto_inline=5)
assert len(list(af.blocks.inline_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 2

with asdf.AsdfFile(tree) as af:
af.write_to(tmpfile, all_array_storage='inline')
assert len(list(af.blocks.inline_blocks)) == 2
assert len(list(af.blocks.internal_blocks)) == 0
assert len(list(af.blocks.internal_blocks)) == 1


def test_resolver_deprecations():
Expand Down

0 comments on commit cabeb12

Please sign in to comment.