Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert inline threshold option #610

Merged
merged 6 commits into from
Nov 16, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
2.3.0 (unreleased)
------------------

- Small numeric arrays are now automatically stored inline. This behavior can
be overridden using the new ``inline_threshold`` argument to the ``AsdfFile``
constructor. It can also be controlled with the existing
``set_array_storage`` method of ``AsdfFile`` and the ``all_array_storage``
argument to ``AsdfFile.write_to``. [#557]

- Storage of arbitrary precision integers is now provided by
``asdf.IntegerType``. Reading a file with integer literals that are too
Expand Down
12 changes: 4 additions & 8 deletions asdf/asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,7 @@ class AsdfFile(versioning.VersionedMixin):
def __init__(self, tree=None, uri=None, extensions=None, version=None,
ignore_version_mismatch=True, ignore_unrecognized_tag=False,
ignore_implicit_conversion=False, copy_arrays=False,
lazy_load=True, custom_schema=None, inline_threshold=None,
_readonly=False):
lazy_load=True, custom_schema=None, _readonly=False):
"""
Parameters
----------
Expand Down Expand Up @@ -109,10 +108,7 @@ def __init__(self, tree=None, uri=None, extensions=None, version=None,
files follow custom conventions beyond those enforced by the
standard.

inline_threshold : int, optional
Optional threshold size below which arrays will automatically be
stored inline. Defaults to {0}.
""".format(block._DEFAULT_INLINE_THRESHOLD_SIZE)
"""

if custom_schema is not None:
self._custom_schema = schema.load_custom_schema(custom_schema)
Expand All @@ -134,8 +130,8 @@ def __init__(self, tree=None, uri=None, extensions=None, version=None,
self._closed = False
self._external_asdf_by_uri = {}
self._blocks = block.BlockManager(
self, copy_arrays=copy_arrays, inline_threshold=inline_threshold,
lazy_load=lazy_load, readonly=_readonly)
self, copy_arrays=copy_arrays, lazy_load=lazy_load,
readonly=_readonly)
self._uri = None
if tree is None:
self.tree = {}
Expand Down
19 changes: 4 additions & 15 deletions asdf/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,12 @@
from . import yamlutil


_DEFAULT_INLINE_THRESHOLD_SIZE = 50


class BlockManager(object):
"""
Manages the `Block`s associated with an ASDF file.
"""
def __init__(self, asdffile, copy_arrays=False, inline_threshold=None,
lazy_load=True, readonly=False):
def __init__(self, asdffile, copy_arrays=False, lazy_load=True,
readonly=False):
self._asdffile = weakref.ref(asdffile)

self._internal_blocks = []
Expand All @@ -49,11 +46,6 @@ def __init__(self, asdffile, copy_arrays=False, inline_threshold=None,
'streamed': self._streamed_blocks
}

if inline_threshold is not None:
self._inline_threshold_size = inline_threshold
else:
self._inline_threshold_size = _DEFAULT_INLINE_THRESHOLD_SIZE

self._data_to_block_mapping = {}
self._validate_checksums = False
self._memmap = not copy_arrays
Expand Down Expand Up @@ -753,7 +745,8 @@ def find_or_create_block_for_array(self, arr, ctx):
block : Block
"""
from .tags.core import ndarray
if (isinstance(arr, ndarray.NDArrayType) and arr.block is not None):
if (isinstance(arr, ndarray.NDArrayType) and
arr.block is not None):
if arr.block in self.blocks:
return arr.block
else:
Expand All @@ -764,10 +757,6 @@ def find_or_create_block_for_array(self, arr, ctx):
if block is not None:
return block
block = Block(base)

if self._should_inline(arr):
block._array_storage = 'inline'

self.add(block)
self._handle_global_block_settings(ctx, block)
return block
Expand Down
3 changes: 0 additions & 3 deletions asdf/tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,6 @@ def _assert_roundtrip_tree(tree, tmpdir, *, asdf_check_func=None,

fname = str(tmpdir.join('test.asdf'))

# Most tests assume that all blocks will be stored internally
init_options.setdefault('inline_threshold', 0)

# First, test writing/reading a BytesIO buffer
buff = io.BytesIO()
AsdfFile(tree, extensions=extensions, **init_options).write_to(buff, **write_options)
Expand Down
3 changes: 3 additions & 0 deletions asdf/tests/test_low_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -1216,6 +1216,7 @@ def test_context_handler_resolve_and_inline(tmpdir):
newf.tree['random'][0]


@pytest.mark.skip(reason='Until inline_threshold is added as a write option')
def test_inline_threshold(tmpdir):

tree = {
Expand All @@ -1240,6 +1241,7 @@ def test_inline_threshold(tmpdir):
assert len(list(af.blocks.internal_blocks)) == 0


@pytest.mark.skip(reason='Until inline_threshold is added as a write option')
def test_inline_threshold_masked(tmpdir):

mask = np.random.randint(0, 1+1, 20)
Expand All @@ -1265,6 +1267,7 @@ def test_inline_threshold_masked(tmpdir):
assert len(list(af.blocks.internal_blocks)) == 2


@pytest.mark.skip(reason='Until inline_threshold is added as a write option')
def test_inline_threshold_override(tmpdir):

tmpfile = str(tmpdir.join('inline.asdf'))
Expand Down
36 changes: 5 additions & 31 deletions docs/asdf/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,37 +56,11 @@ data being saved.
Saving inline arrays
--------------------

As of `asdf-2.2.0`, small numerical arrays are automatically stored inline. The
default threshold size for inline versus internal arrays can be found with the
following:

.. code::

>>> from asdf.block import _DEFAULT_INLINE_THRESHOLD_SIZE
>>> print(_DEFAULT_INLINE_THRESHOLD_SIZE)
50

The default threshold can be overridden by passing the `inline_threshold`
argument to the `asdf.AsdfFile` constructor. Setting `inline_threshold=0` causes
all arrays, including small ones, to be stored in internal blocks:

.. runcode::

from asdf import AsdfFile
import numpy as np

# Ordinarily an array this size would be automatically inlined
my_array = np.ones(10)
tree = {'my_array': my_array}
# Set the inline threshold to 0 to force internal storage
with AsdfFile(tree, inline_threshold=0) as ff:
ff.write_to("test.asdf")

.. asdf:: test.asdf

The `~asdf.AsdfFile.set_array_storage` method can be used to set or override
the default storage type of a particular data array. The allowed values are
``internal``, ``external``, and ``inline``.
For small arrays, you may not care about the efficiency of a binary
representation and just want to save the array contents directly in the YAML
tree. The `~asdf.AsdfFile.set_array_storage` method can be used to set the
storage type of the associated data. The allowed values are ``internal``,
``external``, and ``inline``.

- ``internal``: The default. The array data will be
stored in a binary block in the same ASDF file.
Expand Down