From 564b15977eca957e73e624238c8678ffa7b1f1f9 Mon Sep 17 00:00:00 2001
From: Vadim Markovtsev
Date: Wed, 24 Oct 2018 11:06:55 +0200
Subject: [PATCH 1/8] Add lazy_load parameter to open()

This allows loading ASDF files, then closing the file and still
retaining full access to the tree objects.

Signed-off-by: Vadim Markovtsev
---
 asdf/asdf.py                         | 23 ++++++--
 asdf/block.py                        | 88 +++++++++++++++++-----------
 asdf/tags/core/ndarray.py            |  2 +
 asdf/tags/core/tests/test_ndarray.py | 12 ++--
 asdf/tests/helpers.py                |  9 +++
 5 files changed, 92 insertions(+), 42 deletions(-)

diff --git a/asdf/asdf.py b/asdf/asdf.py
index a5e2b6af3..fd6b766b7 100644
--- a/asdf/asdf.py
+++ b/asdf/asdf.py
@@ -51,7 +51,7 @@ class AsdfFile(versioning.VersionedMixin):
     def __init__(self, tree=None, uri=None, extensions=None, version=None,
                  ignore_version_mismatch=True, ignore_unrecognized_tag=False,
                  ignore_implicit_conversion=False, copy_arrays=False,
-                 custom_schema=None, inline_threshold=None):
+                 lazy_load=True, custom_schema=None, inline_threshold=None):
         """
         Parameters
         ----------
@@ -92,6 +92,12 @@ def __init__(self, tree=None, uri=None, extensions=None, version=None,
             When `False`, when reading files, attempt to memmap underlying data
             arrays when possible.
 
+        lazy_load : bool, optional
+            When `True` and the read file is seekable, underlying data arrays
+            will be materialized when they are used for the first time.
+            This implies that the file should stay open during the lifetime
+            of the tree.
+
         custom_schema : str, optional
             Path to a custom schema file that will be used for a secondary
             validation pass. This can be used to ensure that particular ASDF
@@ -122,8 +128,9 @@ def __init__(self, tree=None, uri=None, extensions=None, version=None,
         self._fd = None
         self._closed = False
         self._external_asdf_by_uri = {}
-        self._blocks = block.BlockManager(self, copy_arrays=copy_arrays,
-                                          inline_threshold=inline_threshold)
+        self._blocks = block.BlockManager(
+            self, copy_arrays=copy_arrays, inline_threshold=inline_threshold,
+            lazy_load=lazy_load)
         self._uri = None
         if tree is None:
             self.tree = {}
@@ -706,6 +713,7 @@ def open(cls, fd, uri=None, mode='r',
              ignore_unrecognized_tag=False,
              _force_raw_types=False,
              copy_arrays=False,
+             lazy_load=True,
              custom_schema=None,
              strict_extension_check=False,
              ignore_missing_extensions=False):
@@ -749,6 +757,12 @@ def open(cls, fd, uri=None, mode='r',
             When `False`, when reading files, attempt to memmap underlying data
             arrays when possible.
 
+        lazy_load : bool, optional
+            When `True` and the read file is seekable, underlying data arrays
+            will be materialized when they are used for the first time.
+            This implies that the file should stay open during the lifetime
+            of the tree.
+
         custom_schema : str, optional
             Path to a custom schema file that will be used for a secondary
             validation pass. This can be used to ensure that particular ASDF
@@ -775,7 +789,8 @@ def open(cls, fd, uri=None, mode='r',
         self = cls(extensions=extensions,
                    ignore_version_mismatch=ignore_version_mismatch,
                    ignore_unrecognized_tag=ignore_unrecognized_tag,
-                   copy_arrays=copy_arrays, custom_schema=custom_schema)
+                   copy_arrays=copy_arrays, lazy_load=lazy_load,
+                   custom_schema=custom_schema)
 
         return cls._open_impl(
             self, fd, uri=uri, mode=mode,
diff --git a/asdf/block.py b/asdf/block.py
index 07139c950..79916d392 100644
--- a/asdf/block.py
+++ b/asdf/block.py
@@ -33,7 +33,8 @@ class BlockManager(object):
     """
     Manages the `Block`s associated with an ASDF file.
""" - def __init__(self, asdffile, copy_arrays=False, inline_threshold=None): + def __init__(self, asdffile, copy_arrays=False, inline_threshold=None, + lazy_load=True): self._asdffile = weakref.ref(asdffile) self._internal_blocks = [] @@ -56,6 +57,7 @@ def __init__(self, asdffile, copy_arrays=False, inline_threshold=None): self._data_to_block_mapping = {} self._validate_checksums = False self._memmap = not copy_arrays + self._lazy_load = lazy_load def __len__(self): """ @@ -201,6 +203,9 @@ def has_blocks_with_offset(self): return True return False + def _new_block(self): + return Block(memmap=self._memmap, lazy_load=self._lazy_load) + def _sort_blocks_by_offset(self): def sorter(x): if x.offset is None: @@ -212,7 +217,7 @@ def sorter(x): def _read_next_internal_block(self, fd, past_magic=False): # This assumes the file pointer is at the beginning of the # block, (or beginning + 4 if past_magic is True) - block = Block(memmap=self._memmap).read( + block = self._new_block().read( fd, past_magic=past_magic, validate_checksum=self._validate_checksums) if block is not None: @@ -262,22 +267,23 @@ def finish_reading_internal_blocks(self): This is called before updating a file, since updating requires knowledge of all internal blocks in the file. """ - if len(self._internal_blocks): - for i, block in enumerate(self._internal_blocks): - if isinstance(block, UnloadedBlock): - block.load() - - last_block = self._internal_blocks[-1] - - # Read all of the remaining blocks in the file, if any - if (last_block._fd is not None and - last_block._fd.seekable()): - last_block._fd.seek(last_block.end_offset) - while True: - last_block = self._read_next_internal_block( - last_block._fd, False) - if last_block is None: - break + if not self._internal_blocks: + return + for i, block in enumerate(self._internal_blocks): + if isinstance(block, UnloadedBlock): + block.load() + + last_block = self._internal_blocks[-1] + + # Read all of the remaining blocks in the file, if any + if (last_block._fd is not None and + last_block._fd.seekable()): + last_block._fd.seek(last_block.end_offset) + while True: + last_block = self._read_next_internal_block( + last_block._fd, False) + if last_block is None: + break def write_internal_blocks_serial(self, fd, pad_blocks=False): """ @@ -499,7 +505,7 @@ def read_block_index(self, fd, ctx): # make sure it makes sense. 
         fd.seek(offsets[-1], generic_io.SEEK_SET)
         try:
-            block = Block(memmap=self._memmap).read(fd)
+            block = self._new_block().read(fd)
         except (ValueError, IOError):
             return
 
@@ -511,11 +517,16 @@ def read_block_index(self, fd, ctx):
         # objects
         for offset in offsets[1:-1]:
             self._internal_blocks.append(
-                UnloadedBlock(fd, offset, memmap=self._memmap))
+                UnloadedBlock(fd, offset,
+                              memmap=self._memmap, lazy_load=self._lazy_load))
 
         # We already read the last block in the file -- no need to read it again
         self._internal_blocks.append(block)
 
+        # Materialize the internal blocks if we are not lazy
+        if not self._lazy_load:
+            self.finish_reading_internal_blocks()
+
     def get_external_filename(self, filename, index):
         """
         Given a main filename and an index number, return a new file
@@ -788,7 +799,8 @@ class Block(object):
         ('checksum', '16s')
     ])
 
-    def __init__(self, data=None, uri=None, array_storage='internal', memmap=True):
+    def __init__(self, data=None, uri=None, array_storage='internal',
+                 memmap=True, lazy_load=True):
         if isinstance(data, np.ndarray) and not data.flags.c_contiguous:
             if data.flags.f_contiguous:
                 self._data = np.asfortranarray(data)
@@ -806,6 +818,7 @@ def __init__(self, data=None, uri=None, array_storage='internal', memmap=True):
         self._checksum = None
         self._should_memmap = memmap
         self._memmapped = False
+        self._lazy_load = lazy_load
 
         self.update_size()
         self._allocated = self._size
@@ -943,10 +956,10 @@ def read(self, fd, past_magic=False, validate_checksum=False):
         """
         Read a Block from the given Python file-like object.
 
-        If the file is seekable, the reading or memmapping of the
-        actual data is postponed until an array requests it. If the
-        file is a stream, the data will be read into memory
-        immediately.
+        If the file is seekable and lazy_load is True, the reading
+        or memmapping of the actual data is postponed until an array
+        requests it. If the file is a stream or lazy_load is False,
+        the data will be read into memory immediately.
 
         Parameters
         ----------
@@ -1012,19 +1025,27 @@ def read(self, fd, past_magic=False, validate_checksum=False):
             # If the file is seekable, we can delay reading the actual
            # data until later.
             self._fd = fd
-            self._header_size = header_size
             self._offset = offset
+            self._header_size = header_size
             if header['flags'] & constants.BLOCK_FLAG_STREAMED:
                 # Support streaming blocks
-                fd.fast_forward(-1)
                 self._array_storage = 'streamed'
-                self._data_size = self._size = self._allocated = \
-                    (fd.tell() - self.data_offset) + 1
+                if self._lazy_load:
+                    fd.fast_forward(-1)
+                    self._data_size = self._size = self._allocated = \
+                        (fd.tell() - self.data_offset) + 1
+                else:
+                    self._data = fd.read_into_array(-1)
+                    self._data_size = self._size = self._allocated = len(self._data)
             else:
-                fd.fast_forward(header['allocated_size'])
                 self._allocated = header['allocated_size']
                 self._size = header['used_size']
                 self._data_size = header['data_size']
+                if self._lazy_load:
+                    fd.fast_forward(header['allocated_size'])
+                else:
+                    self._data = self._read_data(fd, self._size, self._data_size)
+                    fd.fast_forward(self._allocated - self._size)
         else:
             # If the file is a stream, we need to get the data now.
             if header['flags'] & constants.BLOCK_FLAG_STREAMED:
@@ -1033,9 +1054,9 @@ def read(self, fd, past_magic=False, validate_checksum=False):
                 self._data = fd.read_into_array(-1)
                 self._data_size = self._size = self._allocated = len(self._data)
             else:
-                self._data_size = header['data_size']
-                self._size = header['used_size']
                 self._allocated = header['allocated_size']
+                self._size = header['used_size']
+                self._data_size = header['data_size']
                 self._data = self._read_data(fd, self._size, self._data_size)
                 fd.fast_forward(self._allocated - self._size)
             fd.close()
@@ -1165,7 +1186,7 @@ class UnloadedBlock(object):
     full-fledged block whenever the underlying data or more detail is
     requested.
     """
-    def __init__(self, fd, offset, memmap=True):
+    def __init__(self, fd, offset, memmap=True, lazy_load=True):
         self._fd = fd
         self._offset = offset
         self._data = None
@@ -1176,6 +1197,7 @@ def __init__(self, fd, offset, memmap=True):
         self._checksum = None
         self._should_memmap = memmap
         self._memmapped = False
+        self._lazy_load = lazy_load
 
     def __len__(self):
         self.load()
diff --git a/asdf/tags/core/ndarray.py b/asdf/tags/core/ndarray.py
index 8d8cea8b3..271a9096b 100644
--- a/asdf/tags/core/ndarray.py
+++ b/asdf/tags/core/ndarray.py
@@ -240,6 +240,8 @@ def __init__(self, source, shape, dtype, offset, strides,
         self._offset = offset
         self._strides = strides
         self._order = order
+        if not asdffile.blocks._lazy_load:
+            self._make_array()
 
     def _make_array(self):
         if self._array is None:
diff --git a/asdf/tags/core/tests/test_ndarray.py b/asdf/tags/core/tests/test_ndarray.py
index d99e5187f..3c2777ad6 100644
--- a/asdf/tags/core/tests/test_ndarray.py
+++ b/asdf/tags/core/tests/test_ndarray.py
@@ -96,14 +96,16 @@ def test_byteorder(tmpdir):
     }
 
     def check_asdf(asdf):
-        tree = asdf.tree
+        my_tree = asdf.tree
+
+        for endian in ('bigendian', 'little'):
+            assert my_tree[endian].dtype == tree[endian].dtype
 
         if sys.byteorder == 'little':
-            assert tree['bigendian'].dtype.byteorder == '>'
-            assert tree['little'].dtype.byteorder == '='
+            assert my_tree['bigendian'].dtype.byteorder == '>'
+            assert my_tree['little'].dtype.byteorder == '='
         else:
-            assert tree['bigendian'].dtype.byteorder == '='
-            assert tree['little'].dtype.byteorder == '<'
+            assert my_tree['bigendian'].dtype.byteorder == '='
+            assert my_tree['little'].dtype.byteorder == '<'
 
     def check_raw_yaml(content):
         assert b'byteorder: little' in content
diff --git a/asdf/tests/helpers.py b/asdf/tests/helpers.py
index a1ed31356..2268a1fcc 100644
--- a/asdf/tests/helpers.py
+++ b/asdf/tests/helpers.py
@@ -233,6 +233,15 @@ def assert_roundtrip_tree(tree, tmpdir, *, asdf_check_func=None,
         finally:
             server.finalize()
 
+    # Now don't be lazy and check that nothing breaks
+    with io.BytesIO() as buff:
+        AsdfFile(tree, extensions=extensions, **init_options).write_to(buff, **write_options)
+        buff.seek(0)
+        ff = AsdfFile.open(buff, extensions=extensions, copy_arrays=True, lazy_load=False)
+    # The underlying file is closed at this time and everything should still work
+    assert_tree_match(tree, ff.tree, ff, funcname=tree_match_func)
+    if asdf_check_func:
+        asdf_check_func(ff)
 
 def yaml_to_asdf(yaml_content, yaml_headers=True, standard_version=None):
     """

From 2b37ffbd4a37a6c97e7144f29d9c14ed7f6f2555 Mon Sep 17 00:00:00 2001
From: Vadim Markovtsev
Date: Wed, 24 Oct 2018 17:04:38 +0200
Subject: [PATCH 2/8] Add lazy_load to the changelog

---
 CHANGES.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/CHANGES.rst b/CHANGES.rst
index eabf76e3a..7433da5b7 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -6,6 +6,13 @@ constructor. It can also be controlled with the existing ``set_array_storage``
   method of ``AsdfFile`` and the ``all_array_storage`` argument to
   ``AsdfFile.write_to``. [#557]
 
+- Add new parameter `lazy_load` to ``AsdfFile.open``. It is ``True`` by
+  default and preserves the default behavior. ``False`` detaches the
+  loaded tree from the underlying file: all blocks are fully read and
+  numpy arrays are materialized. Thus it becomes safe to close the file
+  and continue using ``AsdfFile.tree``. However, the `copy_arrays` parameter
+  is still effective and the active memory maps may still require the file
+  to stay open in case `copy_arrays` is ``False``. [#573]
 
 2.1.1 (unreleased)
 ------------------

From d808f937449a69f964cb2a1c646049ece09ea946 Mon Sep 17 00:00:00 2001
From: Vadim Markovtsev
Date: Wed, 24 Oct 2018 17:05:14 +0200
Subject: [PATCH 3/8] Update the docstrings in AsdfFile

---
 asdf/asdf.py | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/asdf/asdf.py b/asdf/asdf.py
index fd6b766b7..a74bcf524 100644
--- a/asdf/asdf.py
+++ b/asdf/asdf.py
@@ -90,13 +90,18 @@ def __init__(self, tree=None, uri=None, extensions=None, version=None,
 
         copy_arrays : bool, optional
             When `False`, when reading files, attempt to memmap underlying data
-            arrays when possible.
+            arrays when possible. Please note that the file must be opened
+            in "rw" mode; "r" will result in a SIGSEGV.
 
         lazy_load : bool, optional
-            When `True` and the read file is seekable, underlying data arrays
-            will be materialized when they are used for the first time.
-            This implies that the file should stay open during the lifetime
-            of the tree.
+            When `True` and the underlying file handle is seekable, data
+            arrays will only be loaded lazily: i.e. when they are accessed
+            for the first time. In this case the underlying file must stay
+            open during the lifetime of the tree. Setting it to `False` causes
+            all data arrays to be loaded up front, which means that they
+            can be accessed even after the underlying file is closed.
+            Note: even if `lazy_load` is `False`, `copy_arrays` is still taken
+            into account.
 
         custom_schema : str, optional
             Path to a custom schema file that will be used for a secondary
@@ -755,13 +760,18 @@ def open(cls, fd, uri=None, mode='r',
 
         copy_arrays : bool, optional
             When `False`, when reading files, attempt to memmap underlying data
-            arrays when possible.
+            arrays when possible. Please note that the file must be opened
+            in "rw" mode; "r" will result in a SIGSEGV.
 
         lazy_load : bool, optional
-            When `True` and the read file is seekable, underlying data arrays
-            will be materialized when they are used for the first time.
-            This implies that the file should stay open during the lifetime
-            of the tree.
+            When `True` and the underlying file handle is seekable, data
+            arrays will only be loaded lazily: i.e. when they are accessed
+            for the first time. In this case the underlying file must stay
+            open during the lifetime of the tree. Setting it to `False` causes
+            all data arrays to be loaded up front, which means that they
+            can be accessed even after the underlying file is closed.
+            Note: even if `lazy_load` is `False`, `copy_arrays` is still taken
+            into account.
 
         custom_schema : str, optional
             Path to a custom schema file that will be used for a secondary

From 30f2093ce16ca94d9297af7be404a172a91e7066 Mon Sep 17 00:00:00 2001
From: Vadim Markovtsev
Date: Wed, 24 Oct 2018 17:06:31 +0200
Subject: [PATCH 4/8] Add BlockManager.lazy_load/memmap properties

---
 asdf/block.py             | 21 ++++++++++++++++++---
 asdf/tags/core/ndarray.py |  2 +-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/asdf/block.py b/asdf/block.py
index 79916d392..79dee20ca 100644
--- a/asdf/block.py
+++ b/asdf/block.py
@@ -193,6 +193,21 @@ def inline_blocks(self):
         for block in self._inline_blocks:
             yield block
 
+    @property
+    def memmap(self):
+        """
+        The flag which indicates whether the arrays are memory mapped
+        to the underlying file.
+        """
+        return self._memmap
+
+    @property
+    def lazy_load(self):
+        """
+        The flag which indicates whether the blocks are lazily read.
+        """
+        return self._lazy_load
+
     def has_blocks_with_offset(self):
         """
         Returns `True` if any of the internal blocks currently have an
@@ -204,7 +219,7 @@ def has_blocks_with_offset(self):
         return False
 
     def _new_block(self):
-        return Block(memmap=self._memmap, lazy_load=self._lazy_load)
+        return Block(memmap=self.memmap, lazy_load=self.lazy_load)
 
     def _sort_blocks_by_offset(self):
         def sorter(x):
@@ -518,13 +533,13 @@ def read_block_index(self, fd, ctx):
         for offset in offsets[1:-1]:
             self._internal_blocks.append(
                 UnloadedBlock(fd, offset,
-                              memmap=self._memmap, lazy_load=self._lazy_load))
+                              memmap=self.memmap, lazy_load=self.lazy_load))
 
         # We already read the last block in the file -- no need to read it again
         self._internal_blocks.append(block)
 
         # Materialize the internal blocks if we are not lazy
-        if not self._lazy_load:
+        if not self.lazy_load:
             self.finish_reading_internal_blocks()
 
     def get_external_filename(self, filename, index):
diff --git a/asdf/tags/core/ndarray.py b/asdf/tags/core/ndarray.py
index 271a9096b..9639a4bd3 100644
--- a/asdf/tags/core/ndarray.py
+++ b/asdf/tags/core/ndarray.py
@@ -240,7 +240,7 @@ def __init__(self, source, shape, dtype, offset, strides,
         self._offset = offset
         self._strides = strides
         self._order = order
-        if not asdffile.blocks._lazy_load:
+        if not asdffile.blocks.lazy_load:
             self._make_array()
 
     def _make_array(self):

From e2ff6e806eaef45d48e75610eae9fb593654fcf8 Mon Sep 17 00:00:00 2001
From: Vadim Markovtsev
Date: Wed, 24 Oct 2018 17:07:19 +0200
Subject: [PATCH 5/8] Fix lazy_load=False with copy_arrays=False

---
 asdf/block.py         | 32 +++++++++++++++++++++++---------
 asdf/tests/helpers.py |  8 ++++++++
 2 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/asdf/block.py b/asdf/block.py
index 79dee20ca..1b29c93f6 100644
--- a/asdf/block.py
+++ b/asdf/block.py
@@ -1057,10 +1057,16 @@ def read(self, fd, past_magic=False, validate_checksum=False):
                 self._size = header['used_size']
                 self._data_size = header['data_size']
                 if self._lazy_load:
-                    fd.fast_forward(header['allocated_size'])
+                    fd.fast_forward(self._allocated)
                 else:
-                    self._data = self._read_data(fd, self._size, self._data_size)
-                    fd.fast_forward(self._allocated - self._size)
+                    curpos = fd.tell()
+                    self._memmap_data()
+                    fd.seek(curpos)
+                    if not self._memmapped:
+                        self._data = self._read_data(fd, self._size, self._data_size)
+                        fd.fast_forward(self._allocated - self._size)
+                    else:
+                        fd.fast_forward(self._allocated)
         else:
             # If the file is a stream, we need to get the data now.
             if header['flags'] & constants.BLOCK_FLAG_STREAMED:
@@ -1084,12 +1090,24 @@ def read(self, fd, past_magic=False, validate_checksum=False):
         return self
 
     def _read_data(self, fd, used_size, data_size):
+        """
+        Read the block data from a file.
+        """
         if not self.input_compression:
             return fd.read_into_array(used_size)
         else:
             return mcompression.decompress(
                 fd, used_size, data_size, self.input_compression)
 
+    def _memmap_data(self):
+        """
+        Memory map the block data from the file.
+        """
+        memmap = self._fd.can_memmap() and not self.input_compression
+        if self._should_memmap and memmap:
+            self._data = self._fd.memmap_array(self.data_offset, self._size)
+            self._memmapped = True
+
     def write(self, fd):
         """
         Write an internal block to the given Python file-like object.
@@ -1166,12 +1184,8 @@ def data(self):
             # Be nice and reset the file position after we're done
             curpos = self._fd.tell()
             try:
-                memmap = self._fd.can_memmap() and not self.input_compression
-                if self._should_memmap and memmap:
-                    self._data = self._fd.memmap_array(
-                        self.data_offset, self._size)
-                    self._memmapped = True
-                else:
+                self._memmap_data()
+                if not self._memmapped:
                     self._fd.seek(self.data_offset)
                     self._data = self._read_data(
                         self._fd, self._size, self._data_size)
diff --git a/asdf/tests/helpers.py b/asdf/tests/helpers.py
index 2268a1fcc..3d656cd44 100644
--- a/asdf/tests/helpers.py
+++ b/asdf/tests/helpers.py
@@ -243,6 +243,14 @@ def assert_roundtrip_tree(tree, tmpdir, *, asdf_check_func=None,
     if asdf_check_func:
         asdf_check_func(ff)
 
+    # Now repeat with copy_arrays=False and a real file to test mmap()
+    AsdfFile(tree, extensions=extensions, **init_options).write_to(fname, **write_options)
+    with AsdfFile.open(fname, mode='rw', extensions=extensions, copy_arrays=False,
+                       lazy_load=False) as ff:
+        assert_tree_match(tree, ff.tree, ff, funcname=tree_match_func)
+        if asdf_check_func:
+            asdf_check_func(ff)
+
 def yaml_to_asdf(yaml_content, yaml_headers=True, standard_version=None):
     """
     Given a string of YAML content, adds the extra pre-

From 88fbe37216c4fc41a2e16ad2a8e6a0c82e1a4a5c Mon Sep 17 00:00:00 2001
From: Vadim Markovtsev
Date: Wed, 24 Oct 2018 17:13:36 +0200
Subject: [PATCH 6/8] Ensure that all the blocks are fully read if
 lazy_load=False

---
 asdf/tests/helpers.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/asdf/tests/helpers.py b/asdf/tests/helpers.py
index 3d656cd44..62928cc57 100644
--- a/asdf/tests/helpers.py
+++ b/asdf/tests/helpers.py
@@ -20,9 +20,9 @@
     CartesianDifferential = None
 
 from ..asdf import AsdfFile, get_asdf_library_info
+from ..block import Block
 from .httpserver import RangeHTTPServer
 from ..extension import default_extensions
-from .. import util
 from .. import versioning
 from ..tags.core import AsdfObject
 
@@ -238,6 +238,10 @@ def assert_roundtrip_tree(tree, tmpdir, *, asdf_check_func=None,
         AsdfFile(tree, extensions=extensions, **init_options).write_to(buff, **write_options)
         buff.seek(0)
         ff = AsdfFile.open(buff, extensions=extensions, copy_arrays=True, lazy_load=False)
+        # Ensure that all the blocks are loaded
+        for block in ff.blocks._internal_blocks:
+            assert isinstance(block, Block)
+            assert block._data is not None
     # The underlying file is closed at this time and everything should still work
     assert_tree_match(tree, ff.tree, ff, funcname=tree_match_func)
     if asdf_check_func:
@@ -247,6 +251,9 @@ def assert_roundtrip_tree(tree, tmpdir, *, asdf_check_func=None,
     AsdfFile(tree, extensions=extensions, **init_options).write_to(fname, **write_options)
     with AsdfFile.open(fname, mode='rw', extensions=extensions, copy_arrays=False,
                        lazy_load=False) as ff:
+        for block in ff.blocks._internal_blocks:
+            assert isinstance(block, Block)
+            assert block._data is not None
         assert_tree_match(tree, ff.tree, ff, funcname=tree_match_func)
         if asdf_check_func:
             asdf_check_func(ff)

From 7b2eb2d7b26eefbedf751ab7c04ef2ce6f843db6 Mon Sep 17 00:00:00 2001
From: Vadim Markovtsev
Date: Wed, 24 Oct 2018 18:23:51 +0200
Subject: [PATCH 7/8] Add double backticks in changelog

---
 CHANGES.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 7433da5b7..7870758b7 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -6,13 +6,13 @@ constructor. It can also be controlled with the existing ``set_array_storage``
   method of ``AsdfFile`` and the ``all_array_storage`` argument to
   ``AsdfFile.write_to``. [#557]
 
-- Add new parameter `lazy_load` to ``AsdfFile.open``. It is ``True`` by
+- Add new parameter ``lazy_load`` to ``AsdfFile.open``. It is ``True`` by
   default and preserves the default behavior. ``False`` detaches the
   loaded tree from the underlying file: all blocks are fully read and
   numpy arrays are materialized. Thus it becomes safe to close the file
-  and continue using ``AsdfFile.tree``. However, the `copy_arrays` parameter
+  and continue using ``AsdfFile.tree``. However, the ``copy_arrays`` parameter
   is still effective and the active memory maps may still require the file
-  to stay open in case `copy_arrays` is ``False``. [#573]
+  to stay open in case ``copy_arrays`` is ``False``. [#573]
 
 2.1.1 (unreleased)
 ------------------

From 62561d31e2153d670afdae47db6bdc888042f343 Mon Sep 17 00:00:00 2001
From: Vadim Markovtsev
Date: Wed, 24 Oct 2018 18:51:50 +0200
Subject: [PATCH 8/8] Remove the note about file mode for copy_arrays

See https://github.com/spacetelescope/asdf/issues/574

---
 asdf/asdf.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/asdf/asdf.py b/asdf/asdf.py
index a74bcf524..0d75d8e63 100644
--- a/asdf/asdf.py
+++ b/asdf/asdf.py
@@ -90,8 +90,7 @@ def __init__(self, tree=None, uri=None, extensions=None, version=None,
 
         copy_arrays : bool, optional
             When `False`, when reading files, attempt to memmap underlying data
-            arrays when possible. Please note that the file must be opened
-            in "rw" mode; "r" will result in a SIGSEGV.
+            arrays when possible.
 
         lazy_load : bool, optional
             When `True` and the underlying file handle is seekable, data
@@ -760,8 +759,7 @@ def open(cls, fd, uri=None, mode='r',
 
         copy_arrays : bool, optional
             When `False`, when reading files, attempt to memmap underlying data
-            arrays when possible. Please note that the file must be opened
-            in "rw" mode; "r" will result in a SIGSEGV.
+            arrays when possible.
 
         lazy_load : bool, optional
             When `True` and the underlying file handle is seekable, data
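
Usage note (editor's addition, not part of the patch series): the sketch below illustrates the behavior these commits add, based on the docstrings and changelog above. It is a minimal example, not code from the PR; the file name is hypothetical, and it assumes the ``AsdfFile.open`` signature introduced in patch 1.

```python
import numpy as np
from asdf import AsdfFile

# Write a small file containing one internal array block
# (hypothetical file name).
AsdfFile({'data': np.arange(100)}).write_to('example.asdf')

# Default behavior (lazy_load=True): the block is read only when the
# array is first accessed, so the file must stay open while the tree
# is in use.
ff = AsdfFile.open('example.asdf')
first = ff.tree['data'][0]  # the block is materialized here
ff.close()

# With lazy_load=False and copy_arrays=True, every block is read up
# front into ordinary in-memory arrays, so the tree remains usable
# after the file is closed -- the case exercised by the new tests in
# asdf/tests/helpers.py.
with AsdfFile.open('example.asdf', lazy_load=False, copy_arrays=True) as ff:
    data = ff.tree['data']
# The file is closed now, but the array was fully loaded, so this works:
print(data[:10])
```

As the changelog entry notes, combining ``lazy_load=False`` with ``copy_arrays=False`` instead memory-maps the blocks, so the file may still need to stay open while those maps are active.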