From fa76ab73a180a103fb48830d0c1d20f5ca2d1c99 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer
Date: Mon, 17 Jun 2019 07:11:34 -0700
Subject: [PATCH 1/2] Ensure indexing explicitly indexed arrays don't leak out.

Previously, indexing an ImplicitToExplicitIndexingAdapter object could
directly return an ExplicitlyIndexed object, which could not be indexed
normally. This resulted in broken behavior with dask's new `_meta`
attribute.

This change almost but not entirely fixes xarray on dask master. There
are still errors raised inside two tests from dask's `blockwise_meta`
helper function:

>       return meta.astype(dtype)
E       AttributeError: 'ImplicitToExplicitIndexingAdapter' object has no attribute 'astype'
---
 xarray/core/indexing.py       | 8 +++++++-
 xarray/tests/test_indexing.py | 9 ++++++++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index 65a123c3319..1891b547048 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -453,7 +453,13 @@ def __array__(self, dtype=None):
 
     def __getitem__(self, key):
         key = expanded_indexer(key, self.ndim)
-        return self.array[self.indexer_cls(key)]
+        result = self.array[self.indexer_cls(key)]
+        if isinstance(result, ExplicitlyIndexed):
+            return type(self)(result, self.indexer_cls)
+        else:
+            # Sometimes explicitly indexed arrays return NumPy arrays or
+            # scalars.
+            return result
 
 
 class LazilyOuterIndexedArray(ExplicitlyIndexedNDArrayMixin):
diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py
index 14b79c71ca4..d3cd8bd30ce 100644
--- a/xarray/tests/test_indexing.py
+++ b/xarray/tests/test_indexing.py
@@ -505,13 +505,20 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support):
 
 
 def test_implicit_indexing_adapter():
-    array = np.arange(10)
+    array = np.arange(10, dtype=np.int64)
     implicit = indexing.ImplicitToExplicitIndexingAdapter(
         indexing.NumpyIndexingAdapter(array), indexing.BasicIndexer)
     np.testing.assert_array_equal(array, np.asarray(implicit))
     np.testing.assert_array_equal(array, implicit[:])
 
 
+def test_implicit_indexing_adapter_copy_on_write():
+    array = np.arange(10, dtype=np.int64)
+    implicit = indexing.ImplicitToExplicitIndexingAdapter(
+        indexing.CopyOnWriteArray(array))
+    assert isinstance(implicit[:], indexing.ImplicitToExplicitIndexingAdapter)
+
+
 def test_outer_indexer_consistency_with_broadcast_indexes_vectorized():
     def nonzero(x):
         if isinstance(x, np.ndarray) and x.dtype.kind == 'b':

From bc9786d8de0f03ead62c3538856773ff1951641e Mon Sep 17 00:00:00 2001
From: Stephan Hoyer
Date: Sun, 23 Jun 2019 14:10:27 +0300
Subject: [PATCH 2/2] Set meta in dask.array.from_array

---
 .travis.yml             |  2 +-
 xarray/core/variable.py | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index ee242ebf818..efa903f5083 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,6 +16,7 @@ matrix:
   - env:
     - CONDA_ENV=py36
     - EXTRA_FLAGS="--run-flaky --run-network-tests"
+  - env: CONDA_ENV=py36-dask-dev
   - env: CONDA_ENV=py36-pandas-dev
   - env: CONDA_ENV=py36-rasterio
   - env: CONDA_ENV=py36-zarr-dev
@@ -25,7 +26,6 @@ matrix:
   - env: CONDA_ENV=py36-hypothesis
 
   allow_failures:
-  - env: CONDA_ENV=py36-dask-dev
   - env:
     - CONDA_ENV=py36
     - EXTRA_FLAGS="--run-flaky --run-network-tests"
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
index ab1be181e31..cccb9663ad5 100644
--- a/xarray/core/variable.py
+++ b/xarray/core/variable.py
@@ -3,6 +3,7 @@ import typing
 from collections import OrderedDict, defaultdict
 from datetime import timedelta
+from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd
 
@@ -870,6 +871,7 @@ def chunk(self, chunks=None, name=None, lock=False):
         -------
         chunked : xarray.Variable
         """
+        import dask
         import dask.array as da
 
         if utils.is_dict_like(chunks):
@@ -892,7 +894,17 @@ def chunk(self, chunks=None, name=None, lock=False):
             # https://github.com/dask/dask/issues/2883
             data = indexing.ImplicitToExplicitIndexingAdapter(
                 data, indexing.OuterIndexer)
-            data = da.from_array(data, chunks, name=name, lock=lock)
+
+            # For now, assume that all arrays that we wrap with dask (including
+            # our lazily loaded backend array classes) should use NumPy array
+            # operations.
+            if LooseVersion(dask.__version__) > '1.2.2':
+                kwargs = dict(meta=np.ndarray)
+            else:
+                kwargs = dict()
+
+            data = da.from_array(
+                data, chunks, name=name, lock=lock, **kwargs)
 
         return type(self)(self.dims, data, self._attrs, self._encoding,
                           fastpath=True)