Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pint support for variables #3706

Merged
merged 23 commits into from
Feb 23, 2020
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
e5a6632
get fillna tests to pass
keewis Jan 17, 2020
9414fe3
get the _getitem_with_mask tests to pass
keewis Jan 17, 2020
12d2fe4
silence the behavior change warning of pint
keewis Jan 17, 2020
077d67a
don't use 0 as fill value since that has special behaviour
keewis Jan 17, 2020
e03fac8
use concat as a class method
keewis Jan 19, 2020
6b65a76
use np.pad after trimming instead of concatenating a filled array
keewis Jan 19, 2020
88320ef
rewrite the concat test to pass appropriate arrays
keewis Jan 19, 2020
1e07dce
use da.pad when dealing with dask arrays
keewis Jan 20, 2020
3942193
Merge branch 'master' into pint-support-variables
keewis Jan 22, 2020
ce572de
mark the failing pad tests as xfail when on a current pint version
keewis Jan 22, 2020
8bfc347
Merge branch 'master' into pint-support-variables
keewis Feb 5, 2020
3d16f2e
update whats-new.rst
keewis Feb 5, 2020
a8cf968
fix the import order
keewis Feb 5, 2020
9fdbd56
Merge branch 'master' into pint-support-variables
keewis Feb 5, 2020
98e2b40
test using pint master
keewis Feb 5, 2020
05946b1
fix the install command
keewis Feb 5, 2020
8c490ee
reimplement the pad test to really work with units
keewis Feb 5, 2020
f6eca88
use np.logical_not instead
keewis Feb 7, 2020
72241e5
use duck_array_ops to provide pad
keewis Feb 7, 2020
2ebfa6b
add comments explaining the order of the arguments to where
keewis Feb 7, 2020
0a0c2d3
mark the flipped parameter changes with a todo
keewis Feb 23, 2020
4cadc52
skip the identical tests
keewis Feb 23, 2020
b51caa4
remove the warnings filter
keewis Feb 23, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/azure/install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ steps:
git+https://github.com/zarr-developers/zarr \
git+https://github.com/Unidata/cftime \
git+https://github.com/mapbox/rasterio \
git+https://github.com/hgrecco/pint \
git+https://github.com/pydata/bottleneck
condition: eq(variables['UPSTREAM_DEV'], 'true')
displayName: Install upstream dev dependencies
Expand Down
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Breaking changes

New Features
~~~~~~~~~~~~
- implement pint support. (:issue:`3594`, :pull:`3706`)
By `Justus Magin <https://github.com/keewis>`_.

Bug fixes
~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def where_method(data, cond, other=dtypes.NA):


def fillna(data, other):
return where(isnull(data), other, data)
return where(notnull(data), data, other)

dcherian marked this conversation as resolved.
Show resolved Hide resolved

def concatenate(arrays, axis=0):
Expand Down
31 changes: 16 additions & 15 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,11 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA):

data = as_indexable(self._data)[actual_indexer]
mask = indexing.create_mask(indexer, self.shape, data)
data = duck_array_ops.where(mask, fill_value, data)
if isinstance(mask, bool):
mask = not mask
else:
mask = ~mask
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you flip the argument order rather than adding this? I’m a little puzzles here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the concern here is about consistency when applying ~ to bool objects and boolean dtype arrays, explicitly calling np.logical_not is a good alternative.

But it does feel a little weird to me to see this here. Maybe changing duck_array_ops.notnull would have the same effect?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same as the fillna issue above, in order to get the results in the units of data, we need to flip the arguments and for that I need invert the mask (if there is a different way to flip the arguments without inverting, please do tell me).

I tried to use mask = ~mask, but ~ does not work as expected for bool. I'll use np.logical_not instead.

data = duck_array_ops.where(mask, data, fill_value)
else:
# array cannot be indexed along dimensions of size 0, so just
# build the mask directly instead.
Expand Down Expand Up @@ -1099,24 +1103,21 @@ def _shift_one_dim(self, dim, count, fill_value=dtypes.NA):
else:
dtype = self.dtype

shape = list(self.shape)
shape[axis] = min(abs(count), shape[axis])
width = min(abs(count), self.shape[axis])
dim_pad = (width, 0) if count >= 0 else (0, width)
pads = [(0, 0) if d != dim else dim_pad for d in self.dims]

if isinstance(trimmed_data, dask_array_type):
chunks = list(trimmed_data.chunks)
chunks[axis] = (shape[axis],)
full = functools.partial(da.full, chunks=chunks)
pad_func = da.pad
else:
full = np.full

filler = full(shape, fill_value, dtype=dtype)
pad_func = np.pad
keewis marked this conversation as resolved.
Show resolved Hide resolved

if count > 0:
arrays = [filler, trimmed_data]
else:
arrays = [trimmed_data, filler]

data = duck_array_ops.concatenate(arrays, axis)
data = pad_func(
trimmed_data.astype(dtype),
pads,
mode="constant",
constant_values=fill_value,
)

if isinstance(data, dask_array_type):
# chunked data should come out with the same chunks; this makes
Expand Down
99 changes: 63 additions & 36 deletions xarray/tests/test_units.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import operator
import warnings
from distutils.version import LooseVersion

import numpy as np
import pandas as pd
Expand All @@ -19,6 +21,13 @@
unit_registry = pint.UnitRegistry(force_ndarray=True)
Quantity = unit_registry.Quantity


# silence pint's BehaviorChangeWarning
with warnings.catch_warnings():
warnings.simplefilter("ignore")
Quantity([])
dcherian marked this conversation as resolved.
Show resolved Hide resolved


pytestmark = [
pytest.mark.skipif(
not IS_NEP18_ACTIVE, reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled"
Expand Down Expand Up @@ -1536,27 +1545,17 @@ def test_missing_value_detection(self, func):
@pytest.mark.parametrize(
"unit,error",
(
pytest.param(
1,
DimensionalityError,
id="no_unit",
marks=pytest.mark.xfail(reason="uses 0 as a replacement"),
),
pytest.param(1, DimensionalityError, id="no_unit"),
pytest.param(
unit_registry.dimensionless, DimensionalityError, id="dimensionless"
),
pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
pytest.param(
unit_registry.cm,
None,
id="compatible_unit",
marks=pytest.mark.xfail(reason="converts to fill value's unit"),
),
pytest.param(unit_registry.cm, None, id="compatible_unit"),
pytest.param(unit_registry.m, None, id="identical_unit"),
),
)
def test_missing_value_fillna(self, unit, error):
value = 0
value = 10
array = (
np.array(
[
Expand Down Expand Up @@ -1762,14 +1761,7 @@ def test_1d_math(self, func, unit, error, dtype):
unit_registry.dimensionless, DimensionalityError, id="dimensionless"
),
pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"),
pytest.param(
unit_registry.cm,
None,
id="compatible_unit",
marks=pytest.mark.xfail(
reason="getitem_with_mask converts to the unit of other"
),
),
pytest.param(unit_registry.cm, None, id="compatible_unit"),
pytest.param(unit_registry.m, None, id="identical_unit"),
),
)
Expand Down Expand Up @@ -1853,12 +1845,7 @@ def test_squeeze(self, dtype):
),
method("reduce", np.std, "x"),
method("round", 2),
pytest.param(
method("shift", {"x": -2}),
marks=pytest.mark.xfail(
reason="trying to concatenate ndarray to quantity"
),
),
method("shift", {"x": -2}),
method("transpose", "y", "x"),
),
ids=repr,
Expand Down Expand Up @@ -1933,7 +1920,6 @@ def test_unstack(self, dtype):
assert_units_equal(expected, actual)
xr.testing.assert_identical(expected, actual)

@pytest.mark.xfail(reason="ignores units")
@pytest.mark.parametrize(
"unit,error",
(
Expand All @@ -1948,25 +1934,28 @@ def test_unstack(self, dtype):
)
def test_concat(self, unit, error, dtype):
array1 = (
np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m
np.linspace(0, 5, 9 * 10).reshape(3, 6, 5).astype(dtype) * unit_registry.m
)
array2 = np.linspace(5, 10, 10 * 2).reshape(10, 2).astype(dtype) * unit
array2 = np.linspace(5, 10, 10 * 3).reshape(3, 2, 5).astype(dtype) * unit

variable = xr.Variable(("x", "y"), array1)
other = xr.Variable(("y", "z"), array2)
variable = xr.Variable(("x", "y", "z"), array1)
other = xr.Variable(("x", "y", "z"), array2)

if error is not None:
with pytest.raises(error):
variable.concat(other)
xr.Variable.concat([variable, other], dim="y")

return

units = extract_units(variable)
expected = attach_units(
strip_units(variable).concat(strip_units(convert_units(other, units))),
xr.Variable.concat(
[strip_units(variable), strip_units(convert_units(other, units))],
dim="y",
),
units,
)
actual = variable.concat(other)
actual = xr.Variable.concat([variable, other], dim="y")

assert_units_equal(expected, actual)
xr.testing.assert_identical(expected, actual)
Expand Down Expand Up @@ -2036,6 +2025,43 @@ def test_no_conflicts(self, unit, dtype):

assert expected == actual

def test_pad(self, dtype):
data = np.arange(4 * 3 * 2).reshape(4, 3, 2).astype(dtype) * unit_registry.m
v = xr.Variable(["x", "y", "z"], data)

xr_args = [{"x": (2, 1)}, {"y": (0, 3)}, {"x": (3, 1), "z": (2, 0)}]
np_args = [
((2, 1), (0, 0), (0, 0)),
((0, 0), (0, 3), (0, 0)),
((3, 1), (0, 0), (2, 0)),
]
for xr_arg, np_arg in zip(xr_args, np_args):
actual = v.pad_with_fill_value(**xr_arg)
expected = xr.Variable(
v.dims,
np.pad(
v.data.astype(float),
np_arg,
mode="constant",
constant_values=np.nan,
),
)
xr.testing.assert_identical(expected, actual)
assert_units_equal(expected, actual)
assert isinstance(actual._data, type(v._data))

# for the boolean array, we pad False
data = np.full_like(data, False, dtype=bool).reshape(4, 3, 2)
v = xr.Variable(["x", "y", "z"], data)
for xr_arg, np_arg in zip(xr_args, np_args):
actual = v.pad_with_fill_value(fill_value=data.flat[0], **xr_arg)
expected = xr.Variable(
v.dims,
np.pad(v.data, np_arg, mode="constant", constant_values=v.data.flat[0]),
)
xr.testing.assert_identical(actual, expected)
assert_units_equal(expected, actual)

@pytest.mark.parametrize(
"unit,error",
(
Expand All @@ -2044,7 +2070,8 @@ def test_no_conflicts(self, unit, dtype):
DimensionalityError,
id="no_unit",
marks=pytest.mark.xfail(
reason="is not treated the same as dimensionless"
LooseVersion(pint.__version__) < LooseVersion("0.10.2"),
reason="bug in pint's implementation of np.pad",
),
),
pytest.param(
Expand Down