Commit e5a9b60

Squashed commit of the following:
commit 398f1b6
Author: dcherian <[email protected]>
Date:   Fri May 20 08:47:56 2022 -0600

    Backward compatibility dask

commit bde40e4
Merge: 0783df3 4cae8d0
Author: dcherian <[email protected]>
Date:   Fri May 20 07:54:48 2022 -0600

    Merge branch 'main' into dask-datetime-to-numeric

    * main:
      concatenate docs style (pydata#6621)
      Typing for open_dataset/array/mfdataset and to_netcdf/zarr (pydata#6612)
      {full,zeros,ones}_like typing (pydata#6611)

commit 0783df3
Merge: 5cff4f1 8de7061
Author: dcherian <[email protected]>
Date:   Sun May 15 21:03:50 2022 -0600

    Merge branch 'main' into dask-datetime-to-numeric

    * main: (24 commits)
      Fix overflow issue in decode_cf_datetime for dtypes <= np.uint32 (pydata#6598)
      Enable flox in GroupBy and resample (pydata#5734)
      Add setuptools as dependency in ASV benchmark CI (pydata#6609)
      change polyval dim ordering (pydata#6601)
      re-add timedelta support for polyval (pydata#6599)
      Minor Dataset.map docstr clarification (pydata#6595)
      New inline_array kwarg for open_dataset (pydata#6566)
      Fix polyval overloads (pydata#6593)
      Restore old MultiIndex dropping behaviour (pydata#6592)
      [docs] add Dataset.assign_coords example (pydata#6336) (pydata#6558)
      Fix zarr append dtype checks (pydata#6476)
      Add missing space in exception message (pydata#6590)
      Doc Link to accessors list in extending-xarray.rst (pydata#6587)
      Fix Dataset/DataArray.isel with drop=True and scalar DataArray indexes (pydata#6579)
      Add some warnings about rechunking to the docs (pydata#6569)
      [pre-commit.ci] pre-commit autoupdate (pydata#6584)
      terminology.rst: fix link to Unidata's "netcdf_dataset_components" (pydata#6583)
      Allow string formatting of scalar DataArrays (pydata#5981)
      Fix mypy issues & reenable in tests (pydata#6581)
      polyval: Use Horner's algorithm + support chunked inputs (pydata#6548)
      ...

commit 5cff4f1
Merge: dfe200d 6144c61
Author: Maximilian Roos <[email protected]>
Date:   Sun May 1 15:16:33 2022 -0700

    Merge branch 'main' into dask-datetime-to-numeric

commit dfe200d
Author: dcherian <[email protected]>
Date:   Sun May 1 11:04:03 2022 -0600

    Minor cleanup

commit 35ed378
Author: dcherian <[email protected]>
Date:   Sun May 1 10:57:36 2022 -0600

    Support dask arrays in datetime_to_numeric
dcherian committed May 20, 2022
1 parent ff5ad1e · commit e5a9b60
Showing 2 changed files with 58 additions and 13 deletions.
22 changes: 19 additions & 3 deletions xarray/core/duck_array_ops.py
@@ -431,7 +431,14 @@ def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float):
     # Compute timedelta object.
     # For np.datetime64, this can silently yield garbage due to overflow.
     # One option is to enforce 1970-01-01 as the universal offset.
-    array = array - offset
+
+    # This map_blocks call is for backwards compatibility.
+    # dask == 2021.04.1 does not support subtracting object arrays
+    # which is required for cftime
+    if is_duck_dask_array(array):
+        array = array.map_blocks(lambda a, b: a - b, offset)
+    else:
+        array = array - offset

     # Scalar is converted to 0d-array
     if not hasattr(array, "dtype"):
@@ -517,10 +524,19 @@ def pd_timedelta_to_float(value, datetime_unit):
     return np_timedelta64_to_float(value, datetime_unit)


+def _timedelta_to_seconds(array):
+    return np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6
+
+
 def py_timedelta_to_float(array, datetime_unit):
     """Convert a timedelta object to a float, possibly at a loss of resolution."""
-    array = np.asarray(array)
-    array = np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6
+    array = asarray(array)
+    if is_duck_dask_array(array):
+        array = array.map_blocks(
+            _timedelta_to_seconds, meta=np.array([], dtype=np.float64)
+        )
+    else:
+        array = _timedelta_to_seconds(array)
     conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit)
     return conversion_factor * array

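Note: the snippet below is an illustrative sketch of what this change enables, not part of the commit. Like the tests, it calls the internal module xarray.core.duck_array_ops directly, and it assumes dask and pandas are installed.

# Illustrative sketch only -- duck_array_ops is xarray-internal, not public API.
import dask.array
import numpy as np
import pandas as pd

from xarray.core import duck_array_ops

times = pd.date_range("2000", periods=5, freq="7D").values
lazy_times = dask.array.from_array(times, chunks=-1)

# With this patch the conversion builds a dask graph instead of computing:
result = duck_array_ops.datetime_to_numeric(lazy_times, datetime_unit="h")
print(type(result))      # a dask array, i.e. still lazy
print(result.compute())  # [  0. 168. 336. 504. 672.]

Before this commit, py_timedelta_to_float coerced its input with np.asarray, which eagerly computes a dask array; the new map_blocks branch keeps the conversion lazy.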
49 changes: 39 additions & 10 deletions xarray/tests/test_duck_array_ops.py
@@ -675,39 +675,68 @@ def test_multiple_dims(dtype, dask, skipna, func):
     assert_allclose(actual, expected)


-def test_datetime_to_numeric_datetime64():
+@pytest.mark.parametrize("dask", [True, False])
+def test_datetime_to_numeric_datetime64(dask):
+    if dask and not has_dask:
+        pytest.skip("requires dask")
+
     times = pd.date_range("2000", periods=5, freq="7D").values
-    result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h")
+    if dask:
+        import dask.array
+
+        times = dask.array.from_array(times, chunks=-1)
+
+    with raise_if_dask_computes():
+        result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h")
     expected = 24 * np.arange(0, 35, 7)
     np.testing.assert_array_equal(result, expected)

     offset = times[1]
-    result = duck_array_ops.datetime_to_numeric(times, offset=offset, datetime_unit="h")
+    with raise_if_dask_computes():
+        result = duck_array_ops.datetime_to_numeric(
+            times, offset=offset, datetime_unit="h"
+        )
     expected = 24 * np.arange(-7, 28, 7)
     np.testing.assert_array_equal(result, expected)

     dtype = np.float32
-    result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype)
+    with raise_if_dask_computes():
+        result = duck_array_ops.datetime_to_numeric(
+            times, datetime_unit="h", dtype=dtype
+        )
     expected = 24 * np.arange(0, 35, 7).astype(dtype)
     np.testing.assert_array_equal(result, expected)


 @requires_cftime
-def test_datetime_to_numeric_cftime():
+@pytest.mark.parametrize("dask", [True, False])
+def test_datetime_to_numeric_cftime(dask):
+    if dask and not has_dask:
+        pytest.skip("requires dask")
+
     times = cftime_range("2000", periods=5, freq="7D", calendar="standard").values
-    result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int)
+    if dask:
+        import dask.array
+
+        times = dask.array.from_array(times, chunks=-1)
+    with raise_if_dask_computes():
+        result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=int)
     expected = 24 * np.arange(0, 35, 7)
     np.testing.assert_array_equal(result, expected)

     offset = times[1]
-    result = duck_array_ops.datetime_to_numeric(
-        times, offset=offset, datetime_unit="h", dtype=int
-    )
+    with raise_if_dask_computes():
+        result = duck_array_ops.datetime_to_numeric(
+            times, offset=offset, datetime_unit="h", dtype=int
+        )
     expected = 24 * np.arange(-7, 28, 7)
     np.testing.assert_array_equal(result, expected)

     dtype = np.float32
-    result = duck_array_ops.datetime_to_numeric(times, datetime_unit="h", dtype=dtype)
+    with raise_if_dask_computes():
+        result = duck_array_ops.datetime_to_numeric(
+            times, datetime_unit="h", dtype=dtype
+        )
     expected = 24 * np.arange(0, 35, 7).astype(dtype)
     np.testing.assert_array_equal(result, expected)
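Note: the backwards-compatibility branch exercised by these tests exists because, on dask == 2021.04.1, subtracting a scalar from an object-dtype dask array (what cftime dates decode to) failed, while routing the operation through map_blocks works because each block is a plain numpy array. Below is a minimal stand-alone sketch of that pattern, using toy object-dtype integers in place of cftime datetimes (illustration only, not from the commit).

# Minimal illustration of the map_blocks workaround used in this commit.
# Toy object-dtype data stands in for an array of cftime datetimes.
import dask.array
import numpy as np

values = np.array([10, 20, 30], dtype=object)
lazy = dask.array.from_array(values, chunks=1)
offset = values[0]

# Equivalent to `lazy - offset`, but the subtraction happens inside each
# block on plain numpy arrays, which old dask handles for object dtype.
shifted = lazy.map_blocks(lambda a, b: a - b, offset, dtype=object)
print(shifted.compute())  # [0 10 20]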
