From 9485861f7eba9f186ae232763054ee0470285e4b Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 14 Sep 2024 14:57:04 -0400 Subject: [PATCH] Opt out of floor division for float dtype time encoding --- doc/whats-new.rst | 5 +++- xarray/coding/times.py | 4 +-- xarray/tests/test_coding_times.py | 42 +++++++++++++++++++++++-------- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index e6caac788b1..48229cc8406 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -46,7 +46,10 @@ Bug fixes - Make illegal path-like variable names when constructing a DataTree from a Dataset (:issue:`9339`, :pull:`9378`) By `Etienne Schalk `_. - +- Fix bug when encoding times with missing values as floats in the case when + the non-missing times could in theory be encoded with integers + (:issue:`9488`, :pull:`9497`). By `Spencer Clark + `_. Documentation diff --git a/xarray/coding/times.py b/xarray/coding/times.py index cfdecd28a27..5655bd20afc 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -771,7 +771,7 @@ def _eagerly_encode_cf_datetime( # needed time delta to encode faithfully to int64 needed_time_delta = _time_units_to_timedelta64(needed_units) - floor_division = True + floor_division = np.issubdtype(dtype, np.integer) or dtype is None if time_delta > needed_time_delta: floor_division = False if dtype is None: @@ -892,7 +892,7 @@ def _eagerly_encode_cf_timedelta( # needed time delta to encode faithfully to int64 needed_time_delta = _time_units_to_timedelta64(needed_units) - floor_division = True + floor_division = np.issubdtype(dtype, np.integer) or dtype is None if time_delta > needed_time_delta: floor_division = False if dtype is None: diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 5879e6beed8..66caf25cc73 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1383,16 +1383,38 @@ def test_roundtrip_timedelta64_nanosecond_precision_warning() -> None: assert decoded_var.encoding["dtype"] == np.int64 -def test_roundtrip_float_times() -> None: - # Regression test for GitHub issue #8271 - fill_value = 20.0 - times = [ - np.datetime64("1970-01-01 00:00:00", "ns"), - np.datetime64("1970-01-01 06:00:00", "ns"), - np.datetime64("NaT", "ns"), - ] +_TEST_ROUNDTRIP_FLOAT_TIMES_TESTS = { + "GH-8271": ( + 20.0, + np.array( + ["1970-01-01 00:00:00", "1970-01-01 06:00:00", "NaT"], + dtype="datetime64[ns]", + ), + "days since 1960-01-01", + np.array([3653, 3653.25, 20.0]), + ), + "GH-9488-datetime64[ns]": ( + 1.0e20, + np.array(["2010-01-01 12:00:00", "NaT"], dtype="datetime64[ns]"), + "seconds since 2010-01-01", + np.array([43200, 1.0e20]), + ), + "GH-9488-timedelta64[ns]": ( + 1.0e20, + np.array([1_000_000_000, "NaT"], dtype="timedelta64[ns]"), + "seconds", + np.array([1.0, 1.0e20]), + ), +} + - units = "days since 1960-01-01" +@pytest.mark.parametrize( + ("fill_value", "times", "units", "encoded_values"), + _TEST_ROUNDTRIP_FLOAT_TIMES_TESTS.values(), + ids=_TEST_ROUNDTRIP_FLOAT_TIMES_TESTS.keys(), +) +def test_roundtrip_float_times(fill_value, times, units, encoded_values) -> None: + # Regression test for GitHub issues #8271 and #9488 var = Variable( ["time"], times, @@ -1400,7 +1422,7 @@ def test_roundtrip_float_times() -> None: ) encoded_var = conventions.encode_cf_variable(var) - np.testing.assert_array_equal(encoded_var, np.array([3653, 3653.25, 20.0])) + np.testing.assert_array_equal(encoded_var, encoded_values) assert encoded_var.attrs["units"] == units assert encoded_var.attrs["_FillValue"] == fill_value