From 328d0ca8361ab51a51ecaaec8b7a77b1ef7f1012 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 31 Jul 2023 14:08:38 -0400 Subject: [PATCH 1/5] Refactor bases classes --- CHANGES.rst | 5 +- tests/test_generic_indicators.py | 5 +- xclim/core/indicator.py | 117 +++++++++++++++++++---------- xclim/indicators/generic/_stats.py | 8 +- 4 files changed, 86 insertions(+), 49 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4a0dbc446..a0ccf2267 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,9 +14,9 @@ New features and enhancements Bug fixes ^^^^^^^^^ -* Fix `kldiv` docstring so the math formula renders to HTML. (:issue:`1408`, :pull:`1409`). +* Fix ``kldiv`` docstring so the math formula renders to HTML. (:issue:`1408`, :pull:`1409`). * Fix the registry entries of "generic" indicators. (:issue:`1423`, :pull:`1424`). -* Fix `jetstream_metric_woollings` so it uses the `vertical` coordinate identified by `cf-xarray`, instead of `pressure`. (:issue:`1421`, :pull:`1422`). Add logic to handle coordinates in decreasing order, or for longitudes defined from 0-360 instead of -180 to 180. (:issue:`1429`, :pull:`1430`). +* Fix ``jetstream_metric_woollings`` so it uses the `vertical` coordinate identified by `cf-xarray`, instead of `pressure`. (:issue:`1421`, :pull:`1422`). Add logic to handle coordinates in decreasing order, or for longitudes defined from 0-360 instead of -180 to 180. (:issue:`1429`, :pull:`1430`). * Fix virtual indicator attribute assignment causing individual indicator's realm to be ignored. (:issue:`1425`, :pull:`1426`). Internal changes @@ -25,6 +25,7 @@ Internal changes * Increased the guess of number of quantiles needed in ExtremeValues. (:pull:`1413`). * Tolerance thresholds for error in ``test_processing::test_adapt_freq`` have been relaxed to allow for more variation in the results. (:issue:`1417`, :pull:`1418`). * Added 'streamflow' to the list of known variables (:pull:`1431`). 
+* Refactor base indicator classes and fix misleading inheritance of ``return_level`` (:issue:`1263`, :pull:`1446`). v0.44.0 (2023-06-23) -------------------- diff --git a/tests/test_generic_indicators.py b/tests/test_generic_indicators.py index 8a45a5f2f..6bb8c4e79 100644 --- a/tests/test_generic_indicators.py +++ b/tests/test_generic_indicators.py @@ -87,8 +87,9 @@ def test_ndq(self, ndq_series): assert out.attrs["units"] == "m3 s-1" def test_missing(self, ndq_series): - a = ndq_series - a = ndq_series.where(~((a.time.dt.dayofyear == 5) * (a.time.dt.year == 1902))) + a = ndq_series.where( + ~((ndq_series.time.dt.dayofyear == 5) & (ndq_series.time.dt.year == 1902)) + ) assert a.shape == (5000, 2, 3) out = generic.stats(a, op="max", month=1) diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index 78cf15591..bb6927847 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -1355,11 +1355,11 @@ def _show_deprecation_warning(self): ) -class ResamplingIndicator(Indicator): - """Indicator that performs a resampling computation. +class CheckMissingIndicator(Indicator): + """Class for indicators that completely reduce the time dimension, adding missing value checks. - Compared to the base Indicator, this adds the handling of missing data, - and the check of allowed periods. + A full reduction of the "time" dimension is expected by default: the missing step will fail if the output still has a time dimension. + To enable resampling, the :py:meth:`_get_missing_freq` method can be subclassed to return the resampling frequency. The method is always called with the indicator parameters. Parameters ---------- @@ -1368,24 +1368,10 @@ class ResamplingIndicator(Indicator): None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". missing_options : dict, optional Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. 
- allowed_periods : Sequence[str], optional - A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be - computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the - indicator doesn't take a `freq` argument. """ missing = "from_context" missing_options: dict | None = None - allowed_periods: list[str] | None = None - - @classmethod - def _ensure_correct_parameters(cls, parameters): - if "freq" not in parameters: - raise ValueError( - "ResamplingIndicator require a 'freq' argument, use the base Indicator" - " class if your computation doesn't perform any resampling." - ) - return super()._ensure_correct_parameters(parameters) def __init__(self, **kwds): if self.missing == "from_context" and self.missing_options is not None: @@ -1401,23 +1387,6 @@ def __init__(self, **kwds): super().__init__(**kwds) - def _preprocess_and_checks(self, das, params): - """Perform parent's checks and also check if freq is allowed.""" - das, params = super()._preprocess_and_checks(das, params) - - # Check if the period is allowed: - if ( - self.allowed_periods is not None - and parse_offset(params["freq"])[1] not in self.allowed_periods - ): - raise ValueError( - f"Resampling frequency {params['freq']} is not allowed for indicator " - f"{self.identifier} (needs something equivalent to one " - f"of {self.allowed_periods})." 
- ) - - return das, params - def _history_string(self, **kwargs): if self.missing == "from_context": missing = OPTIONS[CHECK_MISSING] @@ -1432,6 +1401,10 @@ def _history_string(self, **kwargs): return super()._history_string(**kwargs) + opt_str + def _get_missing_freq(self, params): + """Return the resampling frequency to be used in the missing values check.""" + return None + def _postprocess(self, outs, das, params): """Masking of missing values.""" outs = super()._postprocess(outs, das, params) @@ -1445,25 +1418,79 @@ def _postprocess(self, outs, das, params): # We flag periods according to the missing method. skip variables without a time coordinate. src_freq = self.src_freq if isinstance(self.src_freq, str) else None + freq = self._get_missing_freq(params) miss = ( - self._missing( - da, params["freq"], src_freq, options, params.get("indexer", {}) - ) + self._missing(da, freq, src_freq, options, params.get("indexer", {})) for da in das.values() if "time" in da.coords ) # Reduce by or and broadcast to ensure the same length in time # When indexing is used and there are no valid points in the last period, mask will not include it mask = reduce(np.logical_or, miss) - if isinstance(mask, DataArray) and mask.time.size < outs[0].time.size: + if ( + isinstance(mask, DataArray) + and "time" in mask.dims + and mask.time.size < outs[0].time.size + ): mask = mask.reindex(time=outs[0].time, fill_value=True) outs = [out.where(~mask) for out in outs] return outs -class ResamplingIndicatorWithIndexing(ResamplingIndicator): - """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation.""" +class ResamplingIndicator(CheckMissingIndicator): + """Indicator that performs a resampling computation. + + Compared to the base Indicator, this adds the handling of missing data, + and the check of allowed periods. + + Parameters + ---------- + missing : {any, wmo, pct, at_least_n, skip, from_context} + The name of the missing value method. 
See `xclim.core.missing.MissingBase` to create new custom methods. If + None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". + missing_options : dict, optional + Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. + allowed_periods : Sequence[str], optional + A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be + computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the + indicator doesn't take a `freq` argument. + """ + + allowed_periods: list[str] | None = None + + @classmethod + def _ensure_correct_parameters(cls, parameters): + if "freq" not in parameters: + raise ValueError( + "ResamplingIndicator require a 'freq' argument, use the base Indicator" + " class if your computation doesn't perform any resampling." + ) + return super()._ensure_correct_parameters(parameters) + + def _get_missing_freq(self, params): + return params["freq"] + + def _preprocess_and_checks(self, das, params): + """Perform parent's checks and also check if freq is allowed.""" + das, params = super()._preprocess_and_checks(das, params) + + # Check if the period is allowed: + if ( + self.allowed_periods is not None + and parse_offset(params["freq"])[1] not in self.allowed_periods + ): + raise ValueError( + f"Resampling frequency {params['freq']} is not allowed for indicator " + f"{self.identifier} (needs something equivalent to one " + f"of {self.allowed_periods})." 
+ ) + + return das, params + + +class IndexingIndicator(Indicator): + """Indicator that also injects "indexer" kwargs to subset the inputs before computation.""" @classmethod def _injected_parameters(cls): @@ -1492,6 +1519,12 @@ def _preprocess_and_checks(self, das: dict[str, DataArray], params: dict[str, An return das, params +class ResamplingIndicatorWithIndexing(ResamplingIndicator, IndexingIndicator): + """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation.""" + + pass + + class Daily(ResamplingIndicator): """Class for daily inputs and resampling computes.""" @@ -1505,6 +1538,8 @@ class Hourly(ResamplingIndicator): base_registry["Indicator"] = Indicator +base_registry["CheckMissingIndicator"] = CheckMissingIndicator +base_registry["IndexingIndicator"] = IndexingIndicator base_registry["ResamplingIndicator"] = ResamplingIndicator base_registry["ResamplingIndicatorWithIndexing"] = ResamplingIndicatorWithIndexing base_registry["Hourly"] = Hourly diff --git a/xclim/indicators/generic/_stats.py b/xclim/indicators/generic/_stats.py index ec932d415..a563ae5e1 100644 --- a/xclim/indicators/generic/_stats.py +++ b/xclim/indicators/generic/_stats.py @@ -1,6 +1,6 @@ from __future__ import annotations -from xclim.core.indicator import Indicator, ResamplingIndicator +from xclim.core.indicator import CheckMissingIndicator, ResamplingIndicator from xclim.indices.generic import select_resample_op from xclim.indices.stats import fit as _fit from xclim.indices.stats import frequency_analysis @@ -8,7 +8,7 @@ __all__ = ["fit", "return_level", "stats"] -class Generic(Indicator): +class Generic(CheckMissingIndicator): realm = "generic" @@ -27,10 +27,11 @@ class GenericResampling(ResamplingIndicator): cell_methods="time: fit", compute=_fit, src_freq=None, + missing="skip", ) -return_level = GenericResampling( +return_level = Generic( title="Return level from frequency analysis", identifier="return_level", 
var_name="fa_{window}{mode:r}{indexer}", @@ -51,6 +52,5 @@ class GenericResampling(ResamplingIndicator): long_name="Daily statistics", description="{freq} {op} of daily values ({indexer}).", compute=select_resample_op, - missing="any", src_freq="D", ) From 39c96eeeae5b5228e5e560915cf276409f285187 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 31 Jul 2023 16:40:51 -0400 Subject: [PATCH 2/5] Accept YAQ in missing prepare - upd chgs - enable missing in fa and fit --- CHANGES.rst | 2 ++ tests/test_generic_indicators.py | 9 ++++++++- xclim/core/missing.py | 8 ++++---- xclim/indicators/generic/_stats.py | 2 -- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a0ccf2267..3fbc77cea 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,8 @@ New features and enhancements * Added ``ensembles.hawkins_sutton`` method to partition the uncertainty sources in a climate projection ensemble. (:issue:`771`, :pull:`1262`). * New function ``xclim.core.calendar.convert_doy`` to transform day-of-year data between calendars. Also accessible from ``convert_calendar`` with ``doy=True``. (:issue:`1283`, :pull:`1406`). * Add support for setting optional variables through the `ds` argument. (:issue:`1432`, :pull:`1435`). +* Missing value objects now support input timeseries of quarterly and yearly frequencies (:pull:`1446`). +* Missing value checks enabled for all "generic" indicators (``return_level``, ``fit`` and ``stats``) (:pull:`1446`). 
Bug fixes ^^^^^^^^^ diff --git a/tests/test_generic_indicators.py b/tests/test_generic_indicators.py index 6bb8c4e79..8e9cc60a9 100644 --- a/tests/test_generic_indicators.py +++ b/tests/test_generic_indicators.py @@ -11,6 +11,7 @@ def test_simple(self, pr_ndseries, random): ts = generic.stats(pr, freq="YS", op="max") p = generic.fit(ts, dist="gumbel_r") assert p.attrs["estimator"] == "Maximum likelihood" + assert "time" not in p.dims def test_nan(self, pr_series, random): r = random.random(22) @@ -18,7 +19,10 @@ def test_nan(self, pr_series, random): pr = pr_series(r) out = generic.fit(pr, dist="norm") - assert not np.isnan(out.values[0]) + assert np.isnan(out.values[0]) + with set_options(check_missing="skip"): + out = generic.fit(pr, dist="norm") + assert not np.isnan(out.values[0]) def test_ndim(self, pr_ndseries, random): pr = pr_ndseries(random.random((100, 1, 2))) @@ -28,6 +32,9 @@ def test_ndim(self, pr_ndseries, random): def test_options(self, q_series, random): q = q_series(random.random(19)) + out = generic.fit(q, dist="norm") + np.testing.assert_array_equal(out.isnull(), False) + with set_options(missing_options={"at_least_n": {"n": 10}}): out = generic.fit(q, dist="norm") np.testing.assert_array_equal(out.isnull(), False) diff --git a/xclim/core/missing.py b/xclim/core/missing.py index f717758a4..45679e693 100644 --- a/xclim/core/missing.py +++ b/xclim/core/missing.py @@ -27,7 +27,7 @@ import numpy as np import xarray as xr -from .calendar import date_range, get_calendar, select_time +from .calendar import date_range, get_calendar, parse_offset, select_time from .options import ( CHECK_MISSING, MISSING_METHODS, @@ -106,8 +106,8 @@ def prepare(self, da, freq, src_timestep, **indexer): freq : str Resampling frequency defining the periods defined in https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling. - src_timestep : {"D", "H"} - Expected input frequency. + src_timestep : str + Expected input frequency. 
See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling. \*\*indexer : {dim: indexer}, optional Time attribute and values over which to subset the array. For example, use season='DJF' to select winter values, month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given, @@ -142,7 +142,7 @@ def prepare(self, da, freq, src_timestep, **indexer): start_time = i[:1] end_time = i[-1:] - if indexer or "M" in src_timestep: + if indexer or parse_offset(src_timestep)[1] in "YAQM": # Create a full synthetic time series and compare the number of days with the original series. t = date_range( start_time[0], diff --git a/xclim/indicators/generic/_stats.py b/xclim/indicators/generic/_stats.py index a563ae5e1..de6b5c51e 100644 --- a/xclim/indicators/generic/_stats.py +++ b/xclim/indicators/generic/_stats.py @@ -27,7 +27,6 @@ class GenericResampling(ResamplingIndicator): cell_methods="time: fit", compute=_fit, src_freq=None, - missing="skip", ) @@ -41,7 +40,6 @@ class GenericResampling(ResamplingIndicator): abstract="Frequency analysis on the basis of a given mode and distribution.", compute=frequency_analysis, src_freq="D", - missing="skip", ) From dc05b0f1ab938491ef29a5ce95d9a16775632aca Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 1 Aug 2023 14:31:33 -0400 Subject: [PATCH 3/5] offset_is_divisor and tests --- CHANGES.rst | 1 + tests/test_missing.py | 22 ++++++++++++++++++ xclim/core/calendar.py | 53 ++++++++++++++++++++++++++++++++++++++++++ xclim/core/missing.py | 17 ++++++++++++-- 4 files changed, 91 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3fbc77cea..358662170 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,7 @@ New features and enhancements * Added ``ensembles.hawkins_sutton`` method to partition the uncertainty sources in a climate projection ensemble. (:issue:`771`, :pull:`1262`). 
* New function ``xclim.core.calendar.convert_doy`` to transform day-of-year data between calendars. Also accessible from ``convert_calendar`` with ``doy=True``. (:issue:`1283`, :pull:`1406`). * Add support for setting optional variables through the `ds` argument. (:issue:`1432`, :pull:`1435`). +* New ``xclim.core.calendar.offset_is_divisor`` to test if a given freq divides another one evenly (:pull:`1446`). * Missing value objects now support input timeseries of quarterly and yearly frequencies (:pull:`1446`). * Missing value checks enabled for all "generic" indicators (``return_level``, ``fit`` and ``stats``) (:pull:`1446`). diff --git a/tests/test_missing.py b/tests/test_missing.py index d0e501f2b..86fccf6cf 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -31,6 +31,21 @@ def test_monthly_input(self, random): mb = missing.MissingBase(ts, freq="AS", src_timestep="M", season="JJA") assert mb.count == 3 + def test_seasonal_input(self, random): + """Creating array with 11 seasons.""" + n = 11 + time = xr.cftime_range(start="2002-04-01", periods=n, freq="QS-JAN") + ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) + mb = missing.MissingBase(ts, freq="YS", src_timestep="QS-JAN") + # Make sure count is 4 (quarters) for the full years, because we're requesting a YS freq. 
+ np.testing.assert_array_equal(mb.count, [4, 4, 4, 1]) + + with pytest.raises( + NotImplementedError, + match="frequency that is not aligned with the source timestep.", + ): + missing.MissingBase(ts, freq="YS", src_timestep="QS-DEC") + class TestMissingAnyFills: def test_missing_days(self, tas_series): @@ -144,6 +159,13 @@ def test_hourly(self, pr_hr_series): out = missing.missing_any(pr, freq="MS") np.testing.assert_array_equal(out, [True, False, True]) + def test_seasonal(self, random): + n = 11 + time = xr.cftime_range(start="2002-01-01", periods=n, freq="QS-JAN") + ts = xr.DataArray(random.random(n), dims="time", coords={"time": time}) + out = missing.missing_any(ts, freq="YS") + np.testing.assert_array_equal(out, [False, False, True]) + class TestMissingWMO: def test_missing_days(self, tas_series): diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 504322f58..c06002e38 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -40,6 +40,7 @@ "get_calendar", "interp_calendar", "max_doy", + "offset_is_divisor", "parse_offset", "percentile_doy", "resample_doy", @@ -834,6 +835,58 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N ) +def offset_is_divisor(freqA: str, freqB: str): + """Check that freqA is a divisor of freqB. + + A frequency is a "divisor" of another if a whole number of periods of the + former fit within a single period of the latter. + + Parameters + ---------- + freqA: str + The divisor frequency. + freqB: str + The large frequency. 
+ + Returns + ------- + bool + + Examples + -------- + >>> offset_is_divisor("QS-Jan", "YS") + True + >>> offset_is_divisor("QS-DEC", "AS-JUL") + False + >>> offset_is_divisor("D", "M") + True + """ + if compare_offsets(freqA, ">", freqB): + return False + # Reconstruct offsets anchored at the start of the period + # to have comparable quantities, also get "offset" objects + mA, bA, sA, aA = parse_offset(freqA) + offAs = pd.tseries.frequencies.to_offset(construct_offset(mA, bA, True, aA)) + + mB, bB, sB, aB = parse_offset(freqB) + offBs = pd.tseries.frequencies.to_offset(construct_offset(mB, bB, True, aB)) + tB = pd.date_range("1970-01-01T00:00:00", freq=offBs, periods=13) + + if bA in "WDHTLUN" or bB in "WDHTLUN": + # Simple length comparison is sufficient for submonthly freqs + # In case one of bA or bB is > W, we test many to be sure. + tA = pd.date_range("1970-01-01T00:00:00", freq=offAs, periods=13) + return np.all( + (np.diff(tB)[:, np.newaxis] / np.diff(tA)[np.newaxis, :]) % 1 == 0 + ) + + # else, we test alignment with some real dates + # If both fall on offAs, then is means freqA is aligned with freqB at those dates + # if N=13 is True, then it is always True + # As freqA <= freqB, this means freqA is a "divisor" of freqB. 
+ return all(offAs.is_on_offset(d) for d in tB) + + def _interpolate_doy_calendar( source: xr.DataArray, doy_max: int, doy_min: int = 1 ) -> xr.DataArray: diff --git a/xclim/core/missing.py b/xclim/core/missing.py index 45679e693..b02d7eeba 100644 --- a/xclim/core/missing.py +++ b/xclim/core/missing.py @@ -27,7 +27,13 @@ import numpy as np import xarray as xr -from .calendar import date_range, get_calendar, parse_offset, select_time +from .calendar import ( + date_range, + get_calendar, + offset_is_divisor, + parse_offset, + select_time, +) from .options import ( CHECK_MISSING, MISSING_METHODS, @@ -142,7 +148,14 @@ def prepare(self, da, freq, src_timestep, **indexer): start_time = i[:1] end_time = i[-1:] - if indexer or parse_offset(src_timestep)[1] in "YAQM": + if freq is not None and not offset_is_divisor(src_timestep, freq): + raise NotImplementedError( + "Missing checks not implemented for timeseries resampled to a frequency that is not " + f"aligned with the source timestep. {src_timestep} is not a divisor of {freq}." + ) + + offset = parse_offset(src_timestep) + if indexer or offset[1] in "YAQM": # Create a full synthetic time series and compare the number of days with the original series. t = date_range( start_time[0], From fdb93f9c978d62cf7861aaf9ef92fb1913354b4c Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 14 Aug 2023 10:57:20 -0400 Subject: [PATCH 4/5] Split check missing from reducingindicator --- xclim/core/indicator.py | 35 ++++++++++++++++++++++++------ xclim/indicators/generic/_stats.py | 4 ++-- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py index bb6927847..a568d611a 100644 --- a/xclim/core/indicator.py +++ b/xclim/core/indicator.py @@ -1356,10 +1356,12 @@ def _show_deprecation_warning(self): class CheckMissingIndicator(Indicator): - """Class for indicators that completely reduce the time dimension, adding missing value checks. 
+ """Class adding missing value checks to indicators. - - A full reduction of the "time" dimension is expected by default: the missing step will fail if the output still has a time dimension. - To enable resampling, the :py:meth:`_get_missing_freq` method can be subclassed to return the resampling frequency. The method is always called with the indicator parameters. + This should not be used as-is, but subclassed by implementing the `_get_missing_freq` method. + This method will be called in `_postprocess` using the compute parameters as the only argument. + It should return a freq string, the same as the output freq of the computed data. + It can also be "None" to indicate that the full time axis has been reduced, or "False" to skip the missing checks. Parameters ---------- @@ -1403,13 +1405,14 @@ def _history_string(self, **kwargs): def _get_missing_freq(self, params): """Return the resampling frequency to be used in the missing values check.""" - return None + raise NotImplementedError("Don't use `CheckMissingIndicator` directly.") def _postprocess(self, outs, das, params): """Masking of missing values.""" outs = super()._postprocess(outs, das, params) - if self.missing != "skip": + freq = self._get_missing_freq(params) + if self.missing != "skip" or freq is False: # Mask results that do not meet criteria defined by the `missing` method. # This means all outputs must have the same dimensions as the broadcasted inputs (excluding time) options = self.missing_options or OPTIONS[MISSING_OPTIONS].get( @@ -1418,7 +1421,6 @@ def _postprocess(self, outs, das, params): # We flag periods according to the missing method. skip variables without a time coordinate. 
src_freq = self.src_freq if isinstance(self.src_freq, str) else None - freq = self._get_missing_freq(params) miss = ( self._missing(da, freq, src_freq, options, params.get("indexer", {})) for da in das.values() @@ -1438,6 +1440,25 @@ def _postprocess(self, outs, das, params): return outs +class ReducingIndicator(CheckMissingIndicator): + """Indicator that performs a time-reducing computation. + + Compared to the base Indicator, this adds the handling of missing data. + + Parameters + ---------- + missing : {any, wmo, pct, at_least_n, skip, from_context} + The name of the missing value method. See `xclim.core.missing.MissingBase` to create new custom methods. If + None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context". + missing_options : dict, optional + Arguments to pass to the `missing` function. If None, this will be determined by the global configuration. + """ + + def _get_missing_freq(self, params): + """Return None, to indicate that the full time axis is to be reduced.""" + return None + + class ResamplingIndicator(CheckMissingIndicator): """Indicator that performs a resampling computation. 
@@ -1538,7 +1559,7 @@ class Hourly(ResamplingIndicator): base_registry["Indicator"] = Indicator -base_registry["CheckMissingIndicator"] = CheckMissingIndicator +base_registry["ReducingIndicator"] = ReducingIndicator base_registry["IndexingIndicator"] = IndexingIndicator base_registry["ResamplingIndicator"] = ResamplingIndicator base_registry["ResamplingIndicatorWithIndexing"] = ResamplingIndicatorWithIndexing diff --git a/xclim/indicators/generic/_stats.py b/xclim/indicators/generic/_stats.py index de6b5c51e..d8d8c6555 100644 --- a/xclim/indicators/generic/_stats.py +++ b/xclim/indicators/generic/_stats.py @@ -1,6 +1,6 @@ from __future__ import annotations -from xclim.core.indicator import CheckMissingIndicator, ResamplingIndicator +from xclim.core.indicator import ReducingIndicator, ResamplingIndicator from xclim.indices.generic import select_resample_op from xclim.indices.stats import fit as _fit from xclim.indices.stats import frequency_analysis @@ -8,7 +8,7 @@ __all__ = ["fit", "return_level", "stats"] -class Generic(CheckMissingIndicator): +class Generic(ReducingIndicator): realm = "generic" From e9cee54bc2523704a0144e375823c76871a5c61e Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 14 Aug 2023 11:01:43 -0400 Subject: [PATCH 5/5] rename offset_is_divisor and vars --- CHANGES.rst | 2 +- xclim/core/calendar.py | 26 +++++++++++++------------- xclim/core/missing.py | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 358662170..c611fee8d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,7 +11,7 @@ New features and enhancements * Added ``ensembles.hawkins_sutton`` method to partition the uncertainty sources in a climate projection ensemble. (:issue:`771`, :pull:`1262`). * New function ``xclim.core.calendar.convert_doy`` to transform day-of-year data between calendars. Also accessible from ``convert_calendar`` with ``doy=True``. (:issue:`1283`, :pull:`1406`). 
* Add support for setting optional variables through the `ds` argument. (:issue:`1432`, :pull:`1435`). -* New ``xclim.core.calendar.offset_is_divisor`` to test if a given freq divides another one evenly (:pull:`1446`). +* New ``xclim.core.calendar.is_offset_divisor`` to test if a given freq divides another one evenly (:pull:`1446`). * Missing value objects now support input timeseries of quarterly and yearly frequencies (:pull:`1446`). * Missing value checks enabled for all "generic" indicators (``return_level``, ``fit`` and ``stats``) (:pull:`1446`). diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index c06002e38..d785115b4 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -39,8 +39,8 @@ "ensure_cftime_array", "get_calendar", "interp_calendar", + "is_offset_divisor", "max_doy", - "offset_is_divisor", "parse_offset", "percentile_doy", "resample_doy", @@ -835,17 +835,17 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N ) -def offset_is_divisor(freqA: str, freqB: str): - """Check that freqA is a divisor of freqB. +def is_offset_divisor(divisor: str, offset: str): + """Check that divisor is a divisor of offset. A frequency is a "divisor" of another if a whole number of periods of the former fit within a single period of the latter. Parameters ---------- - freqA: str + divisor: str The divisor frequency. - freqB: str + offset: str The large frequency. 
 Returns @@ -854,21 +854,21 @@ def offset_is_divisor(freqA: str, freqB: str): Examples -------- - >>> offset_is_divisor("QS-Jan", "YS") + >>> is_offset_divisor("QS-Jan", "YS") True - >>> offset_is_divisor("QS-DEC", "AS-JUL") + >>> is_offset_divisor("QS-DEC", "AS-JUL") False - >>> offset_is_divisor("D", "M") + >>> is_offset_divisor("D", "M") True """ - if compare_offsets(freqA, ">", freqB): + if compare_offsets(divisor, ">", offset): return False # Reconstruct offsets anchored at the start of the period # to have comparable quantities, also get "offset" objects - mA, bA, sA, aA = parse_offset(freqA) + mA, bA, sA, aA = parse_offset(divisor) offAs = pd.tseries.frequencies.to_offset(construct_offset(mA, bA, True, aA)) - mB, bB, sB, aB = parse_offset(freqB) + mB, bB, sB, aB = parse_offset(offset) offBs = pd.tseries.frequencies.to_offset(construct_offset(mB, bB, True, aB)) tB = pd.date_range("1970-01-01T00:00:00", freq=offBs, periods=13) @@ -881,9 +881,9 @@ def offset_is_divisor(freqA: str, freqB: str): ) # else, we test alignment with some real dates - # If both fall on offAs, then is means freqA is aligned with freqB at those dates + # If both fall on offAs, then it means divisor is aligned with offset at those dates # if N=13 is True, then it is always True - # As freqA <= freqB, this means freqA is a "divisor" of freqB. + # As divisor <= offset, this means divisor is a "divisor" of offset. 
return all(offAs.is_on_offset(d) for d in tB) diff --git a/xclim/core/missing.py b/xclim/core/missing.py index b02d7eeba..953319683 100644 --- a/xclim/core/missing.py +++ b/xclim/core/missing.py @@ -30,7 +30,7 @@ from .calendar import ( date_range, get_calendar, - offset_is_divisor, + is_offset_divisor, parse_offset, select_time, ) @@ -148,7 +148,7 @@ def prepare(self, da, freq, src_timestep, **indexer): start_time = i[:1] end_time = i[-1:] - if freq is not None and not offset_is_divisor(src_timestep, freq): + if freq is not None and not is_offset_divisor(src_timestep, freq): raise NotImplementedError( "Missing checks not implemented for timeseries resampled to a frequency that is not " f"aligned with the source timestep. {src_timestep} is not a divisor of {freq}."