Ouranosinc · aulemahal · Aug 31, 2023 · Jul 31, 2023 · Jul 31, 2023 · Aug 1, 2023
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -11,12 +11,15 @@ New features and enhancements
 * Added ``ensembles.hawkins_sutton`` method to partition the uncertainty sources in a climate projection ensemble. (:issue:`771`, :pull:`1262`).
 * New function ``xclim.core.calendar.convert_doy`` to transform day-of-year data between calendars. Also accessible from ``convert_calendar`` with ``doy=True``. (:issue:`1283`, :pull:`1406`).
 * Add support for setting optional variables through the `ds` argument. (:issue:`1432`, :pull:`1435`).
+* New ``xclim.core.calendar.offset_is_divisor`` to test if a given freq divides another one evenly (:pull:`1446`).
+* Missing value objects now support input timeseries of quarterly and yearly frequencies (:pull:`1446`).
+* Missing value checks enabled for all "generic" indicators (``return_level``, ``fit`` and ``stats``) (:pull:`1446`).
 
 Bug fixes
 ^^^^^^^^^
-* Fix `kldiv` docstring so the math formula renders to HTML. (:issue:`1408`, :pull:`1409`).
+* Fix ``kldiv`` docstring so the math formula renders to HTML. (:issue:`1408`, :pull:`1409`).
 * Fix the registry entries of "generic" indicators. (:issue:`1423`, :pull:`1424`).
-* Fix `jetstream_metric_woollings` so it uses the `vertical` coordinate identified by `cf-xarray`, instead of `pressure`. (:issue:`1421`, :pull:`1422`). Add logic to handle coordinates in decreasing order, or for longitudes defined from 0-360 instead of -180 to 180. (:issue:`1429`, :pull:`1430`).
+* Fix ``jetstream_metric_woollings`` so it uses the `vertical` coordinate identified by `cf-xarray`, instead of `pressure`. (:issue:`1421`, :pull:`1422`). Add logic to handle coordinates in decreasing order, or for longitudes defined from 0-360 instead of -180 to 180. (:issue:`1429`, :pull:`1430`).
 * Fix virtual indicator attribute assignment causing individual indicator's realm to be ignored. (:issue:`1425`, :pull:`1426`).
 
 Internal changes
@@ -25,6 +28,7 @@ Internal changes
 * Increased the guess of number of quantiles needed in ExtremeValues. (:pull:`1413`).
 * Tolerance thresholds for error in ``test_processing::test_adapt_freq`` have been relaxed to allow for more variation in the results. (:issue:`1417`, :pull:`1418`).
 * Added 'streamflow' to the list of known variables (:pull:`1431`).
+* Refactor base indicator classes and fix misleading inheritance of ``return_level`` (:issue:`1263`, :pull:`1446`).
 
 v0.44.0 (2023-06-23)
 --------------------

diff --git a/tests/test_generic_indicators.py b/tests/test_generic_indicators.py
@@ -11,14 +11,18 @@ def test_simple(self, pr_ndseries, random):
         ts = generic.stats(pr, freq="YS", op="max")
         p = generic.fit(ts, dist="gumbel_r")
         assert p.attrs["estimator"] == "Maximum likelihood"
+        assert "time" not in p.dims
 
     def test_nan(self, pr_series, random):
         r = random.random(22)
         r[0] = np.nan
         pr = pr_series(r)
 
         out = generic.fit(pr, dist="norm")
-        assert not np.isnan(out.values[0])
+        assert np.isnan(out.values[0])
+        with set_options(check_missing="skip"):
+            out = generic.fit(pr, dist="norm")
+            assert not np.isnan(out.values[0])
 
     def test_ndim(self, pr_ndseries, random):
         pr = pr_ndseries(random.random((100, 1, 2)))
@@ -28,6 +32,9 @@ def test_ndim(self, pr_ndseries, random):
 
     def test_options(self, q_series, random):
         q = q_series(random.random(19))
+        out = generic.fit(q, dist="norm")
+        np.testing.assert_array_equal(out.isnull(), False)
+
         with set_options(missing_options={"at_least_n": {"n": 10}}):
             out = generic.fit(q, dist="norm")
         np.testing.assert_array_equal(out.isnull(), False)
@@ -87,8 +94,9 @@ def test_ndq(self, ndq_series):
         assert out.attrs["units"] == "m3 s-1"
 
     def test_missing(self, ndq_series):
-        a = ndq_series
-        a = ndq_series.where(~((a.time.dt.dayofyear == 5) * (a.time.dt.year == 1902)))
+        a = ndq_series.where(
+            ~((ndq_series.time.dt.dayofyear == 5) & (ndq_series.time.dt.year == 1902))
+        )
         assert a.shape == (5000, 2, 3)
         out = generic.stats(a, op="max", month=1)
 

diff --git a/tests/test_missing.py b/tests/test_missing.py
@@ -31,6 +31,21 @@ def test_monthly_input(self, random):
         mb = missing.MissingBase(ts, freq="AS", src_timestep="M", season="JJA")
         assert mb.count == 3
 
+    def test_seasonal_input(self, random):
+        """Creating array with 11 seasons."""
+        n = 11
+        time = xr.cftime_range(start="2002-04-01", periods=n, freq="QS-JAN")
+        ts = xr.DataArray(random.random(n), dims="time", coords={"time": time})
+        mb = missing.MissingBase(ts, freq="YS", src_timestep="QS-JAN")
+        # Make sure count is 12, because we're requesting a YS freq.
+        np.testing.assert_array_equal(mb.count, [4, 4, 4, 1])
+
+        with pytest.raises(
+            NotImplementedError,
+            match="frequency that is not aligned with the source timestep.",
+        ):
+            missing.MissingBase(ts, freq="YS", src_timestep="QS-DEC")
+
 
 class TestMissingAnyFills:
     def test_missing_days(self, tas_series):
@@ -144,6 +159,13 @@ def test_hourly(self, pr_hr_series):
         out = missing.missing_any(pr, freq="MS")
         np.testing.assert_array_equal(out, [True, False, True])
 
+    def test_seasonal(self, random):
+        n = 11
+        time = xr.cftime_range(start="2002-01-01", periods=n, freq="QS-JAN")
+        ts = xr.DataArray(random.random(n), dims="time", coords={"time": time})
+        out = missing.missing_any(ts, freq="YS")
+        np.testing.assert_array_equal(out, [False, False, True])
+
 
 class TestMissingWMO:
     def test_missing_days(self, tas_series):

diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py
@@ -40,6 +40,7 @@
     "get_calendar",
     "interp_calendar",
     "max_doy",
+    "offset_is_divisor",
     "parse_offset",
     "percentile_doy",
     "resample_doy",
@@ -834,6 +835,58 @@ def construct_offset(mult: int, base: str, start_anchored: bool, anchor: str | N
     )
 
 
+def offset_is_divisor(freqA: str, freqB: str):
+    """Check that freqA is a divisor of freqB.
+
+    A frequency is a "divisor" of another if a whole number of periods of the
+    former fit within a single period of the latter.
+
+    Parameters
+    ----------
+    freqA: str
+      The divisor frequency.
+    freqB: str
+      The large frequency.
+
+    Returns
+    -------
+    bool
+
+    Examples
+    --------
+    >>> offset_is_divisor("QS-Jan", "YS")
+    True
+    >>> offset_is_divisor("QS-DEC", "AS-JUL")
+    False
+    >>> offset_is_divisor("D", "M")
+    True
+    """
+    if compare_offsets(freqA, ">", freqB):
+        return False
+    # Reconstruct offsets anchored at the start of the period
+    # to have comparable quantities, also get "offset" objects
+    mA, bA, sA, aA = parse_offset(freqA)
+    offAs = pd.tseries.frequencies.to_offset(construct_offset(mA, bA, True, aA))
+
+    mB, bB, sB, aB = parse_offset(freqB)
+    offBs = pd.tseries.frequencies.to_offset(construct_offset(mB, bB, True, aB))
+    tB = pd.date_range("1970-01-01T00:00:00", freq=offBs, periods=13)
+
+    if bA in "WDHTLUN" or bB in "WDHTLUN":
+        # Simple length comparison is sufficient for submonthly freqs
+        # In case one of bA or bB is > W, we test many to be sure.
+        tA = pd.date_range("1970-01-01T00:00:00", freq=offAs, periods=13)
+        return np.all(
+            (np.diff(tB)[:, np.newaxis] / np.diff(tA)[np.newaxis, :]) % 1 == 0
+        )
+
+    # else, we test alignment with some real dates
+    # If both fall on offAs, then is means freqA is aligned with freqB at those dates
+    # if N=13 is True, then it is always True
+    # As freqA <= freqB, this means freqA is a "divisor" of freqB.
+    return all(offAs.is_on_offset(d) for d in tB)
+
+
 def _interpolate_doy_calendar(
     source: xr.DataArray, doy_max: int, doy_min: int = 1
 ) -> xr.DataArray:

diff --git a/xclim/core/indicator.py b/xclim/core/indicator.py
@@ -1355,11 +1355,11 @@ def _show_deprecation_warning(self):
         )
 
 
-class ResamplingIndicator(Indicator):
-    """Indicator that performs a resampling computation.
+class CheckMissingIndicator(Indicator):
+    """Class for indicators that completely reduce the time dimension, adding missing value checks.
 
-    Compared to the base Indicator, this adds the handling of missing data,
-    and the check of allowed periods.
+    A full reduction of the "time" dimension is expected by default: the missing step will fail if the output still has a time dimension.
+    To enable resampling, the :py:meth:`_get_missing_freq` method can be subclassed to return the resampling frequency. The method is always called with the indicator parameters.
 
     Parameters
     ----------
@@ -1368,24 +1368,10 @@ class ResamplingIndicator(Indicator):
       None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context".
     missing_options : dict, optional
       Arguments to pass to the `missing` function. If None, this will be determined by the global configuration.
-    allowed_periods : Sequence[str], optional
-      A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be
-      computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the
-      indicator doesn't take a `freq` argument.
     """
 
     missing = "from_context"
     missing_options: dict | None = None
-    allowed_periods: list[str] | None = None
-
-    @classmethod
-    def _ensure_correct_parameters(cls, parameters):
-        if "freq" not in parameters:
-            raise ValueError(
-                "ResamplingIndicator require a 'freq' argument, use the base Indicator"
-                " class if your computation doesn't perform any resampling."
-            )
-        return super()._ensure_correct_parameters(parameters)
 
     def __init__(self, **kwds):
         if self.missing == "from_context" and self.missing_options is not None:
@@ -1401,23 +1387,6 @@ def __init__(self, **kwds):
 
         super().__init__(**kwds)
 
-    def _preprocess_and_checks(self, das, params):
-        """Perform parent's checks and also check if freq is allowed."""
-        das, params = super()._preprocess_and_checks(das, params)
-
-        # Check if the period is allowed:
-        if (
-            self.allowed_periods is not None
-            and parse_offset(params["freq"])[1] not in self.allowed_periods
-        ):
-            raise ValueError(
-                f"Resampling frequency {params['freq']} is not allowed for indicator "
-                f"{self.identifier} (needs something equivalent to one "
-                f"of {self.allowed_periods})."
-            )
-
-        return das, params
-
     def _history_string(self, **kwargs):
         if self.missing == "from_context":
             missing = OPTIONS[CHECK_MISSING]
@@ -1432,6 +1401,10 @@ def _history_string(self, **kwargs):
 
         return super()._history_string(**kwargs) + opt_str
 
+    def _get_missing_freq(self, params):
+        """Return the resampling frequency to be used in the missing values check."""
+        return None
+
     def _postprocess(self, outs, das, params):
         """Masking of missing values."""
         outs = super()._postprocess(outs, das, params)
@@ -1445,25 +1418,79 @@ def _postprocess(self, outs, das, params):
 
             # We flag periods according to the missing method. skip variables without a time coordinate.
             src_freq = self.src_freq if isinstance(self.src_freq, str) else None
+            freq = self._get_missing_freq(params)
             miss = (
-                self._missing(
-                    da, params["freq"], src_freq, options, params.get("indexer", {})
-                )
+                self._missing(da, freq, src_freq, options, params.get("indexer", {}))
                 for da in das.values()
                 if "time" in da.coords
             )
             # Reduce by or and broadcast to ensure the same length in time
             # When indexing is used and there are no valid points in the last period, mask will not include it
             mask = reduce(np.logical_or, miss)
-            if isinstance(mask, DataArray) and mask.time.size < outs[0].time.size:
+            if (
+                isinstance(mask, DataArray)
+                and "time" in mask.dims
+                and mask.time.size < outs[0].time.size
+            ):
                 mask = mask.reindex(time=outs[0].time, fill_value=True)
             outs = [out.where(~mask) for out in outs]
 
         return outs
 
 
-class ResamplingIndicatorWithIndexing(ResamplingIndicator):
-    """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation."""
+class ResamplingIndicator(CheckMissingIndicator):
+    """Indicator that performs a resampling computation.
+
+    Compared to the base Indicator, this adds the handling of missing data,
+    and the check of allowed periods.
+
+    Parameters
+    ----------
+    missing : {any, wmo, pct, at_least_n, skip, from_context}
+      The name of the missing value method. See `xclim.core.missing.MissingBase` to create new custom methods. If
+      None, this will be determined by the global configuration (see `xclim.set_options`). Defaults to "from_context".
+    missing_options : dict, optional
+      Arguments to pass to the `missing` function. If None, this will be determined by the global configuration.
+    allowed_periods : Sequence[str], optional
+      A list of allowed periods, i.e. base parts of the `freq` parameter. For example, indicators meant to be
+      computed annually only will have `allowed_periods=["A"]`. `None` means "any period" or that the
+      indicator doesn't take a `freq` argument.
+    """
+
+    allowed_periods: list[str] | None = None
+
+    @classmethod
+    def _ensure_correct_parameters(cls, parameters):
+        if "freq" not in parameters:
+            raise ValueError(
+                "ResamplingIndicator require a 'freq' argument, use the base Indicator"
+                " class if your computation doesn't perform any resampling."
+            )
+        return super()._ensure_correct_parameters(parameters)
+
+    def _get_missing_freq(self, params):
+        return params["freq"]
+
+    def _preprocess_and_checks(self, das, params):
+        """Perform parent's checks and also check if freq is allowed."""
+        das, params = super()._preprocess_and_checks(das, params)
+
+        # Check if the period is allowed:
+        if (
+            self.allowed_periods is not None
+            and parse_offset(params["freq"])[1] not in self.allowed_periods
+        ):
+            raise ValueError(
+                f"Resampling frequency {params['freq']} is not allowed for indicator "
+                f"{self.identifier} (needs something equivalent to one "
+                f"of {self.allowed_periods})."
+            )
+
+        return das, params
+
+
+class IndexingIndicator(Indicator):
+    """Indicator that also injects "indexer" kwargs to subset the inputs before computation."""
 
     @classmethod
     def _injected_parameters(cls):
@@ -1492,6 +1519,12 @@ def _preprocess_and_checks(self, das: dict[str, DataArray], params: dict[str, An
         return das, params
 
 
+class ResamplingIndicatorWithIndexing(ResamplingIndicator, IndexingIndicator):
+    """Resampling indicator that also injects "indexer" kwargs to subset the inputs before computation."""
+
+    pass
+
+
 class Daily(ResamplingIndicator):
     """Class for daily inputs and resampling computes."""
 
@@ -1505,6 +1538,8 @@ class Hourly(ResamplingIndicator):
 
 
 base_registry["Indicator"] = Indicator
+base_registry["CheckMissingIndicator"] = CheckMissingIndicator
+base_registry["IndexingIndicator"] = IndexingIndicator
 base_registry["ResamplingIndicator"] = ResamplingIndicator
 base_registry["ResamplingIndicatorWithIndexing"] = ResamplingIndicatorWithIndexing
 base_registry["Hourly"] = Hourly

diff --git a/xclim/core/missing.py b/xclim/core/missing.py
@@ -27,7 +27,13 @@
 import numpy as np
 import xarray as xr
 
-from .calendar import date_range, get_calendar, select_time
+from .calendar import (
+    date_range,
+    get_calendar,
+    offset_is_divisor,
+    parse_offset,
+    select_time,
+)
 from .options import (
     CHECK_MISSING,
     MISSING_METHODS,
@@ -106,8 +112,8 @@ def prepare(self, da, freq, src_timestep, **indexer):
         freq : str
             Resampling frequency defining the periods defined in
             https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling.
-        src_timestep : {"D", "H"}
-            Expected input frequency.
+        src_timestep : str
+            Expected input frequency. See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#resampling.
         \*\*indexer : {dim: indexer}, optional
             Time attribute and values over which to subset the array. For example, use season='DJF' to select winter
             values, month=1 to select January, or month=[6,7,8] to select summer months. If not indexer is given,
@@ -142,7 +148,14 @@ def prepare(self, da, freq, src_timestep, **indexer):
             start_time = i[:1]
             end_time = i[-1:]
 
-        if indexer or "M" in src_timestep:
+        if freq is not None and not offset_is_divisor(src_timestep, freq):
+            raise NotImplementedError(
+                "Missing checks not implemented for timeseries resampled to a frequency that is not "
+                f"aligned with the source timestep. {src_timestep} is not a divisor of {freq}."
+            )
+
+        offset = parse_offset(src_timestep)
+        if indexer or offset[1] in "YAQM":
             # Create a full synthetic time series and compare the number of days with the original series.
             t = date_range(
                 start_time[0],