diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 552d11a06dc..c96b0aa5c3b 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -351,6 +351,36 @@ IndexVariable.sizes IndexVariable.values + + namedarray.core.NamedArray.all + namedarray.core.NamedArray.any + namedarray.core.NamedArray.attrs + namedarray.core.NamedArray.chunks + namedarray.core.NamedArray.chunksizes + namedarray.core.NamedArray.copy + namedarray.core.NamedArray.count + namedarray.core.NamedArray.cumprod + namedarray.core.NamedArray.cumsum + namedarray.core.NamedArray.data + namedarray.core.NamedArray.dims + namedarray.core.NamedArray.dtype + namedarray.core.NamedArray.get_axis_num + namedarray.core.NamedArray.max + namedarray.core.NamedArray.mean + namedarray.core.NamedArray.median + namedarray.core.NamedArray.min + namedarray.core.NamedArray.nbytes + namedarray.core.NamedArray.ndim + namedarray.core.NamedArray.prod + namedarray.core.NamedArray.reduce + namedarray.core.NamedArray.shape + namedarray.core.NamedArray.size + namedarray.core.NamedArray.sizes + namedarray.core.NamedArray.std + namedarray.core.NamedArray.sum + namedarray.core.NamedArray.var + + plot.plot plot.line plot.step diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index 5cdbc732741..d320eef1bbf 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -15,7 +15,6 @@ ) from xarray.core.common import ImplementsArrayReduce, ImplementsDatasetReduce from xarray.core.ops import ( - IncludeCumMethods, IncludeNumpySameMethods, IncludeReduceMethods, ) @@ -99,8 +98,6 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): class VariableArithmetic( ImplementsArrayReduce, - IncludeReduceMethods, - IncludeCumMethods, IncludeNumpySameMethods, SupportsArithmetic, VariableOpsMixin, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ebd6fb6f51f..ef12d566517 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6280,7 +6280,7 @@ def dropna( array = 
self._variables[k] if dim in array.dims: dims = [d for d in array.dims if d != dim] - count += np.asarray(array.count(dims)) # type: ignore[attr-defined] + count += np.asarray(array.count(dims)) size += math.prod([self.dims[d] for d in dims]) if thresh is not None: diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index d949cbdfbd1..3627554cf57 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -28,7 +28,7 @@ def _load_static_files(): ] -def short_data_repr_html(array): +def short_data_repr_html(array) -> str: """Format "data" for DataArray and Variable.""" internal_data = getattr(array, "variable", array)._data if hasattr(internal_data, "_repr_html_"): @@ -37,7 +37,7 @@ def short_data_repr_html(array): return f"
{text}
" -def format_dims(dims, dims_with_index): +def format_dims(dims, dims_with_index) -> str: if not dims: return "" @@ -53,7 +53,7 @@ def format_dims(dims, dims_with_index): return f"" -def summarize_attrs(attrs): +def summarize_attrs(attrs) -> str: attrs_dl = "".join( f"
{escape(str(k))} :
" f"
{escape(str(v))}
" for k, v in attrs.items() @@ -62,7 +62,7 @@ def summarize_attrs(attrs): return f"
{attrs_dl}
" -def _icon(icon_name): +def _icon(icon_name) -> str: # icon_name should be defined in xarray/static/html/icon-svg-inline.html return ( f"" @@ -72,7 +72,7 @@ def _icon(icon_name): ) -def summarize_variable(name, var, is_index=False, dtype=None): +def summarize_variable(name, var, is_index=False, dtype=None) -> str: variable = var.variable if hasattr(var, "variable") else var cssclass_idx = " class='xr-has-index'" if is_index else "" @@ -109,7 +109,7 @@ def summarize_variable(name, var, is_index=False, dtype=None): ) -def summarize_coords(variables): +def summarize_coords(variables) -> str: li_items = [] for k, v in variables.items(): li_content = summarize_variable(k, v, is_index=k in variables.xindexes) @@ -120,7 +120,7 @@ def summarize_coords(variables): return f"" -def summarize_vars(variables): +def summarize_vars(variables) -> str: vars_li = "".join( f"
  • {summarize_variable(k, v)}
  • " for k, v in variables.items() @@ -129,14 +129,14 @@ def summarize_vars(variables): return f"" -def short_index_repr_html(index): +def short_index_repr_html(index) -> str: if hasattr(index, "_repr_html_"): return index._repr_html_() return f"
    {escape(repr(index))}
    " -def summarize_index(coord_names, index): +def summarize_index(coord_names, index) -> str: name = "
    ".join([escape(str(n)) for n in coord_names]) index_id = f"index-{uuid.uuid4()}" @@ -155,7 +155,7 @@ def summarize_index(coord_names, index): ) -def summarize_indexes(indexes): +def summarize_indexes(indexes) -> str: indexes_li = "".join( f"
  • {summarize_index(v, i)}
  • " for v, i in indexes.items() @@ -165,7 +165,7 @@ def summarize_indexes(indexes): def collapsible_section( name, inline_details="", details="", n_items=None, enabled=True, collapsed=False -): +) -> str: # "unique" id to expand/collapse the section data_id = "section-" + str(uuid.uuid4()) @@ -187,7 +187,7 @@ def collapsible_section( def _mapping_section( mapping, name, details_func, max_items_collapse, expand_option_name, enabled=True -): +) -> str: n_items = len(mapping) expanded = _get_boolean_with_default( expand_option_name, n_items < max_items_collapse @@ -203,7 +203,7 @@ def _mapping_section( ) -def dim_section(obj): +def dim_section(obj) -> str: dim_list = format_dims(obj.dims, obj.xindexes.dims) return collapsible_section( @@ -211,7 +211,7 @@ def dim_section(obj): ) -def array_section(obj): +def array_section(obj) -> str: # "unique" id to expand/collapse the section data_id = "section-" + str(uuid.uuid4()) collapsed = ( @@ -296,7 +296,7 @@ def _obj_repr(obj, header_components, sections): ) -def array_repr(arr): +def array_repr(arr) -> str: dims = OrderedDict((k, v) for k, v in zip(arr.dims, arr.shape)) if hasattr(arr, "xindexes"): indexed_dims = arr.xindexes.dims @@ -326,7 +326,7 @@ def array_repr(arr): return _obj_repr(arr, header_components, sections) -def dataset_repr(ds): +def dataset_repr(ds) -> str: obj_type = f"xarray.{type(ds).__name__}" header_components = [f"
    {escape(obj_type)}
    "] diff --git a/xarray/core/ops.py b/xarray/core/ops.py index e1c3573841a..b23d586fb79 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -53,7 +53,6 @@ "var", "median", ] -NAN_CUM_METHODS = ["cumsum", "cumprod"] # TODO: wrap take, dot, sort @@ -263,20 +262,6 @@ def inject_reduce_methods(cls): setattr(cls, name, func) -def inject_cum_methods(cls): - methods = [(name, getattr(duck_array_ops, name), True) for name in NAN_CUM_METHODS] - for name, f, include_skipna in methods: - numeric_only = getattr(f, "numeric_only", False) - func = cls._reduce_method(f, include_skipna, numeric_only) - func.__name__ = name - func.__doc__ = _CUM_DOCSTRING_TEMPLATE.format( - name=name, - cls=cls.__name__, - extra_args=cls._cum_extra_args_docstring.format(name=name), - ) - setattr(cls, name, func) - - def op_str(name): return f"__{name}__" @@ -316,16 +301,6 @@ def __init_subclass__(cls, **kwargs): inject_reduce_methods(cls) -class IncludeCumMethods: - __slots__ = () - - def __init_subclass__(cls, **kwargs): - super().__init_subclass__(**kwargs) - - if getattr(cls, "_reduce_method", None): - inject_cum_methods(cls) - - class IncludeNumpySameMethods: __slots__ = () diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 12d81fc8601..15ea45263d5 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -5,7 +5,7 @@ import math import numbers import warnings -from collections.abc import Hashable, Iterable, Mapping, Sequence +from collections.abc import Hashable, Mapping, Sequence from datetime import timedelta from functools import partial from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, cast @@ -1704,7 +1704,7 @@ def clip(self, min=None, max=None): return apply_ufunc(np.clip, self, min, max, dask="allowed") - def reduce( + def reduce( # type: ignore[override] self, func: Callable[..., Any], dim: Dims = None, @@ -1745,59 +1745,21 @@ def reduce( Array with summarized data and the indicated dimension(s) removed. 
""" - if dim == ...: - dim = None - if dim is not None and axis is not None: - raise ValueError("cannot supply both 'axis' and 'dim' arguments") - - if dim is not None: - axis = self.get_axis_num(dim) - - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", r"Mean of empty slice", category=RuntimeWarning - ) - if axis is not None: - if isinstance(axis, tuple) and len(axis) == 1: - # unpack axis for the benefit of functions - # like np.argmin which can't handle tuple arguments - axis = axis[0] - data = func(self.data, axis=axis, **kwargs) - else: - data = func(self.data, **kwargs) - - if getattr(data, "shape", ()) == self.shape: - dims = self.dims - else: - removed_axes: Iterable[int] - if axis is None: - removed_axes = range(self.ndim) - else: - removed_axes = np.atleast_1d(axis) % self.ndim - if keepdims: - # Insert np.newaxis for removed dims - slices = tuple( - np.newaxis if i in removed_axes else slice(None, None) - for i in range(self.ndim) - ) - if getattr(data, "shape", None) is None: - # Reduce has produced a scalar value, not an array-like - data = np.asanyarray(data)[slices] - else: - data = data[slices] - dims = self.dims - else: - dims = tuple( - adim for n, adim in enumerate(self.dims) if n not in removed_axes - ) + keep_attrs_ = ( + _get_keep_attrs(default=False) if keep_attrs is None else keep_attrs + ) - if keep_attrs is None: - keep_attrs = _get_keep_attrs(default=False) - attrs = self._attrs if keep_attrs else None + # Noe that the call order for Variable.mean is + # Variable.mean -> NamedArray.mean -> Variable.reduce + # -> NamedArray.reduce + result = super().reduce( + func=func, dim=dim, axis=axis, keepdims=keepdims, **kwargs + ) - # We need to return `Variable` rather than the type of `self` at the moment, ref - # #8216 - return Variable(dims, data, attrs=attrs) + # return Variable always to support IndexVariable + return Variable( + result.dims, result._data, attrs=result._attrs if keep_attrs_ else None + ) @classmethod def 
concat( diff --git a/xarray/namedarray/_aggregations.py b/xarray/namedarray/_aggregations.py new file mode 100644 index 00000000000..76dfb18d068 --- /dev/null +++ b/xarray/namedarray/_aggregations.py @@ -0,0 +1,949 @@ +"""Mixin classes with reduction operations.""" +# This file was generated using xarray.util.generate_aggregations. Do not edit manually. + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any, Callable + +from xarray.core import duck_array_ops +from xarray.core.types import Dims, Self + + +class NamedArrayAggregations: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, + keepdims: bool = False, + **kwargs: Any, + ) -> Self: + raise NotImplementedError() + + def count( + self, + dim: Dims = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``count`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``count``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``count`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``count`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + pandas.DataFrame.count + dask.dataframe.DataFrame.count + Dataset.count + DataArray.count + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... 
) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.count() + + array(5) + """ + return self.reduce( + duck_array_ops.count, + dim=dim, + **kwargs, + ) + + def all( + self, + dim: Dims = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``all`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``all``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``all`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``all`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.all + dask.array.all + Dataset.all + DataArray.all + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([True, True, True, True, True, False], dtype=bool), + ... ) + >>> na + + array([ True, True, True, True, True, False]) + + >>> na.all() + + array(False) + """ + return self.reduce( + duck_array_ops.array_all, + dim=dim, + **kwargs, + ) + + def any( + self, + dim: Dims = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``any`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``any``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``any`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``any`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.any + dask.array.any + Dataset.any + DataArray.any + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([True, True, True, True, True, False], dtype=bool), + ... ) + >>> na + + array([ True, True, True, True, True, False]) + + >>> na.any() + + array(True) + """ + return self.reduce( + duck_array_ops.array_any, + dim=dim, + **kwargs, + ) + + def max( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``max`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``max``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``max`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``max`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.max + dask.array.max + Dataset.max + DataArray.max + :ref:`agg` + User guide on reduction or aggregation operations. 
+ + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.max() + + array(3.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.max(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.max, + dim=dim, + skipna=skipna, + **kwargs, + ) + + def min( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``min`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``min``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``min`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``min`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.min + dask.array.min + Dataset.min + DataArray.min + :ref:`agg` + User guide on reduction or aggregation operations. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.min() + + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> na.min(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.min, + dim=dim, + skipna=skipna, + **kwargs, + ) + + def mean( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``mean`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``mean``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``mean`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``mean`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.mean + dask.array.mean + Dataset.mean + DataArray.mean + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.mean() + + array(1.6) + + Use ``skipna`` to control whether NaNs are ignored. 
+ + >>> na.mean(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.mean, + dim=dim, + skipna=skipna, + **kwargs, + ) + + def prod( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + min_count: int | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``prod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``prod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``prod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``prod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.prod + dask.array.prod + Dataset.prod + DataArray.prod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... 
) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.prod() + + array(0.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.prod(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> na.prod(skipna=True, min_count=2) + + array(0.) + """ + return self.reduce( + duck_array_ops.prod, + dim=dim, + skipna=skipna, + min_count=min_count, + **kwargs, + ) + + def sum( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + min_count: int | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``sum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``sum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + min_count : int or None, optional + The required number of valid values to perform the operation. If + fewer than min_count non-NA values are present the result will be + NA. Only used if skipna is set to True or defaults to True for the + array's dtype. Changed in version 0.17.0: if specified on an integer + array and skipna=True, the result will be a float array. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``sum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``sum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.sum + dask.array.sum + Dataset.sum + DataArray.sum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.sum() + + array(8.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.sum(skipna=False) + + array(nan) + + Specify ``min_count`` for finer control over when NaNs are ignored. + + >>> na.sum(skipna=True, min_count=2) + + array(8.) + """ + return self.reduce( + duck_array_ops.sum, + dim=dim, + skipna=skipna, + min_count=min_count, + **kwargs, + ) + + def std( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + ddof: int = 0, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``std`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``std``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``std`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``std`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.std + dask.array.std + Dataset.std + DataArray.std + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.std() + + array(1.0198039) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.std(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> na.std(skipna=True, ddof=1) + + array(1.14017543) + """ + return self.reduce( + duck_array_ops.std, + dim=dim, + skipna=skipna, + ddof=ddof, + **kwargs, + ) + + def var( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + ddof: int = 0, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``var`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``var``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + ddof : int, default: 0 + “Delta Degrees of Freedom”: the divisor used in the calculation is ``N - ddof``, + where ``N`` represents the number of elements. 
+ **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``var`` on this object's data. + These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``var`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.var + dask.array.var + Dataset.var + DataArray.var + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.var() + + array(1.04) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.var(skipna=False) + + array(nan) + + Specify ``ddof=1`` for an unbiased estimate. + + >>> na.var(skipna=True, ddof=1) + + array(1.3) + """ + return self.reduce( + duck_array_ops.var, + dim=dim, + skipna=skipna, + ddof=ddof, + **kwargs, + ) + + def median( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``median`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``median``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``median`` on this object's data. 
+ These could include dask-specific kwargs like ``split_every``. + + Returns + ------- + reduced : NamedArray + New NamedArray with ``median`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.median + dask.array.median + Dataset.median + DataArray.median + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.median() + + array(2.) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.median(skipna=False) + + array(nan) + """ + return self.reduce( + duck_array_ops.median, + dim=dim, + skipna=skipna, + **kwargs, + ) + + def cumsum( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``cumsum`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumsum``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumsum`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``cumsum`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumsum + dask.array.cumsum + Dataset.cumsum + DataArray.cumsum + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.cumsum() + + array([1., 3., 6., 6., 8., 8.]) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.cumsum(skipna=False) + + array([ 1., 3., 6., 6., 8., nan]) + """ + return self.reduce( + duck_array_ops.cumsum, + dim=dim, + skipna=skipna, + **kwargs, + ) + + def cumprod( + self, + dim: Dims = None, + *, + skipna: bool | None = None, + **kwargs: Any, + ) -> Self: + """ + Reduce this NamedArray's data by applying ``cumprod`` along some dimension(s). + + Parameters + ---------- + dim : str, Iterable of Hashable, "..." or None, default: None + Name of dimension[s] along which to apply ``cumprod``. For e.g. ``dim="x"`` + or ``dim=["x", "y"]``. If "..." or None, will reduce over all dimensions. + skipna : bool or None, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or ``skipna=True`` has not been + implemented (object, datetime64 or timedelta64). + **kwargs : Any + Additional keyword arguments passed on to the appropriate array + function for calculating ``cumprod`` on this object's data. + These could include dask-specific kwargs like ``split_every``. 
+ + Returns + ------- + reduced : NamedArray + New NamedArray with ``cumprod`` applied to its data and the + indicated dimension(s) removed + + See Also + -------- + numpy.cumprod + dask.array.cumprod + Dataset.cumprod + DataArray.cumprod + :ref:`agg` + User guide on reduction or aggregation operations. + + Notes + ----- + Non-numeric variables will be removed prior to reducing. + + Examples + -------- + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x", + ... np.array([1, 2, 3, 0, 2, np.nan]), + ... ) + >>> na + + array([ 1., 2., 3., 0., 2., nan]) + + >>> na.cumprod() + + array([1., 2., 6., 0., 0., 0.]) + + Use ``skipna`` to control whether NaNs are ignored. + + >>> na.cumprod(skipna=False) + + array([ 1., 2., 6., 0., 0., nan]) + """ + return self.reduce( + duck_array_ops.cumprod, + dim=dim, + skipna=skipna, + **kwargs, + ) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 92cc742e131..6833215a9f2 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -2,14 +2,16 @@ import copy import math +import warnings from collections.abc import Hashable, Iterable, Mapping, Sequence from typing import TYPE_CHECKING, Any, Callable, Generic, Union, cast import numpy as np # TODO: get rid of this after migrating this class to array API -from xarray.core import dtypes +from xarray.core import dtypes, formatting, formatting_html from xarray.core.indexing import ExplicitlyIndexed +from xarray.namedarray._aggregations import NamedArrayAggregations from xarray.namedarray.utils import ( Default, T_DuckArray, @@ -22,6 +24,7 @@ ) if TYPE_CHECKING: + from xarray.core.types import Dims from xarray.namedarray.utils import Self # type: ignore[attr-defined] try: @@ -41,7 +44,7 @@ # T_NamedArray = TypeVar("T_NamedArray", bound="NamedArray[T_DuckArray]") DimsInput = Union[str, Iterable[Hashable]] - Dims = tuple[Hashable, ...] + DimsProperty = tuple[Hashable, ...] 
AttrsInput = Union[Mapping[Any, Any], None] @@ -76,7 +79,7 @@ def as_compatible_data( return cast(T_DuckArray, np.asarray(data)) -class NamedArray(Generic[T_DuckArray]): +class NamedArray(NamedArrayAggregations, Generic[T_DuckArray]): """A lightweight wrapper around duck arrays with named dimensions and attributes which describe a single Array. Numeric operations on this object implement array broadcasting and dimension alignment based on dimension names, @@ -85,7 +88,7 @@ class NamedArray(Generic[T_DuckArray]): __slots__ = ("_data", "_dims", "_attrs") _data: T_DuckArray - _dims: Dims + _dims: DimsProperty _attrs: dict[Any, Any] | None def __init__( @@ -195,7 +198,7 @@ def nbytes(self) -> int: return self.size * self.dtype.itemsize @property - def dims(self) -> Dims: + def dims(self) -> DimsProperty: """Tuple of dimension names with which this NamedArray is associated.""" return self._dims @@ -203,7 +206,7 @@ def dims(self) -> Dims: def dims(self, value: DimsInput) -> None: self._dims = self._parse_dimensions(value) - def _parse_dimensions(self, dims: DimsInput) -> Dims: + def _parse_dimensions(self, dims: DimsInput) -> DimsProperty: dims = (dims,) if isinstance(dims, str) else tuple(dims) if len(dims) != self.ndim: raise ValueError( @@ -327,6 +330,30 @@ def _dask_finalize( data = array_func(results, *args, **kwargs) return type(self)(self._dims, data, attrs=self._attrs) + def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, ...]: + """Return axis number(s) corresponding to dimension(s) in this array. + + Parameters + ---------- + dim : str or iterable of str + Dimension name(s) for which to lookup axes. + + Returns + ------- + int or tuple of int + Axis number or numbers corresponding to the given dimensions. 
+ """ + if not isinstance(dim, str) and isinstance(dim, Iterable): + return tuple(self._get_axis_num(d) for d in dim) + else: + return self._get_axis_num(dim) + + def _get_axis_num(self: Any, dim: Hashable) -> int: + try: + return self.dims.index(dim) # type: ignore[no-any-return] + except ValueError: + raise ValueError(f"{dim!r} not found in array dimensions {self.dims!r}") + @property def chunks(self) -> tuple[tuple[int, ...], ...] | None: """ @@ -446,6 +473,91 @@ def copy( """ return self._copy(deep=deep, data=data) + def reduce( + self, + func: Callable[..., Any], + dim: Dims = None, + axis: int | Sequence[int] | None = None, + keepdims: bool = False, + **kwargs: Any, + ) -> Self: + """Reduce this array by applying `func` along some dimension(s). + + Parameters + ---------- + func : callable + Function which can be called in the form + `func(x, axis=axis, **kwargs)` to return the result of reducing an + np.ndarray over an integer valued axis. + dim : "...", str, Iterable of Hashable or None, optional + Dimension(s) over which to apply `func`. By default `func` is + applied over all dimensions. + axis : int or Sequence of int, optional + Axis(es) over which to apply `func`. Only one of the 'dim' + and 'axis' arguments can be supplied. If neither are supplied, then + the reduction is calculated over the flattened array (by calling + `func(x)` without an axis argument). + keepdims : bool, default: False + If True, the dimensions which are reduced are left in the result + as dimensions of size one + **kwargs : dict + Additional keyword arguments passed on to `func`. + + Returns + ------- + reduced : Array + Array with summarized data and the indicated dimension(s) + removed. 
+ """ + if dim == ...: + dim = None + if dim is not None and axis is not None: + raise ValueError("cannot supply both 'axis' and 'dim' arguments") + + if dim is not None: + axis = self.get_axis_num(dim) + + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", r"Mean of empty slice", category=RuntimeWarning + ) + if axis is not None: + if isinstance(axis, tuple) and len(axis) == 1: + # unpack axis for the benefit of functions + # like np.argmin which can't handle tuple arguments + axis = axis[0] + data = func(self.data, axis=axis, **kwargs) + else: + data = func(self.data, **kwargs) + + if getattr(data, "shape", ()) == self.shape: + dims = self.dims + else: + removed_axes: Iterable[int] + if axis is None: + removed_axes = range(self.ndim) + else: + removed_axes = np.atleast_1d(axis) % self.ndim + if keepdims: + # Insert np.newaxis for removed dims + slices = tuple( + np.newaxis if i in removed_axes else slice(None, None) + for i in range(self.ndim) + ) + if getattr(data, "shape", None) is None: + # Reduce has produced a scalar value, not an array-like + data = np.asanyarray(data)[slices] + else: + data = data[slices] + dims = self.dims + else: + dims = tuple( + adim for n, adim in enumerate(self.dims) if n not in removed_axes + ) + + # Return NamedArray to handle IndexVariable when data is nD + return NamedArray(dims, data, attrs=self._attrs) + def _nonzero(self) -> tuple[Self, ...]: """Equivalent numpy's nonzero but returns a tuple of NamedArrays.""" # TODO we should replace dask's native nonzero @@ -453,6 +565,12 @@ def _nonzero(self) -> tuple[Self, ...]: nonzeros = np.nonzero(self.data) return tuple(type(self)((dim,), nz) for nz, dim in zip(nonzeros, self.dims)) + def __repr__(self) -> str: + return formatting.array_repr(self) + + def _repr_html_(self) -> str: + return formatting_html.array_repr(self) + def _as_sparse( self, sparse_format: str | Default = _default, diff --git a/xarray/util/generate_aggregations.py 
b/xarray/util/generate_aggregations.py index 873f6015b5c..a1233ea0291 100644 --- a/xarray/util/generate_aggregations.py +++ b/xarray/util/generate_aggregations.py @@ -14,7 +14,7 @@ """ import collections import textwrap -from dataclasses import dataclass +from dataclasses import dataclass, field MODULE_PREAMBLE = '''\ """Mixin classes with reduction operations.""" @@ -34,9 +34,23 @@ from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -flox_available = module_available("flox")''' +flox_available = module_available("flox") +''' -DEFAULT_PREAMBLE = """ +NAMED_ARRAY_MODULE_PREAMBLE = '''\ +"""Mixin classes with reduction operations.""" +# This file was generated using xarray.util.generate_aggregations. Do not edit manually. + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any, Callable + +from xarray.core import duck_array_ops +from xarray.core.types import Dims, Self +''' + +AGGREGATIONS_PREAMBLE = """ class {obj}{cls}Aggregations: __slots__ = () @@ -53,6 +67,23 @@ def reduce( ) -> Self: raise NotImplementedError()""" +NAMED_ARRAY_AGGREGATIONS_PREAMBLE = """ + +class {obj}{cls}Aggregations: + __slots__ = () + + def reduce( + self, + func: Callable[..., Any], + dim: Dims = None, + *, + axis: int | Sequence[int] | None = None, + keepdims: bool = False, + **kwargs: Any, + ) -> Self: + raise NotImplementedError()""" + + GROUPBY_PREAMBLE = """ class {obj}{cls}Aggregations: @@ -104,9 +135,7 @@ def _flox_reduce( TEMPLATE_REDUCTION_SIGNATURE = ''' def {method}( self, - dim: Dims = None, - *,{extra_kwargs} - keep_attrs: bool | None = None, + dim: Dims = None,{kw_only}{extra_kwargs}{keep_attrs} **kwargs: Any, ) -> Self: """ @@ -139,9 +168,7 @@ def {method}( TEMPLATE_SEE_ALSO = """ See Also -------- - numpy.{method} - dask.array.{method} - {see_also_obj}.{method} +{see_also_methods} :ref:`{docref}` User guide on {docref_description}.""" @@ -186,15 +213,6 @@ def {method}( function for calculating 
``{method}`` on this object's data. These could include dask-specific kwargs like ``split_every``.""" -_COUNT_SEE_ALSO = """ - See Also - -------- - pandas.DataFrame.{method} - dask.dataframe.DataFrame.{method} - {see_also_obj}.{method} - :ref:`{docref}` - User guide on {docref_description}.""" - _NUMERIC_ONLY_NOTES = "Non-numeric variables will be removed prior to reducing." _FLOX_NOTES_TEMPLATE = """Use the ``flox`` package to significantly speed up {kind} computations, @@ -238,6 +256,15 @@ def {method}( ) +@dataclass +class DataStructure: + name: str + create_example: str + example_var_name: str + numeric_only: bool = False + see_also_modules: tuple[str] = tuple + + class Method: def __init__( self, @@ -245,11 +272,12 @@ def __init__( bool_reduce=False, extra_kwargs=tuple(), numeric_only=False, + see_also_modules=("numpy", "dask.array"), ): self.name = name self.extra_kwargs = extra_kwargs self.numeric_only = numeric_only - + self.see_also_modules = see_also_modules if bool_reduce: self.array_method = f"array_{name}" self.np_example_array = """ @@ -261,34 +289,26 @@ def __init__( ... np.array([1, 2, 3, 0, 2, np.nan])""" +@dataclass class AggregationGenerator: _dim_docstring = _DIM_DOCSTRING _template_signature = TEMPLATE_REDUCTION_SIGNATURE - def __init__( - self, - cls, - datastructure, - methods, - docref, - docref_description, - example_call_preamble, - definition_preamble, - see_also_obj=None, - notes=None, - ): - self.datastructure = datastructure - self.cls = cls - self.methods = methods - self.docref = docref - self.docref_description = docref_description - self.example_call_preamble = example_call_preamble - self.preamble = definition_preamble.format(obj=datastructure.name, cls=cls) - self.notes = "" if notes is None else notes - if not see_also_obj: - self.see_also_obj = self.datastructure.name - else: - self.see_also_obj = see_also_obj + cls: str + datastructure: DataStructure + methods: tuple[Method, ...] 
+    docref: str
+    docref_description: str
+    example_call_preamble: str
+    definition_preamble: str
+    has_keep_attrs: bool = True
+    notes: str = ""
+    preamble: str = field(init=False)
+
+    def __post_init__(self):
+        self.preamble = self.definition_preamble.format(
+            obj=self.datastructure.name, cls=self.cls
+        )

     def generate_methods(self):
         yield [self.preamble]
@@ -296,7 +316,16 @@ def generate_methods(self):
             yield self.generate_method(method)

     def generate_method(self, method):
-        template_kwargs = dict(obj=self.datastructure.name, method=method.name)
+        has_kw_only = method.extra_kwargs or self.has_keep_attrs
+
+        template_kwargs = dict(
+            obj=self.datastructure.name,
+            method=method.name,
+            keep_attrs="\n        keep_attrs: bool | None = None,"
+            if self.has_keep_attrs
+            else "",
+            kw_only="\n        *," if has_kw_only else "",
+        )

         if method.extra_kwargs:
             extra_kwargs = "\n    " + "\n    ".join(
@@ -313,7 +342,7 @@ def generate_method(self, method):
             for text in [
                 self._dim_docstring.format(method=method.name, cls=self.cls),
                 *(kwarg.docs for kwarg in method.extra_kwargs if kwarg.docs),
-                _KEEP_ATTRS_DOCSTRING,
+                _KEEP_ATTRS_DOCSTRING if self.has_keep_attrs else None,
                 _KWARGS_DOCSTRING.format(method=method.name),
             ]:
                 if text:
@@ -321,13 +350,24 @@ def generate_method(self, method):

         yield TEMPLATE_RETURNS.format(**template_kwargs)

-        see_also = _COUNT_SEE_ALSO if method.name == "count" else TEMPLATE_SEE_ALSO
+        # we want Dataset.count to refer to DataArray.count
+        # but we also want DatasetGroupBy.count to refer to Dataset.count
+        # The generic aggregations have self.cls == ''
+        others = (
+            self.datastructure.see_also_modules
+            if self.cls == ""
+            else (self.datastructure.name,)
+        )
+        see_also_methods = "\n".join(
+            " " * 8 + f"{mod}.{method.name}"
+            for mod in (method.see_also_modules + others)
+        )
         # Fixes broken links mentioned in #8055
-        yield see_also.format(
+        yield TEMPLATE_SEE_ALSO.format(
             **template_kwargs,
             docref=self.docref,
             docref_description=self.docref_description,
-
see_also_obj=self.see_also_obj, + see_also_methods=see_also_methods, ) notes = self.notes @@ -342,18 +382,12 @@ def generate_method(self, method): yield textwrap.indent(self.generate_example(method=method), "") yield ' """' - yield self.generate_code(method) + yield self.generate_code(method, self.has_keep_attrs) def generate_example(self, method): - create_da = f""" - >>> da = xr.DataArray({method.np_example_array}, - ... dims="time", - ... coords=dict( - ... time=("time", pd.date_range("2001-01-01", freq="M", periods=6)), - ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), - ... ), - ... )""" - + created = self.datastructure.create_example.format( + example_array=method.np_example_array + ) calculation = f"{self.datastructure.example_var_name}{self.example_call_preamble}.{method.name}" if method.extra_kwargs: extra_examples = "".join( @@ -364,7 +398,8 @@ def generate_example(self, method): return f""" Examples - --------{create_da}{self.datastructure.docstring_create} + --------{created} + >>> {self.datastructure.example_var_name} >>> {calculation}(){extra_examples}""" @@ -373,7 +408,7 @@ class GroupByAggregationGenerator(AggregationGenerator): _dim_docstring = _DIM_DOCSTRING_GROUPBY _template_signature = TEMPLATE_REDUCTION_SIGNATURE_GROUPBY - def generate_code(self, method): + def generate_code(self, method, has_keep_attrs): extra_kwargs = [kwarg.call for kwarg in method.extra_kwargs if kwarg.call] if self.datastructure.numeric_only: @@ -425,7 +460,7 @@ def generate_code(self, method): class GenericAggregationGenerator(AggregationGenerator): - def generate_code(self, method): + def generate_code(self, method, has_keep_attrs): extra_kwargs = [kwarg.call for kwarg in method.extra_kwargs if kwarg.call] if self.datastructure.numeric_only: @@ -435,18 +470,20 @@ def generate_code(self, method): extra_kwargs = textwrap.indent("\n" + "\n".join(extra_kwargs), 12 * " ") else: extra_kwargs = "" + keep_attrs = ( + "\n" + 12 * " " + "keep_attrs=keep_attrs," if 
has_keep_attrs else "" + ) return f"""\ return self.reduce( duck_array_ops.{method.array_method}, - dim=dim,{extra_kwargs} - keep_attrs=keep_attrs, + dim=dim,{extra_kwargs}{keep_attrs} **kwargs, )""" AGGREGATION_METHODS = ( # Reductions: - Method("count"), + Method("count", see_also_modules=("pandas.DataFrame", "dask.dataframe.DataFrame")), Method("all", bool_reduce=True), Method("any", bool_reduce=True), Method("max", extra_kwargs=(skipna,)), @@ -463,28 +500,34 @@ def generate_code(self, method): ) -@dataclass -class DataStructure: - name: str - docstring_create: str - example_var_name: str - numeric_only: bool = False - - DATASET_OBJECT = DataStructure( name="Dataset", - docstring_create=""" - >>> ds = xr.Dataset(dict(da=da)) - >>> ds""", + create_example=""" + >>> da = xr.DataArray({example_array}, + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... ) + >>> ds = xr.Dataset(dict(da=da))""", example_var_name="ds", numeric_only=True, + see_also_modules=("DataArray",), ) DATAARRAY_OBJECT = DataStructure( name="DataArray", - docstring_create=""" - >>> da""", + create_example=""" + >>> da = xr.DataArray({example_array}, + ... dims="time", + ... coords=dict( + ... time=("time", pd.date_range("2001-01-01", freq="M", periods=6)), + ... labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), + ... ), + ... 
)""", example_var_name="da", numeric_only=False, + see_also_modules=("Dataset",), ) DATASET_GENERATOR = GenericAggregationGenerator( cls="", @@ -493,8 +536,7 @@ class DataStructure: docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", - see_also_obj="DataArray", - definition_preamble=DEFAULT_PREAMBLE, + definition_preamble=AGGREGATIONS_PREAMBLE, ) DATAARRAY_GENERATOR = GenericAggregationGenerator( cls="", @@ -503,8 +545,7 @@ class DataStructure: docref="agg", docref_description="reduction or aggregation operations", example_call_preamble="", - see_also_obj="Dataset", - definition_preamble=DEFAULT_PREAMBLE, + definition_preamble=AGGREGATIONS_PREAMBLE, ) DATAARRAY_GROUPBY_GENERATOR = GroupByAggregationGenerator( cls="GroupBy", @@ -547,24 +588,59 @@ class DataStructure: notes=_FLOX_RESAMPLE_NOTES, ) +NAMED_ARRAY_OBJECT = DataStructure( + name="NamedArray", + create_example=""" + >>> from xarray.namedarray.core import NamedArray + >>> na = NamedArray( + ... "x",{example_array}, + ... 
)""", + example_var_name="na", + numeric_only=False, + see_also_modules=("Dataset", "DataArray"), +) + +NAMED_ARRAY_GENERATOR = GenericAggregationGenerator( + cls="", + datastructure=NAMED_ARRAY_OBJECT, + methods=AGGREGATION_METHODS, + docref="agg", + docref_description="reduction or aggregation operations", + example_call_preamble="", + definition_preamble=NAMED_ARRAY_AGGREGATIONS_PREAMBLE, + has_keep_attrs=False, +) + + +def write_methods(filepath, generators, preamble): + with open(filepath, mode="w", encoding="utf-8") as f: + f.write(preamble) + for gen in generators: + for lines in gen.generate_methods(): + for line in lines: + f.write(line + "\n") + if __name__ == "__main__": import os from pathlib import Path p = Path(os.getcwd()) - filepath = p.parent / "xarray" / "xarray" / "core" / "_aggregations.py" - # filepath = p.parent / "core" / "_aggregations.py" # Run from script location - with open(filepath, mode="w", encoding="utf-8") as f: - f.write(MODULE_PREAMBLE + "\n") - for gen in [ + write_methods( + filepath=p.parent / "xarray" / "xarray" / "core" / "_aggregations.py", + generators=[ DATASET_GENERATOR, DATAARRAY_GENERATOR, DATASET_GROUPBY_GENERATOR, DATASET_RESAMPLE_GENERATOR, DATAARRAY_GROUPBY_GENERATOR, DATAARRAY_RESAMPLE_GENERATOR, - ]: - for lines in gen.generate_methods(): - for line in lines: - f.write(line + "\n") + ], + preamble=MODULE_PREAMBLE, + ) + write_methods( + filepath=p.parent / "xarray" / "xarray" / "namedarray" / "_aggregations.py", + generators=[NAMED_ARRAY_GENERATOR], + preamble=NAMED_ARRAY_MODULE_PREAMBLE, + ) + # filepath = p.parent / "core" / "_aggregations.py" # Run from script location