From bfdc8b6a658236972100afc14bb7d18353b0fcc0 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Wed, 7 Aug 2024 12:24:18 +0200 Subject: [PATCH] Improve the docs of the `filter()` arguments (#870) --- RELEASE_NOTES.md | 3 +- docs/api.rst | 1 - docs/api/database.rst | 2 +- docs/api/filtering.rst | 76 +++++++++++++++++++++++++++++++++++++----- docs/api/slice.rst | 7 ---- pyam/core.py | 33 +++++------------- 6 files changed, 79 insertions(+), 43 deletions(-) delete mode 100644 docs/api/slice.rst diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index ddf78f9cc..f38f0fdc2 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,6 +1,7 @@ # Next release -- [#866](https://github.com/IAMconsortium/pyam/pull/888) Support filtering by a `measurand` argument with tuples of +- [#870](https://github.com/IAMconsortium/pyam/pull/870) Improve the docs of the `filter()` arguments +- [#866](https://github.com/IAMconsortium/pyam/pull/866) Support filtering by a `measurand` argument with tuples of variable and units # Release v2.2.3 diff --git a/docs/api.rst b/docs/api.rst index fc34ad79d..8c9fd7260 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -13,7 +13,6 @@ and methods. api/general api/iamdataframe api/database - api/slice api/filtering api/compute api/plotting diff --git a/docs/api/database.rst b/docs/api/database.rst index a0c377b13..00e65cd6f 100644 --- a/docs/api/database.rst +++ b/docs/api/database.rst @@ -23,7 +23,7 @@ Reading from an |ixmp4| platform -------------------------------- The |pyam| package provides a simple interface to read timeseries data and meta -indicators from local or remote |ixmp4| platform instancs. +indicators from local or remote |ixmp4| platform instances. .. autofunction:: read_ixmp4 diff --git a/docs/api/filtering.rst b/docs/api/filtering.rst index 4a0efe8f0..0c9319c35 100644 --- a/docs/api/filtering.rst +++ b/docs/api/filtering.rst @@ -1,12 +1,72 @@ .. 
currentmodule:: pyam -Advanced filtering -================== +Filtering and slicing +===================== -|pyam| includes a function to directly downselect a :class:`pandas.DataFrame` -with appropriate columns or index dimensions -(i.e., :code:`['model', 'scenario']`) -using a :class:`IamDataFrame` and keyword arguments similar -to the :meth:`IamDataFrame.filter` function. +Arguments for filtering an :class:`IamDataFrame` +------------------------------------------------ -.. autofunction:: filter_by_meta +The |pyam| package provides several methods to filter an :class:`IamDataFrame` by its +(timeseries) **data** or **meta** values. Read more about the `Data Model `_ +that is implemented by an :class:`IamDataFrame`. + +The following arguments are available for filtering and can be combined as needed: + +Index +^^^^^ +- A *column* of the :attr:`IamDataFrame.index` + (usually '**model**' and '**scenario**'): string or list of strings +- '**index**': list of model/scenario-tuples or a :class:`pandas.MultiIndex` + +Timeseries data coordinates +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Any *column* of the :attr:`IamDataFrame.coordinates ` + ('**region**', '**variable**', '**unit**'): string or list of strings +- '**measurand**': a tuple (or list of tuples) of '*variable*' and '*unit*' +- '**level**': the "depth" of entries in the '*variable*' column (number of '|') + (excluding the strings in the '*variable*' argument, if given) +- '**year**': takes an integer (int/:class:`numpy.int64`), a list of integers or + a range. 
Note that the last year of a range is not included, + so ``range(2010, 2015)`` is interpreted as ``[2010, ..., 2014]`` +- '**time_domain**': can be 'year' or 'datetime' +- Arguments for filtering by :class:`datetime.datetime` or :class:`numpy.datetime64` + ('**month**', '**hour**', '**time**') + +Meta indicators and other attributes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +- Any *column* of the :attr:`IamDataFrame.meta ` dataframe: + string, integer, float, or list of these +- '**exclude**' (see :attr:`IamDataFrame.exclude `): boolean + +.. note:: + + In any string filters, '*' is interpreted as a wildcard, unless the keyword argument + *regexp=True* is used; in this case, strings are treated as + `regular expressions `_. + +Methods for filtering and slicing an :class:`IamDataFrame` +---------------------------------------------------------- + +.. automethod:: pyam.IamDataFrame.filter + :noindex: + +.. automethod:: pyam.IamDataFrame.slice + :noindex: + +The **IamSlice** class +---------------------- + +This class is an auxiliary feature to streamline the implementation of the +:meth:`IamDataFrame.filter` method. + +.. autoclass:: pyam.slice.IamSlice + :members: dimensions, time, info + +Filtering using a proxy :class:`IamDataFrame` +--------------------------------------------- + +|pyam| includes a function to directly filter a :class:`pandas.DataFrame` +with appropriate columns or index dimensions (i.e., '*model*' and '*scenario*') using +an :class:`IamDataFrame` and keyword arguments similar to :meth:`IamDataFrame.filter`. + +.. autofunction:: pyam.filter_by_meta diff --git a/docs/api/slice.rst b/docs/api/slice.rst deleted file mode 100644 index 44ae010f7..000000000 --- a/docs/api/slice.rst +++ /dev/null @@ -1,7 +0,0 @@ -.. currentmodule:: pyam.slice - -The **IamSlice** class -====================== - -.. 
autoclass:: IamSlice - :members: dimensions, time, info diff --git a/pyam/core.py b/pyam/core.py index 27a3a3f57..67e82d934 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -1842,34 +1842,12 @@ def slice(self, *, keep=True, **kwargs): keep : bool, optional Keep all scenarios satisfying the filters (if *True*) or the inverse. **kwargs - Arguments for filtering. See the "Notes". + Arguments for filtering. Read more about the `available filter options + `_. Returns ------- :class:`pyam.slice.IamSlice` - - Notes - ----- - The following arguments are available for filtering: - - - 'model', 'scenario', 'region', 'variable', 'unit': - string or list of strings - - 'measurand': a tuple (or list of tuples) of 'variable' and 'unit' - - 'meta' columns: mapping of column name to allowed values - - 'exclude': values of :attr:`exclude` - - 'index': list of model, scenario 2-tuples or :class:`pandas.MultiIndex` - - 'level': the "depth" of entries in the variable column (number of '|') - (excluding the strings given in the 'variable' argument) - - 'year': takes an integer (int/np.int64), a list of integers or - a range. Note that the last year of a range is not included, - so `range(2010, 2015)` is interpreted as `[2010, ..., 2014]` - - 'time_domain': can be "year" or "datetime" - - arguments for filtering by `datetime.datetime` or np.datetime64 - ('month', 'hour', 'time') - - 'regexp=True' disables pseudo-regexp syntax in `pattern_match()` - - In any string filters, `*` is interpreted as wildcard. - """ _keep = self._apply_filters(**kwargs) @@ -1891,7 +1869,12 @@ def filter(self, *, keep=True, inplace=False, **kwargs): inplace : bool, optional If *True*, do operation inplace and return *None*. **kwargs - Passed to :meth:`slice`. + Arguments for filtering. Read more about the `available filter options + `_. + + Returns + ------- + :class:`pyam.IamDataFrame` or **None** """ # downselect `data` rows and clean up index