Improve the docs of the filter() arguments (#870)

IAMconsortium · Aug 7, 2024 · bfdc8b6 · bfdc8b6
1 parent 281a126
commit bfdc8b6
Show file tree

Hide file tree

Showing 6 changed files with 79 additions and 43 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -1,6 +1,7 @@
 # Next release
 
-- [#866](https://github.com/IAMconsortium/pyam/pull/888) Support filtering by a `measurand` argument with tuples of
+- [#870](https://github.com/IAMconsortium/pyam/pull/870) Improve the docs of the `filter()` arguments
+- [#866](https://github.com/IAMconsortium/pyam/pull/866) Support filtering by a `measurand` argument with tuples of
   variable and units
 
 # Release v2.2.3

diff --git a/docs/api.rst b/docs/api.rst
@@ -13,7 +13,6 @@ and methods.
    api/general
    api/iamdataframe
    api/database
-   api/slice
    api/filtering
    api/compute
    api/plotting

diff --git a/docs/api/database.rst b/docs/api/database.rst
@@ -23,7 +23,7 @@ Reading from an |ixmp4| platform
 --------------------------------
 
 The |pyam| package provides a simple interface to read timeseries data and meta
-indicators from local or remote |ixmp4| platform instancs.
+indicators from local or remote |ixmp4| platform instances.
 
 .. autofunction:: read_ixmp4
 

diff --git a/docs/api/filtering.rst b/docs/api/filtering.rst
@@ -1,12 +1,72 @@
 .. currentmodule:: pyam
 
-Advanced filtering
-==================
+Filtering and slicing
+=====================
 
-|pyam| includes a function to directly downselect a :class:`pandas.DataFrame`
-with appropriate columns or index dimensions
-(i.e., :code:`['model', 'scenario']`)
-using a :class:`IamDataFrame` and keyword arguments similar
-to the :meth:`IamDataFrame.filter` function.
+Arguments for filtering an :class:`IamDataFrame`
+------------------------------------------------
 
-.. autofunction:: filter_by_meta
+The |pyam| package provides several methods to filter an :class:`IamDataFrame` by its
+(timeseries) **data** or **meta** values. Read more about the `Data Model <data.html>`_
+that is implemented by an :class:`IamDataFrame`.
+
+The following arguments are available for filtering and can be combined as needed:
+
+Index
+^^^^^
+- A *column* of the :attr:`IamDataFrame.index`
+  (usually '**model**' and '**scenario**'): string or list of strings
+- '**index**': list of model/scenario-tuples or a :class:`pandas.MultiIndex`
+
+Timeseries data coordinates
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+- Any *column* of the :attr:`IamDataFrame.coordinates <pyam.IamDataFrame.coordinates>`
+  ('**region**', '**variable**', '**unit**'): string or list of strings
+- '**measurand**': a tuple (or list of tuples) of '*variable*' and '*unit*'
+- '**level**': the "depth" of entries in the '*variable*' column (number of '|'),
+  excluding the strings in the '*variable*' argument, if given
+- '**year**': takes an integer (int/:class:`numpy.int64`), a list of integers or
+  a range. Note that the last year of a range is not included,
+  so ``range(2010, 2015)`` is interpreted as ``[2010, ..., 2014]``
+- '**time_domain**': can be 'year' or 'datetime'
+- Arguments for filtering by :class:`datetime.datetime` or :class:`numpy.datetime64`
+  ('**month**', '**hour**', '**time**')
+
+Meta indicators and other attributes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+- Any *column* of the :attr:`IamDataFrame.meta <pyam.IamDataFrame.meta>` dataframe:
+  string, integer, float, or list of these
+- '**exclude**' (see :attr:`IamDataFrame.exclude <pyam.IamDataFrame.exclude>`): boolean
+
+.. note::
+
+    In any string filter, '*' is interpreted as a wildcard, unless the keyword argument
+    *regexp=True* is used; in this case, strings are treated as
+    `regular expressions <https://docs.python.org/3/library/re.html>`_.
+
+Methods for filtering and slicing an :class:`IamDataFrame`
+----------------------------------------------------------
+
+.. automethod:: pyam.IamDataFrame.filter
+   :noindex:
+
+.. automethod:: pyam.IamDataFrame.slice
+   :noindex:
+
+The **IamSlice** class
+----------------------
+
+This class is an auxiliary feature to streamline the implementation of the
+:meth:`IamDataFrame.filter` method.
+
+.. autoclass:: pyam.slice.IamSlice
+   :members: dimensions, time, info
+
+Filtering using a proxy :class:`IamDataFrame`
+---------------------------------------------
+
+|pyam| includes a function to directly filter a :class:`pandas.DataFrame`
+with appropriate columns or index dimensions (i.e., '*model*' and '*scenario*') using
+an :class:`IamDataFrame` and keyword arguments similar to :meth:`IamDataFrame.filter`.
+
+.. autofunction:: pyam.filter_by_meta
diff --git a/docs/api/slice.rst b/docs/api/slice.rst
diff --git a/pyam/core.py b/pyam/core.py
@@ -1842,34 +1842,12 @@ def slice(self, *, keep=True, **kwargs):
         keep : bool, optional
             Keep all scenarios satisfying the filters (if *True*) or the inverse.
         **kwargs
-            Arguments for filtering. See the "Notes".
+            Arguments for filtering. Read more about the `available filter options
+            <https://pyam-iamc.readthedocs.io/en/stable/api/filtering.html>`_.
 
         Returns
         -------
         :class:`pyam.slice.IamSlice`
-
-        Notes
-        -----
-        The following arguments are available for filtering:
-
-         - 'model', 'scenario', 'region', 'variable', 'unit':
-           string or list of strings
-         - 'measurand': a tuple (or list of tuples) of 'variable' and 'unit'
-         - 'meta' columns: mapping of column name to allowed values
-         - 'exclude': values of :attr:`exclude`
-         - 'index': list of model, scenario 2-tuples or :class:`pandas.MultiIndex`
-         - 'level': the "depth" of entries in the variable column (number of '|')
-           (excluding the strings given in the 'variable' argument)
-         - 'year': takes an integer (int/np.int64), a list of integers or
-           a range. Note that the last year of a range is not included,
-           so `range(2010, 2015)` is interpreted as `[2010, ..., 2014]`
-         - 'time_domain': can be "year" or "datetime"
-         - arguments for filtering by `datetime.datetime` or np.datetime64
-           ('month', 'hour', 'time')
-         - 'regexp=True' disables pseudo-regexp syntax in `pattern_match()`
-
-        In any string filters, `*` is interpreted as wildcard.
-
         """
 
         _keep = self._apply_filters(**kwargs)
@@ -1891,7 +1869,12 @@ def filter(self, *, keep=True, inplace=False, **kwargs):
         inplace : bool, optional
             If *True*, do operation inplace and return *None*.
         **kwargs
-            Passed to :meth:`slice`.
+            Arguments for filtering. Read more about the `available filter options
+            <https://pyam-iamc.readthedocs.io/en/stable/api/filtering.html>`_.
+
+        Returns
+        -------
+        :class:`pyam.IamDataFrame` or **None**
         """
 
         # downselect `data` rows and clean up index