From bfdc8b6a658236972100afc14bb7d18353b0fcc0 Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <dh@dergelbesalon.at>
Date: Wed, 7 Aug 2024 12:24:18 +0200
Subject: [PATCH] Improve the docs of the `filter()` arguments (#870)

---
 RELEASE_NOTES.md       |  3 +-
 docs/api.rst           |  1 -
 docs/api/database.rst  |  2 +-
 docs/api/filtering.rst | 76 +++++++++++++++++++++++++++++++++++++-----
 docs/api/slice.rst     |  7 ----
 pyam/core.py           | 33 +++++-------------
 6 files changed, 79 insertions(+), 43 deletions(-)
 delete mode 100644 docs/api/slice.rst

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index ddf78f9cc..f38f0fdc2 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -1,6 +1,7 @@
 # Next release
 
-- [#866](https://github.com/IAMconsortium/pyam/pull/888) Support filtering by a `measurand` argument with tuples of
+- [#870](https://github.com/IAMconsortium/pyam/pull/870) Improve the docs of the `filter()` arguments
+- [#866](https://github.com/IAMconsortium/pyam/pull/866) Support filtering by a `measurand` argument with tuples of
   variable and units
 
 # Release v2.2.3
diff --git a/docs/api.rst b/docs/api.rst
index fc34ad79d..8c9fd7260 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -13,7 +13,6 @@ and methods.
    api/general
    api/iamdataframe
    api/database
-   api/slice
    api/filtering
    api/compute
    api/plotting
diff --git a/docs/api/database.rst b/docs/api/database.rst
index a0c377b13..00e65cd6f 100644
--- a/docs/api/database.rst
+++ b/docs/api/database.rst
@@ -23,7 +23,7 @@ Reading from an |ixmp4| platform
 --------------------------------
 
 The |pyam| package provides a simple interface to read timeseries data and meta
-indicators from local or remote |ixmp4| platform instancs.
+indicators from local or remote |ixmp4| platform instances.
 
 .. autofunction:: read_ixmp4
 
diff --git a/docs/api/filtering.rst b/docs/api/filtering.rst
index 4a0efe8f0..0c9319c35 100644
--- a/docs/api/filtering.rst
+++ b/docs/api/filtering.rst
@@ -1,12 +1,72 @@
 .. currentmodule:: pyam
 
-Advanced filtering
-==================
+Filtering and slicing
+=====================
 
-|pyam| includes a function to directly downselect a :class:`pandas.DataFrame`
-with appropriate columns or index dimensions
-(i.e., :code:`['model', 'scenario']`)
-using a :class:`IamDataFrame` and keyword arguments similar
-to the :meth:`IamDataFrame.filter` function.
+Arguments for filtering an :class:`IamDataFrame`
+------------------------------------------------
 
-.. autofunction:: filter_by_meta
+The |pyam| package provides several methods to filter an :class:`IamDataFrame` by its
+(timeseries) **data** or **meta** values. Read more about the `Data Model <data.html>`_
+that is implemented by an :class:`IamDataFrame`.
+
+The following arguments are available for filtering and can be combined as needed:
+
+Index
+^^^^^
+- A *column* of the :attr:`IamDataFrame.index`
+  (usually '**model**' and '**scenario**'): string or list of strings
+- '**index**': list of model/scenario-tuples or a :class:`pandas.MultiIndex`
+
+Timeseries data coordinates
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+- Any *column* of the :attr:`IamDataFrame.coordinates <pyam.IamDataFrame.coordinates>`
+  ('**region**', '**variable**', '**unit**'): string or list of strings
+- '**measurand**': a tuple (or list of tuples) of '*variable*' and '*unit*'
+- '**level**': the "depth" of entries in the '*variable*' column (number of '|')
+  (excluding the strings in the '*variable*' argument, if given)
+- '**year**': takes an integer (int/:class:`numpy.int64`), a list of integers or
+  a range. Note that the last year of a range is not included,
+  so ``range(2010, 2015)`` is interpreted as ``[2010, ..., 2014]``
+- '**time_domain**': can be 'year' or 'datetime'
+- Arguments for filtering by :class:`datetime.datetime` or :class:`numpy.datetime64`
+  ('**month**', '**hour**', '**time**')
+
+Meta indicators and other attributes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+- Any *column* of the :attr:`IamDataFrame.meta <pyam.IamDataFrame.meta>` dataframe:
+  string, integer, float, or list of these
+- '**exclude**' (see :attr:`IamDataFrame.exclude <pyam.IamDataFrame.exclude>`): boolean
+
+.. note::
+
+    In any string filters, '*' is interpreted as a wildcard, unless the keyword
+    argument *regexp=True* is used; in this case, strings are treated as
+    `regular expressions <https://docs.python.org/3/library/re.html>`_.
+
+Methods for filtering and slicing an :class:`IamDataFrame`
+----------------------------------------------------------
+
+.. automethod:: pyam.IamDataFrame.filter
+   :noindex:
+
+.. automethod:: pyam.IamDataFrame.slice
+   :noindex:
+
+The **IamSlice** class
+----------------------
+
+This class is an auxiliary feature to streamline the implementation of the
+:meth:`IamDataFrame.filter` method.
+
+.. autoclass:: pyam.slice.IamSlice
+   :members: dimensions, time, info
+
+Filtering using a proxy :class:`IamDataFrame`
+---------------------------------------------
+
+|pyam| includes a function to directly filter a :class:`pandas.DataFrame`
+with appropriate columns or index dimensions (i.e., '*model*' and '*scenario*') using
+an :class:`IamDataFrame` and keyword arguments similar to :meth:`IamDataFrame.filter`.
+
+.. autofunction:: pyam.filter_by_meta
diff --git a/docs/api/slice.rst b/docs/api/slice.rst
deleted file mode 100644
index 44ae010f7..000000000
--- a/docs/api/slice.rst
+++ /dev/null
@@ -1,7 +0,0 @@
-.. currentmodule:: pyam.slice
-
-The **IamSlice** class
-======================
-
-.. autoclass:: IamSlice
-   :members: dimensions, time, info
diff --git a/pyam/core.py b/pyam/core.py
index 27a3a3f57..67e82d934 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -1842,34 +1842,12 @@ def slice(self, *, keep=True, **kwargs):
         keep : bool, optional
             Keep all scenarios satisfying the filters (if *True*) or the inverse.
         **kwargs
-            Arguments for filtering. See the "Notes".
+            Arguments for filtering. Read more about the `available filter options
+            <https://pyam-iamc.readthedocs.io/en/stable/api/filtering.html>`_.
 
         Returns
         -------
         :class:`pyam.slice.IamSlice`
-
-        Notes
-        -----
-        The following arguments are available for filtering:
-
-         - 'model', 'scenario', 'region', 'variable', 'unit':
-           string or list of strings
-         - 'measurand': a tuple (or list of tuples) of 'variable' and 'unit'
-         - 'meta' columns: mapping of column name to allowed values
-         - 'exclude': values of :attr:`exclude`
-         - 'index': list of model, scenario 2-tuples or :class:`pandas.MultiIndex`
-         - 'level': the "depth" of entries in the variable column (number of '|')
-           (excluding the strings given in the 'variable' argument)
-         - 'year': takes an integer (int/np.int64), a list of integers or
-           a range. Note that the last year of a range is not included,
-           so `range(2010, 2015)` is interpreted as `[2010, ..., 2014]`
-         - 'time_domain': can be "year" or "datetime"
-         - arguments for filtering by `datetime.datetime` or np.datetime64
-           ('month', 'hour', 'time')
-         - 'regexp=True' disables pseudo-regexp syntax in `pattern_match()`
-
-        In any string filters, `*` is interpreted as wildcard.
-
         """
 
         _keep = self._apply_filters(**kwargs)
@@ -1891,7 +1869,12 @@ def filter(self, *, keep=True, inplace=False, **kwargs):
         inplace : bool, optional
             If *True*, do operation inplace and return *None*.
         **kwargs
-            Passed to :meth:`slice`.
+            Arguments for filtering. Read more about the `available filter options
+            <https://pyam-iamc.readthedocs.io/en/stable/api/filtering.html>`_.
+
+        Returns
+        -------
+        :class:`pyam.IamDataFrame` or **None**
         """
 
         # downselect `data` rows and clean up index