Add a learning rate feature (#602)
danielhuppmann authored Jan 17, 2022
1 parent 27d0fac commit 8dc1d27
Showing 10 changed files with 208 additions and 10 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
@@ -1,5 +1,6 @@
# Next release

- [#602](https://github.com/IAMconsortium/pyam/pull/602) Add a `compute` module/accessor and a learning-rate method
- [#600](https://github.com/IAMconsortium/pyam/pull/600) Add a `diff()` method
- [#592](https://github.com/IAMconsortium/pyam/pull/592) Fix for running in jupyter-lab notebooks
- [#590](https://github.com/IAMconsortium/pyam/pull/590) Update expected figures of plotting tests to use matplotlib 3.5
1 change: 1 addition & 0 deletions doc/source/api.rst
@@ -16,6 +16,7 @@ and methods.
api/filtering
api/plotting
api/iiasa
api/compute
api/statistics
api/timeseries
api/variables
7 changes: 7 additions & 0 deletions doc/source/api/compute.rst
@@ -0,0 +1,7 @@
.. currentmodule:: pyam

Computing indicators
====================

.. autoclass:: IamComputeAccessor
   :members:
8 changes: 3 additions & 5 deletions doc/source/api/timeseries.rst
@@ -15,8 +15,6 @@ the time dimension as index.

.. _`Slack channel, mailing list or GitHub issues`: ../contributing.html

.. autofunction:: cumulative

.. autofunction:: fill_series

.. autofunction:: cross_threshold
.. automodule:: pyam.timeseries
   :autosummary:
   :members:
2 changes: 1 addition & 1 deletion doc/source/conf.py
@@ -32,7 +32,6 @@
extensions = [
"numpydoc",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.doctest",
"sphinx.ext.intersphinx",
"sphinx.ext.todo",
@@ -44,6 +43,7 @@
"nbsphinx",
"sphinx_gallery.gen_gallery",
"cloud_sptheme.ext.table_styling",
"autodocsumm",
]

autosummary_generate = True
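For context, the `autodocsumm` extension added here provides the `:autosummary:` option used by the `automodule` directive in `doc/source/api/timeseries.rst` above; it replaces the removed `sphinx.ext.autosummary` extension. A minimal sketch of the relevant configuration (illustrative only, not the full `conf.py`):

```python
# doc/source/conf.py (excerpt) - enable autodoc plus autodocsumm
extensions = [
    "sphinx.ext.autodoc",
    "autodocsumm",  # adds the ":autosummary:" option to autodoc directives
]
```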
106 changes: 106 additions & 0 deletions pyam/compute.py
@@ -0,0 +1,106 @@
import math
import pandas as pd
from pyam.utils import remove_from_list


class IamComputeAccessor:
"""Perform computations on the timeseries data of an IamDataFrame
An :class:`IamDataFrame` has a module for computation of (advanced) indicators
from the timeseries data.
The methods in this module can be accessed via
.. code-block:: python
IamDataFrame.compute.<method>(*args, **kwargs)
"""

def __init__(self, df):
self._df = df

def learning_rate(self, name, performance, experience, append=False):
"""Compute the implicit learning rate from timeseries data
Experience curves are based on the concept that a technology's performance
improves as experience with this technology grows.
The "learning rate" indicates the performance improvement (e.g., cost reduction)
for each doubling of the accumulated experience (e.g., cumulative capacity).
The experience curve parameter *b* is equivalent to the (linear) slope when
plotting performance and experience timeseries on double-logarithmic scales.
The learning rate can be computed from the experience curve parameter as
:math:`1 - 2^{b}`.
The learning rate parameter in period *t* is computed based on the changes
to the subsequent period, i.e., from period *t* to period *t+1*.
Parameters
----------
name : str
Variable name of the computed timeseries data.
performance : str
Variable of the "performance" timeseries (e.g., specific investment costs).
experience : str
Variable of the "experience" timeseries (e.g., installed capacity).
append : bool, optional
Whether to append computed timeseries data to this instance.
Returns
-------
:class:`IamDataFrame` or **None**
Computed timeseries data or None if `append=True`.
"""
value = (
self._df._data[self._df._apply_filters(variable=[performance, experience])]
.groupby(
remove_from_list(self._df.dimensions, ["variable", "year", "unit"])
)
.apply(_compute_learning_rate, performance, experience)
)

return self._df._finalize(value, append=append, variable=name, unit="")


def _compute_learning_rate(x, performance, experience):
"""Internal implementation for computing implicit learning rate from timeseries data
Parameters
----------
x : :class:`pandas.Series`
Timeseries data of the *performance* and *experience* variables
indexed over the time domain.
performance : str
Variable of the "performance" timeseries (e.g., specific investment costs).
experience : str
Variable of the "experience" timeseries (e.g., cumulative installed capacity).
Returns
-------
Indexed :class:`pandas.Series` of implicit learning rates
"""
# drop all index dimensions other than "variable" and "year"
x.index = x.index.droplevel(
[i for i in x.index.names if i not in ["variable", "year"]]
)

# apply log, dropping all values that are zero or negative
x = x[x > 0].apply(math.log10)

# return empty pd.Series if not all relevant variables exist
if not all([v in x.index for v in [performance, experience]]):
names = remove_from_list(x.index.names, "variable")
empty_list = [[]] * len(names)
return pd.Series(
index=pd.MultiIndex(levels=empty_list, codes=empty_list, names=names),
dtype="float64",
)

# compute the "experience parameter" (slope of experience curve on double-log scale)
b = (x[performance] - x[performance].shift(periods=-1)) / (
x[experience] - x[experience].shift(periods=-1)
)

# translate to "learning rate" (e.g., cost reduction per doubling of capacity)
return b.apply(lambda y: 1 - math.pow(2, y))
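A minimal usage sketch of the new accessor (illustrative, not part of the diff; the data mirrors `tests/test_feature_learning_rate.py` below):

```python
import pandas as pd
from pyam import IamDataFrame, IAMC_IDX

# cost halves while capacity doubles between 2005 and 2010
df = IamDataFrame(
    pd.DataFrame(
        [
            ["model_a", "scen_a", "World", "Cap", "GW", 1, 2],
            ["model_a", "scen_a", "World", "Cost", "US$2010/kW", 1, 0.5],
        ],
        columns=IAMC_IDX + [2005, 2010],
    )
)

# return the computed indicator as a new IamDataFrame ...
lr = df.compute.learning_rate("Learning Rate", "Cost", "Cap")

# ... or append it to the existing instance (returns None)
df.compute.learning_rate("Learning Rate", "Cost", "Cap", append=True)
```

For this data, the experience curve parameter is `b = log10(0.5) / log10(2) = -1`, so the learning rate in 2005 is `1 - 2**(-1) = 0.5`, matching the expected value for `scen_a` in the tests below.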
24 changes: 20 additions & 4 deletions pyam/core.py
@@ -54,6 +54,7 @@
)
from pyam.read_ixmp import read_ix
from pyam.plotting import PlotAccessor
from pyam.compute import IamComputeAccessor
from pyam._compare import _compare
from pyam.aggregation import (
_aggregate,
@@ -203,8 +204,9 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):
if "exec" in run_control():
self._execute_run_control()

# add the `plot` handler
# add the `plot` and `compute` handlers
self.plot = PlotAccessor(self)
self._compute = None

def _set_attributes(self):
"""Utility function to set attributes"""
@@ -224,6 +226,13 @@ def _set_attributes(self):
for c in self.extra_cols:
setattr(self, c, get_index_levels(self._data, c))

def _finalize(self, data, append, **args):
"""Append `data` to `self` or return as new IamDataFrame with copy of `meta`"""
if append:
self.append(data, **args, inplace=True)
else:
return IamDataFrame(data, meta=self.meta, **args)

def __getitem__(self, key):
_key_check = [key] if isstr(key) else key
if key == "value":
@@ -239,6 +248,13 @@ def __len__(self):
def __repr__(self):
return self.info()

@property
def compute(self):
"""Access to advanced computation methods, see :class:`IamComputeAccessor`"""
if self._compute is None:
self._compute = IamComputeAccessor(self)
return self._compute

def info(self, n=80, meta_rows=5, memory_usage=False):
"""Print a summary of the object index dimensions and meta indicators
@@ -1690,12 +1706,11 @@ def filter(self, keep=True, inplace=False, **kwargs):
- 'level': the maximum "depth" of IAM variables (number of '|')
(excluding the strings given in the 'variable' argument)
- 'year': takes an integer (int/np.int64), a list of integers or
a range. Note that the last year of a range is not included,
so `range(2010, 2015)` is interpreted as `[2010, ..., 2014]`
- arguments for filtering by `datetime.datetime` or np.datetime64
('month', 'hour', 'time')
- 'regexp=True' disables pseudo-regexp syntax in `pattern_match()`
"""
if not isinstance(keep, bool):
raise ValueError(f"Cannot filter by `keep={keep}`, must be a boolean!")
@@ -2110,6 +2125,7 @@ def diff(self, mapping, periods=1, append=False):
This method behaves as if applying :meth:`pandas.DataFrame.diff` on the
timeseries data in wide format.
By default, the diff-value in period *t* is computed as *x[t] - x[t-1]*.
Parameters
----------
@@ -2119,7 +2135,7 @@
.. code-block:: python
{"current variable": "name of the diff-ed variable", ...}
{"current variable": "name of diff-ed variable", ...}
periods : int, optional
Periods to shift for calculating difference, accepts negative values;
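The `filter()` docstring above notes that the last year of a `range` is not included. A short sketch of what that means in practice (`df` can be any IamDataFrame, e.g. the one constructed in the sketch after `pyam/compute.py` above):

```python
# keep the years 2010-2014 only: the end of the range (2015) is excluded,
# consistent with Python's range() semantics
subset = df.filter(year=range(2010, 2015))

# equivalent to passing the explicit list of years
subset = df.filter(year=[2010, 2011, 2012, 2013, 2014])
```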
6 changes: 6 additions & 0 deletions pyam/utils.py
@@ -99,6 +99,12 @@ def to_list(x):
return x if islistable(x) else [x]


def remove_from_list(x, items):
"""Remove `items` from list `x`"""
items = to_list(items)
return [i for i in x if i not in items]


def write_sheet(writer, name, df, index=False):
"""Write a pandas.DataFrame to an ExcelWriter
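A quick illustration of the new helper (the dimension names are just an example of typical IamDataFrame dimensions):

```python
from pyam.utils import remove_from_list

dimensions = ["model", "scenario", "region", "variable", "unit", "year"]

# remove several items at once, as done in compute.learning_rate() above ...
remove_from_list(dimensions, ["variable", "year", "unit"])
# -> ['model', 'scenario', 'region']

# ... or a single item, which is wrapped into a list via to_list()
remove_from_list(dimensions, "unit")
# -> ['model', 'scenario', 'region', 'variable', 'year']
```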
1 change: 1 addition & 0 deletions setup.cfg
@@ -79,6 +79,7 @@ docs =
sphinxcontrib-programoutput
numpydoc
kaleido
autodocsumm
# docs requires 'tutorials'

[flake8]
62 changes: 62 additions & 0 deletions tests/test_feature_learning_rate.py
@@ -0,0 +1,62 @@
import pandas as pd
from pyam import IamDataFrame, IAMC_IDX
from pyam.testing import assert_iamframe_equal
import pytest


TEST_DF = IamDataFrame(
pd.DataFrame(
[
["model_a", "scen_a", "World", "Cap", "GW", 1, 2],
["model_a", "scen_a", "World", "Cost", "US$2010/kW", 1, 0.5],
["model_a", "scen_b", "World", "Cap", "GW", 0.1, 0.2],
["model_a", "scen_b", "World", "Cost", "US$2010/kW", 1, 0.5],
["model_a", "scen_c", "World", "Cap", "GW", 10, 20],
["model_a", "scen_c", "World", "Cost", "US$2010/kW", 1, 0.5],
["model_a", "scen_d", "World", "Cap", "GW", 1, 2],
["model_a", "scen_d", "World", "Cost", "US$2010/kW", 1, 0.75],
["model_a", "scen_e", "World", "Cap", "GW", 1, 2],
["model_a", "scen_e", "World", "Cost", "US$2010/kW", 1, 0.25],
],
columns=IAMC_IDX + [2005, 2010],
)
)

EXP_DF = IamDataFrame(
pd.DataFrame(
[
["model_a", "scen_a", "World", "Learning Rate", "", 0.5],
["model_a", "scen_b", "World", "Learning Rate", "", 0.5],
["model_a", "scen_c", "World", "Learning Rate", "", 0.5],
["model_a", "scen_d", "World", "Learning Rate", "", 0.25],
["model_a", "scen_e", "World", "Learning Rate", "", 0.75],
],
columns=IAMC_IDX + [2005],
)
)


@pytest.mark.parametrize("append", (False, True))
def test_learning_rate(append):
"""Check computing the learning rate"""

if append:
obs = TEST_DF.copy()
obs.compute.learning_rate("Learning Rate", "Cost", "Cap", append=True)
assert_iamframe_equal(TEST_DF.append(EXP_DF), obs)
else:
obs = TEST_DF.compute.learning_rate("Learning Rate", "Cost", "Cap")
assert_iamframe_equal(EXP_DF, obs)


@pytest.mark.parametrize("append", (False, True))
def test_learning_rate_empty(append):
"""Assert that computing the learning rate with invalid variables returns empty"""

if append:
obs = TEST_DF.copy()
obs.compute.learning_rate("Learning Rate", "foo", "Cap", append=True)
assert_iamframe_equal(TEST_DF, obs) # assert that no data was added
else:
obs = TEST_DF.compute.learning_rate("Learning Rate", "foo", "Cap")
assert obs.empty
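As a quick cross-check of the expected values (not part of the test suite): in every scenario above the capacity exactly doubles, in which case `b = log2(cost_2010 / cost_2005)` and the learning rate `1 - 2**b` reduces to `1 - cost_2010 / cost_2005`:

```python
import math

# scen_a, scen_d, scen_e: capacity goes 1 -> 2 GW while cost changes as listed
for cost_2005, cost_2010, expected in [(1, 0.5, 0.5), (1, 0.75, 0.25), (1, 0.25, 0.75)]:
    b = (math.log10(cost_2010) - math.log10(cost_2005)) / (math.log10(2) - math.log10(1))
    assert math.isclose(1 - 2**b, expected)
```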
