Add a learning rate feature (#602)
danielhuppmann authored Jan 17, 2022
1 parent 27d0fac commit 8dc1d27
Showing 10 changed files with 208 additions and 10 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
@@ -1,5 +1,6 @@
# Next release

- [#602](https://github.com/IAMconsortium/pyam/pull/602) Add a `compute` module/accessor and a learning-rate method
- [#600](https://github.com/IAMconsortium/pyam/pull/600) Add a `diff()` method
- [#592](https://github.com/IAMconsortium/pyam/pull/592) Fix for running in jupyter-lab notebooks
- [#590](https://github.com/IAMconsortium/pyam/pull/590) Update expected figures of plotting tests to use matplotlib 3.5
1 change: 1 addition & 0 deletions doc/source/api.rst
@@ -16,6 +16,7 @@ and methods.
api/filtering
api/plotting
api/iiasa
api/compute
api/statistics
api/timeseries
api/variables
7 changes: 7 additions & 0 deletions doc/source/api/compute.rst
@@ -0,0 +1,7 @@
.. currentmodule:: pyam

Computing indicators
====================

.. autoclass:: IamComputeAccessor
   :members:
8 changes: 3 additions & 5 deletions doc/source/api/timeseries.rst
@@ -15,8 +15,6 @@ the time dimension as index.

.. _`Slack channel, mailing list or GitHub issues`: ../contributing.html

.. autofunction:: cumulative

.. autofunction:: fill_series

.. autofunction:: cross_threshold
.. automodule:: pyam.timeseries
   :autosummary:
   :members:
2 changes: 1 addition & 1 deletion doc/source/conf.py
@@ -32,7 +32,6 @@
extensions = [
"numpydoc",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.doctest",
"sphinx.ext.intersphinx",
"sphinx.ext.todo",
@@ -44,6 +43,7 @@
"nbsphinx",
"sphinx_gallery.gen_gallery",
"cloud_sptheme.ext.table_styling",
"autodocsumm",
]

autosummary_generate = True
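For context, the `autodocsumm` extension added here provides the `:autosummary:` option used by the `automodule` directive in `doc/source/api/timeseries.rst` above; it replaces the removed `sphinx.ext.autosummary` extension. A minimal sketch of the relevant configuration (illustrative only, not the full `conf.py`):

```python
# doc/source/conf.py (excerpt) - enable autodoc plus autodocsumm
extensions = [
    "sphinx.ext.autodoc",
    "autodocsumm",  # adds the ":autosummary:" option to autodoc directives
]
```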
106 changes: 106 additions & 0 deletions pyam/compute.py
@@ -0,0 +1,106 @@
import math
import pandas as pd
from pyam.utils import remove_from_list


class IamComputeAccessor:
"""Perform computations on the timeseries data of an IamDataFrame
An :class:`IamDataFrame` has a module for computation of (advanced) indicators
from the timeseries data.
The methods in this module can be accessed via
.. code-block:: python
IamDataFrame.compute.<method>(*args, **kwargs)
"""

def __init__(self, df):
self._df = df

def learning_rate(self, name, performance, experience, append=False):
"""Compute the implicit learning rate from timeseries data
Experience curves are based on the concept that a technology's performance
improves as experience with this technology grows.
The "learning rate" indicates the performance improvement (e.g., cost reduction)
for each doubling of the accumulated experience (e.g., cumulative capacity).
The experience curve parameter *b* is equivalent to the (linear) slope when
plotting performance and experience timeseries on double-logarithmic scales.
The learning rate can be computed from the experience curve parameter as
:math:`1 - 2^{b}`.
The learning rate parameter in period *t* is computed based on the changes
to the subsequent period, i.e., from period *t* to period *t+1*.
Parameters
----------
name : str
Variable name of the computed timeseries data.
performance : str
Variable of the "performance" timeseries (e.g., specific investment costs).
experience : str
Variable of the "experience" timeseries (e.g., installed capacity).
append : bool, optional
Whether to append computed timeseries data to this instance.
Returns
-------
:class:`IamDataFrame` or **None**
Computed timeseries data or None if `append=True`.
"""
value = (
self._df._data[self._df._apply_filters(variable=[performance, experience])]
.groupby(
remove_from_list(self._df.dimensions, ["variable", "year", "unit"])
)
.apply(_compute_learning_rate, performance, experience)
)

return self._df._finalize(value, append=append, variable=name, unit="")


def _compute_learning_rate(x, performance, experience):
"""Internal implementation for computing implicit learning rate from timeseries data
Parameters
----------
x : :class:`pandas.Series`
Timeseries data of the *performance* and *experience* variables
indexed over the time domain.
performance : str
Variable of the "performance" timeseries (e.g., specific investment costs).
experience : str
Variable of the "experience" timeseries (e.g., cumulative installed capacity).
Returns
-------
Indexed :class:`pandas.Series` of implicit learning rates
"""
# drop all index dimensions other than "variable" and "year"
x.index = x.index.droplevel(
[i for i in x.index.names if i not in ["variable", "year"]]
)

# apply log, dropping all values that are zero or negative
x = x[x > 0].apply(math.log10)

# return empty pd.Series if not all relevant variables exist
if not all([v in x.index for v in [performance, experience]]):
names = remove_from_list(x.index.names, "variable")
empty_list = [[]] * len(names)
return pd.Series(
index=pd.MultiIndex(levels=empty_list, codes=empty_list, names=names),
dtype="float64",
)

# compute the "experience parameter" (slope of experience curve on double-log scale)
b = (x[performance] - x[performance].shift(periods=-1)) / (
x[experience] - x[experience].shift(periods=-1)
)

# translate to "learning rate" (e.g., cost reduction per doubling of capacity)
return b.apply(lambda y: 1 - math.pow(2, y))
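A minimal usage sketch of the new accessor (illustrative, not part of the diff; the data mirrors `tests/test_feature_learning_rate.py` below):

```python
import pandas as pd
from pyam import IamDataFrame, IAMC_IDX

# cost halves while capacity doubles between 2005 and 2010
df = IamDataFrame(
    pd.DataFrame(
        [
            ["model_a", "scen_a", "World", "Cap", "GW", 1, 2],
            ["model_a", "scen_a", "World", "Cost", "US$2010/kW", 1, 0.5],
        ],
        columns=IAMC_IDX + [2005, 2010],
    )
)

# return the computed indicator as a new IamDataFrame ...
lr = df.compute.learning_rate("Learning Rate", "Cost", "Cap")

# ... or append it to the existing instance (returns None)
df.compute.learning_rate("Learning Rate", "Cost", "Cap", append=True)
```

For this data, the experience curve parameter is `b = log10(0.5) / log10(2) = -1`, so the learning rate in 2005 is `1 - 2**(-1) = 0.5`, matching the expected value for `scen_a` in the tests below.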
24 changes: 20 additions & 4 deletions pyam/core.py
@@ -54,6 +54,7 @@
)
from pyam.read_ixmp import read_ix
from pyam.plotting import PlotAccessor
from pyam.compute import IamComputeAccessor
from pyam._compare import _compare
from pyam.aggregation import (
_aggregate,
@@ -203,8 +204,9 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):
if "exec" in run_control():
self._execute_run_control()

# add the `plot` handler
# add the `plot` and `compute` handlers
self.plot = PlotAccessor(self)
self._compute = None

def _set_attributes(self):
"""Utility function to set attributes"""
@@ -224,6 +226,13 @@ def _set_attributes(self):
for c in self.extra_cols:
setattr(self, c, get_index_levels(self._data, c))

def _finalize(self, data, append, **args):
"""Append `data` to `self` or return as new IamDataFrame with copy of `meta`"""
if append:
self.append(data, **args, inplace=True)
else:
return IamDataFrame(data, meta=self.meta, **args)

def __getitem__(self, key):
_key_check = [key] if isstr(key) else key
if key == "value":
@@ -239,6 +248,13 @@ def __len__(self):
def __repr__(self):
return self.info()

@property
def compute(self):
"""Access to advanced computation methods, see :class:`IamComputeAccessor`"""
if self._compute is None:
self._compute = IamComputeAccessor(self)
return self._compute

def info(self, n=80, meta_rows=5, memory_usage=False):
"""Print a summary of the object index dimensions and meta indicators
@@ -1690,12 +1706,11 @@ def filter(self, keep=True, inplace=False, **kwargs):
- 'level': the maximum "depth" of IAM variables (number of '|')
(excluding the strings given in the 'variable' argument)
- 'year': takes an integer (int/np.int64), a list of integers or
a range. Note that the last year of a range is not included,
so `range(2010, 2015)` is interpreted as `[2010, ..., 2014]`
- arguments for filtering by `datetime.datetime` or np.datetime64
('month', 'hour', 'time')
- 'regexp=True' disables pseudo-regexp syntax in `pattern_match()`
"""
if not isinstance(keep, bool):
raise ValueError(f"Cannot filter by `keep={keep}`, must be a boolean!")
@@ -2110,6 +2125,7 @@ def diff(self, mapping, periods=1, append=False):
This method behaves as if applying :meth:`pandas.DataFrame.diff` on the
timeseries data in wide format.
By default, the diff-value in period *t* is computed as *x[t] - x[t-1]*.
Parameters
----------
@@ -2119,7 +2135,7 @@
.. code-block:: python
{"current variable": "name of the diff-ed variable", ...}
{"current variable": "name of diff-ed variable", ...}
periods : int, optional
Periods to shift for calculating difference, accepts negative values;
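The `filter()` docstring above notes that the last year of a `range` is not included. A short sketch of what that means in practice (`df` can be any IamDataFrame, e.g. the one constructed in the sketch after `pyam/compute.py` above):

```python
# keep the years 2010-2014 only: the end of the range (2015) is excluded,
# consistent with Python's range() semantics
subset = df.filter(year=range(2010, 2015))

# equivalent to passing the explicit list of years
subset = df.filter(year=[2010, 2011, 2012, 2013, 2014])
```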
6 changes: 6 additions & 0 deletions pyam/utils.py
@@ -99,6 +99,12 @@ def to_list(x):
return x if islistable(x) else [x]


def remove_from_list(x, items):
"""Remove `items` from list `x`"""
items = to_list(items)
return [i for i in x if i not in items]


def write_sheet(writer, name, df, index=False):
"""Write a pandas.DataFrame to an ExcelWriter
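A quick illustration of the new helper (the dimension names are just an example of typical IamDataFrame dimensions):

```python
from pyam.utils import remove_from_list

dimensions = ["model", "scenario", "region", "variable", "unit", "year"]

# remove several items at once, as done in compute.learning_rate() above ...
remove_from_list(dimensions, ["variable", "year", "unit"])
# -> ['model', 'scenario', 'region']

# ... or a single item, which is wrapped into a list via to_list()
remove_from_list(dimensions, "unit")
# -> ['model', 'scenario', 'region', 'variable', 'year']
```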
1 change: 1 addition & 0 deletions setup.cfg
@@ -79,6 +79,7 @@ docs =
sphinxcontrib-programoutput
numpydoc
kaleido
autodocsumm
# docs requires 'tutorials'

[flake8]
62 changes: 62 additions & 0 deletions tests/test_feature_learning_rate.py
@@ -0,0 +1,62 @@
import pandas as pd
from pyam import IamDataFrame, IAMC_IDX
from pyam.testing import assert_iamframe_equal
import pytest


TEST_DF = IamDataFrame(
pd.DataFrame(
[
["model_a", "scen_a", "World", "Cap", "GW", 1, 2],
["model_a", "scen_a", "World", "Cost", "US$2010/kW", 1, 0.5],
["model_a", "scen_b", "World", "Cap", "GW", 0.1, 0.2],
["model_a", "scen_b", "World", "Cost", "US$2010/kW", 1, 0.5],
["model_a", "scen_c", "World", "Cap", "GW", 10, 20],
["model_a", "scen_c", "World", "Cost", "US$2010/kW", 1, 0.5],
["model_a", "scen_d", "World", "Cap", "GW", 1, 2],
["model_a", "scen_d", "World", "Cost", "US$2010/kW", 1, 0.75],
["model_a", "scen_e", "World", "Cap", "GW", 1, 2],
["model_a", "scen_e", "World", "Cost", "US$2010/kW", 1, 0.25],
],
columns=IAMC_IDX + [2005, 2010],
)
)

EXP_DF = IamDataFrame(
pd.DataFrame(
[
["model_a", "scen_a", "World", "Learning Rate", "", 0.5],
["model_a", "scen_b", "World", "Learning Rate", "", 0.5],
["model_a", "scen_c", "World", "Learning Rate", "", 0.5],
["model_a", "scen_d", "World", "Learning Rate", "", 0.25],
["model_a", "scen_e", "World", "Learning Rate", "", 0.75],
],
columns=IAMC_IDX + [2005],
)
)


@pytest.mark.parametrize("append", (False, True))
def test_learning_rate(append):
"""Check computing the learning rate"""

if append:
obs = TEST_DF.copy()
obs.compute.learning_rate("Learning Rate", "Cost", "Cap", append=True)
assert_iamframe_equal(TEST_DF.append(EXP_DF), obs)
else:
obs = TEST_DF.compute.learning_rate("Learning Rate", "Cost", "Cap")
assert_iamframe_equal(EXP_DF, obs)


@pytest.mark.parametrize("append", (False, True))
def test_learning_rate_empty(append):
"""Assert that computing the learning rate with invalid variables returns empty"""

if append:
obs = TEST_DF.copy()
obs.compute.learning_rate("Learning Rate", "foo", "Cap", append=True)
assert_iamframe_equal(TEST_DF, obs) # assert that no data was added
else:
obs = TEST_DF.compute.learning_rate("Learning Rate", "foo", "Cap")
assert obs.empty
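As a quick cross-check of the expected values (not part of the test suite): in every scenario above the capacity exactly doubles, in which case `b = log2(cost_2010 / cost_2005)` and the learning rate `1 - 2**b` reduces to `1 - cost_2010 / cost_2005`:

```python
import math

# scen_a, scen_d, scen_e: capacity goes 1 -> 2 GW while cost changes as listed
for cost_2005, cost_2010, expected in [(1, 0.5, 0.5), (1, 0.75, 0.25), (1, 0.25, 0.75)]:
    b = (math.log10(cost_2010) - math.log10(cost_2005)) / (math.log10(2) - math.log10(1))
    assert math.isclose(1 - 2**b, expected)
```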
