Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an "annualized growth rate" feature #604

Merged
merged 13 commits into from
Jan 23, 2022
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Next release

- [#604](https://github.com/IAMconsortium/pyam/pull/604) Add an annualized-growth-rate method
- [#602](https://github.com/IAMconsortium/pyam/pull/602) Add a `compute` module/accessor and a learning-rate method
- [#600](https://github.com/IAMconsortium/pyam/pull/600) Add a `diff()` method
- [#592](https://github.com/IAMconsortium/pyam/pull/592) Fix for running in jupyter-lab notebooks
Expand Down
2 changes: 1 addition & 1 deletion doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ and methods.
api/iamdataframe
api/database
api/filtering
api/compute
api/plotting
api/iiasa
api/compute
api/statistics
api/timeseries
api/variables
Expand Down
4 changes: 2 additions & 2 deletions doc/source/api/compute.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.. currentmodule:: pyam

Computing indicators
====================
Derived timeseries data
=======================

.. autoclass:: IamComputeAccessor
:members:
67 changes: 61 additions & 6 deletions pyam/compute.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import math
import pandas as pd
from pyam.index import replace_index_values
from pyam.timeseries import growth_rate
from pyam.utils import remove_from_list


Expand All @@ -19,6 +21,55 @@ class IamComputeAccessor:
def __init__(self, df):
self._df = df

def growth_rate(self, mapping, append=False):
    """Compute annualized growth rates of timeseries data along the time dimension

    The growth rate reported for period *t* describes the change towards the
    subsequent period, i.e., from period *t* to period *t+1*.

    Parameters
    ----------
    mapping : dict
        Mapping of *variable* item(s) to the name(s) of the computed data,
        e.g.,

        .. code-block:: python

           {"variable": "name of growth-rate variable", ...}

    append : bool, optional
        Whether to append computed timeseries data to this instance.

    Returns
    -------
    :class:`IamDataFrame` or **None**
        Computed timeseries data or None if `append=True`.

    Raises
    ------
    ValueError
        Math domain error when timeseries crosses 0.

    See Also
    --------
    pyam.timeseries.growth_rate

    """
    df = self._df

    # select the rows of the requested variables and treat each timeseries
    # (all dimensions except "year") as a separate group
    selected = df._data[df._apply_filters(variable=mapping)]
    by_timeseries = remove_from_list(df.dimensions, ["year"])
    value = selected.groupby(by_timeseries, group_keys=False).apply(growth_rate)

    if not value.empty:
        # drop level "unit" (reinserted when finalizing), rename "variable"
        without_unit = value.index.droplevel("unit")
        value.index = replace_index_values(without_unit, "variable", mapping)
    else:
        value = empty_series(remove_from_list(df.dimensions, "unit"))

    return df._finalize(value, append=append, unit="")

def learning_rate(self, name, performance, experience, append=False):
"""Compute the implicit learning rate from timeseries data

Expand Down Expand Up @@ -90,12 +141,7 @@ def _compute_learning_rate(x, performance, experience):

# return empty pd.Series if not all relevant variables exist
if not all([v in x.index for v in [performance, experience]]):
names = remove_from_list(x.index.names, "variable")
empty_list = [[]] * len(names)
return pd.Series(
index=pd.MultiIndex(levels=empty_list, codes=empty_list, names=names),
dtype="float64",
)
return empty_series(remove_from_list(x.index.names, "variable"))

# compute the "experience parameter" (slope of experience curve on double-log scale)
b = (x[performance] - x[performance].shift(periods=-1)) / (
Expand All @@ -104,3 +150,12 @@ def _compute_learning_rate(x, performance, experience):

# translate to "learning rate" (e.g., cost reduction per doubling of capacity)
return b.apply(lambda y: 1 - math.pow(2, y))


def empty_series(names):
    """Create a float64 pd.Series without data, indexed by an empty MultiIndex

    The index has one (empty) level per entry of `names`.
    """
    # one empty placeholder per index level, used as both levels and codes
    no_entries = [[] for _ in range(len(names))]
    index = pd.MultiIndex(levels=no_entries, codes=no_entries, names=names)
    return pd.Series(index=index, dtype="float64")
2 changes: 1 addition & 1 deletion pyam/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2141,7 +2141,7 @@ def diff(self, mapping, periods=1, append=False):
Periods to shift for calculating difference, accepts negative values;
passed to :meth:`pandas.DataFrame.diff`.
append : bool, optional
Whether to append aggregated timeseries data to this instance.
Whether to append computed timeseries data to this instance.

Returns
-------
Expand Down
48 changes: 47 additions & 1 deletion pyam/timeseries.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
import math
import numpy as np
from pyam.utils import isstr, to_int
import pandas as pd
from pyam.utils import isstr, to_int, raise_data_error

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -127,3 +129,47 @@ def cross_threshold(
if return_type == int:
return [y + 1 for y in map(int, years)]
return years


def growth_rate(x):
    """Compute the annualized growth rate from timeseries data

    The annualized growth rate parameter in period *t* is computed assuming exponential
    growth based on the changes from period *t* to period *t+1*.

    Parameters
    ----------
    x : :class:`pandas.Series`
        Timeseries data indexed over the time domain.

    Returns
    -------
    Indexed :class:`pandas.Series` of annualized growth rates
        The series has dtype float64 and no entry for the latest period;
        it is empty if `x` has fewer than two periods.

    Raises
    ------
    ValueError
        Math domain error when timeseries crosses 0.

    See Also
    --------
    pyam.IamComputeAccessor.growth_rate

    """

    # growth rates are only defined if all values have the same (non-zero) sign
    if not (all(v > 0 for v in x.values) or all(v < 0 for v in x.values)):
        raise_data_error("Cannot compute growth rate when timeseries crosses 0", x)

    x = x.sort_index()
    # relative change from period t to t+1; diff on latest period is nan
    relative_change = (-x.diff(periods=-1) / x).values[:-1]

    if isinstance(x.index, pd.MultiIndex):
        periods = x.index.get_level_values("year")
    else:
        periods = x.index
    # number of years between each period and its successor
    period_length = -pd.Series(periods).diff(periods=-1).values[:-1]

    # set the dtype explicitly so that an empty result (fewer than two periods)
    # does not fall back to a pandas-version-dependent default dtype
    return pd.Series(
        [math.pow(1 + v, 1 / d) - 1 for v, d in zip(relative_change, period_length)],
        index=x.index[:-1],
        dtype="float64",
    )
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ install_requires =
numpy >= 1.19.0
requests
openpyxl
pandas >= 1.1.1
pandas >= 1.1.1, < 1.4
pint <= 0.17
PyYAML
matplotlib >= 3.2.0
Expand Down
69 changes: 69 additions & 0 deletions tests/test_feature_growth_rate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import math
import pandas as pd
import pandas.testing as pdt
from pyam import IamDataFrame, IAMC_IDX
from pyam.testing import assert_iamframe_equal
from pyam.timeseries import growth_rate
import pytest

from conftest import META_DF


# expected growth rates of "Primary Energy" reported for 2005, one row per scenario
EXP_DF = IamDataFrame(
    pd.DataFrame(
        data=[
            ["model_a", "scen_a", "World", "Growth Rate", "", 0.430969],
            ["model_a", "scen_b", "World", "Growth Rate", "", 0.284735],
        ],
        columns=[*IAMC_IDX, 2005],
    ),
    meta=META_DF,
)


@pytest.mark.parametrize("append", (False, True))
def test_growth_rate(test_df_year, append):
    """Compare the growth rate computed from an IamDataFrame to the expected data"""

    mapping = {"Primary Energy": "Growth Rate"}

    if not append:
        # the computed data is returned as a new object
        obs = test_df_year.compute.growth_rate(mapping)
        assert_iamframe_equal(EXP_DF, obs)
        return

    # the computed data is added to (a copy of) the test data
    obs = test_df_year.copy()
    obs.compute.growth_rate(mapping, append=True)
    assert_iamframe_equal(test_df_year.append(EXP_DF), obs)


@pytest.mark.parametrize("append", (False, True))
def test_growth_rate_empty(test_df_year, append):
    """Check that a mapping with an unknown variable yields no growth-rate data"""

    unknown = {"foo": "bar"}

    if not append:
        obs = test_df_year.compute.growth_rate(unknown)
        assert obs.empty
        return

    obs = test_df_year.copy()
    obs.compute.growth_rate(unknown, append=True)
    assert_iamframe_equal(test_df_year, obs)  # assert that no data was added


@pytest.mark.parametrize("x2010", (1, 27, -3))
@pytest.mark.parametrize("rates", ([0.05, 1.25], [0.5, -0.5]))
def test_growth_rate_timeseries(x2010, rates):
    """Verify growth rates computed directly on a timeseries for several inputs"""

    years = [2010, 2013, 2017]

    # construct the values by compounding the given rates over each interval
    x2013 = x2010 * math.pow(1 + rates[0], 3)  # 3 years: 2010 - 2013
    x2017 = x2013 * math.pow(1 + rates[1], 4)  # 4 years: 2013 - 2017

    data = pd.Series([x2010, x2013, x2017], index=years)
    expected = pd.Series(rates, index=years[:-1])

    pdt.assert_series_equal(growth_rate(data), expected)


@pytest.mark.parametrize("value", (0, -1))
def test_growth_rate_timeseries_fails(value):
    """Check that a timeseries reaching (0) or crossing (-1) zero raises a ValueError"""

    # `match` is a regular expression searched in the error message;
    # the trailing ".*" allows for any details appended to the message
    with pytest.raises(ValueError, match="Cannot compute growth rate when.*"):
        growth_rate(pd.Series([1.0, value]))