From 6b941ad7f693fbd745e4737ee1f7006bed8ab816 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Sun, 16 Jan 2022 15:51:16 +0100 Subject: [PATCH 01/13] Add growth-rate method to `timeseries` and `compute` modules --- pyam/compute.py | 35 +++++++++++++++++++++++++++++++++++ pyam/core.py | 2 +- pyam/timeseries.py | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/pyam/compute.py b/pyam/compute.py index 5e14a6834..5cc210512 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -1,5 +1,7 @@ import math import pandas as pd +from pyam.index import replace_index_values +from pyam.timeseries import growth_rate from pyam.utils import remove_from_list @@ -19,6 +21,39 @@ class IamComputeAccessor: def __init__(self, df): self._df = df + def growth_rate(self, mapping, append=False): + """Compute the annualized growth rate of a timeseries along the time dimension + + The growth rate parameter in period *t* is computed based on the changes + to the subsequent period, i.e., from period *t* to period *t+1*. + + Parameters + ---------- + mapping : dict + Mapping of *variable* item(s) to the name(s) of the computed data, + e.g., + + .. code-block:: python + + {"current variable": "name of growth-rate variable", ...} + + append : bool, optional + Whether to append computed timeseries data to this instance. + + Returns + ------- + :class:`IamDataFrame` or **None** + Computed timeseries data or None if `append=True`. + """ + value = ( + self._df._data[self._df._apply_filters(variable=mapping)] + .groupby(remove_from_list(self._df.dimensions, ["year"]), group_keys=False) + .apply(growth_rate) + ) + value.index = replace_index_values(value.index, "variable", mapping) + + return self._finalize(value, append=append) + def learning_rate(self, name, performance, experience, append=False): """Compute the implicit learning rate from timeseries data diff --git a/pyam/core.py b/pyam/core.py index 6c1283719..336f49eba 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -2141,7 +2141,7 @@ def diff(self, mapping, periods=1, append=False): Periods to shift for calculating difference, accepts negative values; passed to :meth:`pandas.DataFrame.diff`. append : bool, optional - Whether to append aggregated timeseries data to this instance. + Whether to append computed timeseries data to this instance. Returns ------- diff --git a/pyam/timeseries.py b/pyam/timeseries.py index a098b2746..1ad746a80 100644 --- a/pyam/timeseries.py +++ b/pyam/timeseries.py @@ -1,5 +1,7 @@ import logging +import math import numpy as np +import pandas as pd from pyam.utils import isstr, to_int logger = logging.getLogger(__name__) @@ -127,3 +129,37 @@ def cross_threshold( if return_type == int: return [y + 1 for y in map(int, years)] return years + + +def growth_rate(x): + """Compute the annualized growth rate from timeseries data + + The annualized growth rate parameter in period *t* is computed based on the changes + from period *t* to period *t+1*. + + Parameters + ---------- + x : :class:`pandas.Series` + Timeseries data indexed over the time domain. + + Returns + ------- + Indexed :class:`pandas.Series` of annualized growth rates + """ + + x = x.sort_index() + growth_rate = (-x.diff(periods=-1) / x).values + + if isinstance(x.index, pd.MultiIndex): + periods = x.index.get_level_values("year") + else: + periods = x.index + period_length = -pd.Series(periods).diff(periods=-1).values + + return pd.Series( + [ + math.copysign(math.pow(1 + abs(v), 1 / d) - 1, v) + for v, d in zip(growth_rate[:-1], period_length[:-1]) + ], + index=x.index[:-1], + ) From d8c04a08da681e2002b858c6b616cc6f1d8eb106 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Sun, 16 Jan 2022 16:56:10 +0100 Subject: [PATCH 02/13] Add `empty_series` utility --- pyam/compute.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pyam/compute.py b/pyam/compute.py index 5cc210512..6224101c8 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -125,12 +125,7 @@ def _compute_learning_rate(x, performance, experience): # return empty pd.Series if not all relevant variables exist if not all([v in x.index for v in [performance, experience]]): - names = remove_from_list(x.index.names, "variable") - empty_list = [[]] * len(names) - return pd.Series( - index=pd.MultiIndex(levels=empty_list, codes=empty_list, names=names), - dtype="float64", - ) + return empty_series(remove_from_list(x.index.names, "variable")) # compute the "experience parameter" (slope of experience curve on double-log scale) b = (x[performance] - x[performance].shift(periods=-1)) / ( @@ -139,3 +134,12 @@ def _compute_learning_rate(x, performance, experience): # translate to "learning rate" (e.g., cost reduction per doubling of capacity) return b.apply(lambda y: 1 - math.pow(2, y)) + + +def empty_series(names): + """Return an empty pd.Series with correct index names""" + empty_list = [[]] * len(names) + return pd.Series( + index=pd.MultiIndex(levels=empty_list, codes=empty_list, names=names), + dtype="float64", + ) From 05bb2018862d393b3b3f22be9bae27087aeeb09e Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Sun, 16 Jan 2022 17:01:15 +0100 Subject: [PATCH 03/13] Fix returned unit to dimensionless, add test for IamDataFrame behavior --- pyam/compute.py | 10 +++++-- tests/test_feature_growth_rate.py | 44 +++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 tests/test_feature_growth_rate.py diff --git a/pyam/compute.py b/pyam/compute.py index 6224101c8..f4e8d7992 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -50,9 +50,15 @@ def growth_rate(self, mapping, append=False): .groupby(remove_from_list(self._df.dimensions, ["year"]), group_keys=False) .apply(growth_rate) ) - value.index = replace_index_values(value.index, "variable", mapping) + if value.empty: + value = empty_series(remove_from_list(self._df.dimensions, "unit")) + else: + # drop level "unit" and reinsert below, replace "variable" + value.index = ( + replace_index_values(value.index.droplevel("unit"), "variable", mapping) + ) - return self._finalize(value, append=append) + return self._finalize(value, append=append, unit="") def learning_rate(self, name, performance, experience, append=False): """Compute the implicit learning rate from timeseries data diff --git a/tests/test_feature_growth_rate.py b/tests/test_feature_growth_rate.py new file mode 100644 index 000000000..8f3aab9a4 --- /dev/null +++ b/tests/test_feature_growth_rate.py @@ -0,0 +1,44 @@ +import pandas as pd +from pyam import IamDataFrame, IAMC_IDX +from pyam.testing import assert_iamframe_equal +import pytest + +from conftest import META_DF + + +EXP_DF = IamDataFrame( + pd.DataFrame( + [ + ["model_a", "scen_a", "World", "Growth Rate", "", 0.430969], + ["model_a", "scen_b", "World", "Growth Rate", "", 0.284735], + ], + columns=IAMC_IDX + [2005], + ), + meta=META_DF, +) + + +@pytest.mark.parametrize("append", (False, True)) +def test_learning_rate(test_df_year, append): + """Check computing the growth rate from an IamDataFrame""" + + if append: + obs = test_df_year.copy() + obs.compute.growth_rate({"Primary Energy": "Growth Rate"}, append=True) + assert_iamframe_equal(test_df_year.append(EXP_DF), obs) + else: + obs = test_df_year.compute.growth_rate({"Primary Energy": "Growth Rate"}) + assert_iamframe_equal(EXP_DF, obs) + + +@pytest.mark.parametrize("append", (False, True)) +def test_learning_rate_empty(test_df_year, append): + """Assert that computing the growth rate with invalid variables returns empty""" + + if append: + obs = test_df_year.copy() + obs.compute.growth_rate({"foo": "bar"}, append=True) + assert_iamframe_equal(test_df_year, obs) # assert that no data was added + else: + obs = test_df_year.compute.growth_rate({"foo": "bar"}) + assert obs.empty From 98bb81d16f6d16faf588d054f006df6dd623d82d Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Sun, 16 Jan 2022 22:05:30 +0100 Subject: [PATCH 04/13] Fix after rebase --- pyam/compute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyam/compute.py b/pyam/compute.py index f4e8d7992..9200a62ab 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -58,7 +58,7 @@ def growth_rate(self, mapping, append=False): replace_index_values(value.index.droplevel("unit"), "variable", mapping) ) - return self._finalize(value, append=append, unit="") + return self._df._finalize(value, append=append, unit="") def learning_rate(self, name, performance, experience, append=False): """Compute the implicit learning rate from timeseries data From 8e315946477cf9c04e2ab8e3b76203fee9ed3b2f Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Mon, 17 Jan 2022 15:16:06 +0100 Subject: [PATCH 05/13] Update documentation --- pyam/timeseries.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyam/timeseries.py b/pyam/timeseries.py index 1ad746a80..858219fbe 100644 --- a/pyam/timeseries.py +++ b/pyam/timeseries.py @@ -134,8 +134,8 @@ def cross_threshold( def growth_rate(x): """Compute the annualized growth rate from timeseries data - The annualized growth rate parameter in period *t* is computed based on the changes - from period *t* to period *t+1*. + The annualized growth rate parameter in period *t* is computed assuming exponential + growth based on the changes from period *t* to period *t+1*. Parameters ---------- From 150ec268fa016c43024573bf7eb303f2338bd509 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Mon, 17 Jan 2022 20:15:41 +0100 Subject: [PATCH 06/13] Fix math domain error --- pyam/compute.py | 5 +++++ pyam/timeseries.py | 19 ++++++++++++------- tests/test_feature_growth_rate.py | 21 +++++++++++++++++++-- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/pyam/compute.py b/pyam/compute.py index 9200a62ab..9b7161c1f 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -44,6 +44,11 @@ def growth_rate(self, mapping, append=False): ------- :class:`IamDataFrame` or **None** Computed timeseries data or None if `append=True`. + + Raises + ------ + ValueError + Math domain error when timeseries crosses 0. """ value = ( self._df._data[self._df._apply_filters(variable=mapping)] diff --git a/pyam/timeseries.py b/pyam/timeseries.py index 858219fbe..a11f2109f 100644 --- a/pyam/timeseries.py +++ b/pyam/timeseries.py @@ -2,7 +2,7 @@ import math import numpy as np import pandas as pd -from pyam.utils import isstr, to_int +from pyam.utils import isstr, to_int, raise_data_error logger = logging.getLogger(__name__) @@ -145,21 +145,26 @@ def growth_rate(x): Returns ------- Indexed :class:`pandas.Series` of annualized growth rates + + Raises + ------ + ValueError + Math domain error when timeseries crosses 0. """ + if not (all([v > 0 for v in x.values]) or all([v < 0 for v in x.values])): + raise_data_error("Cannot compute growth rate when timeseries crosses 0", x) + x = x.sort_index() - growth_rate = (-x.diff(periods=-1) / x).values + growth_rate = (-x.diff(periods=-1) / x).values[:-1] # diff on latest period is nan if isinstance(x.index, pd.MultiIndex): periods = x.index.get_level_values("year") else: periods = x.index - period_length = -pd.Series(periods).diff(periods=-1).values + period_length = -pd.Series(periods).diff(periods=-1).values[:-1] return pd.Series( - [ - math.copysign(math.pow(1 + abs(v), 1 / d) - 1, v) - for v, d in zip(growth_rate[:-1], period_length[:-1]) - ], + [math.pow(1 + v, 1 / d) - 1 for v, d in zip(growth_rate, period_length)], index=x.index[:-1], ) diff --git a/tests/test_feature_growth_rate.py b/tests/test_feature_growth_rate.py index 8f3aab9a4..83e902b6a 100644 --- a/tests/test_feature_growth_rate.py +++ b/tests/test_feature_growth_rate.py @@ -1,6 +1,9 @@ +import math import pandas as pd +import pandas.testing as pdt from pyam import IamDataFrame, IAMC_IDX from pyam.testing import assert_iamframe_equal +from pyam.timeseries import growth_rate import pytest from conftest import META_DF @@ -19,7 +22,7 @@ @pytest.mark.parametrize("append", (False, True)) -def test_learning_rate(test_df_year, append): +def test_growth_rate(test_df_year, append): """Check computing the growth rate from an IamDataFrame""" if append: @@ -32,7 +35,7 @@ def test_learning_rate(test_df_year, append): @pytest.mark.parametrize("append", (False, True)) -def test_learning_rate_empty(test_df_year, append): +def test_growth_rate_empty(test_df_year, append): """Assert that computing the growth rate with invalid variables returns empty""" if append: @@ -42,3 +45,17 @@ def test_learning_rate_empty(test_df_year, append): else: obs = test_df_year.compute.growth_rate({"foo": "bar"}) assert obs.empty + + +@pytest.mark.parametrize("x2010", (1, 27, -3)) +@pytest.mark.parametrize("rates", ([0.05, 1.25], [0.5, -0.5])) +def test_growth_rate_timeseries(x2010, rates): + """Check several combinations of growth rates directly on the timeseries""" + + x2013 = x2010 * math.pow(1 + rates[0], 3) # 3 years: 2010 - 2013 + x2017 = x2013 * math.pow(1 + rates[1], 4) # 4 years: 2013 - 2017 + + pdt.assert_series_equal( + growth_rate(pd.Series([x2010, x2013, x2017], index=[2010, 2013, 2017])), + pd.Series(rates, index=[2010, 2013]), + ) From 72fc10ea34d5c5165e643e32f2c4030ce065f517 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Mon, 17 Jan 2022 20:19:13 +0100 Subject: [PATCH 07/13] Add a test for reaching/crossing 0 --- tests/test_feature_growth_rate.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_feature_growth_rate.py b/tests/test_feature_growth_rate.py index 83e902b6a..1cb70f32f 100644 --- a/tests/test_feature_growth_rate.py +++ b/tests/test_feature_growth_rate.py @@ -59,3 +59,11 @@ def test_growth_rate_timeseries(x2010, rates): growth_rate(pd.Series([x2010, x2013, x2017], index=[2010, 2013, 2017])), pd.Series(rates, index=[2010, 2013]), ) + + +@pytest.mark.parametrize("value", (0, -1)) +def test_growth_rate_timeseries_fails(value): + """Check that a timeseries reaching/crossing 0 raises""" + + with pytest.raises(ValueError, match="Cannot compute growth rate when*."): + growth_rate(pd.Series([1., value])) From c3096f81b93c8d2137a5821d92b95a4cd09d7e64 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Mon, 17 Jan 2022 20:28:45 +0100 Subject: [PATCH 08/13] Add to release notes --- RELEASE_NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 455812e01..2fbdf42ad 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,6 @@ # Next release +- [#604](https://github.com/IAMconsortium/pyam/pull/604) Add an annualized-growth-rate method - [#602](https://github.com/IAMconsortium/pyam/pull/602) Add a `compute` module/accessor and a learning-rate method - [#600](https://github.com/IAMconsortium/pyam/pull/600) Add a `diff()` method - [#592](https://github.com/IAMconsortium/pyam/pull/592) Fix for running in jupyter-lab notebooks From 54209e9171fcfaad3b5725254214c461a706821e Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Mon, 17 Jan 2022 20:29:22 +0100 Subject: [PATCH 09/13] Make black --- pyam/compute.py | 4 ++-- tests/test_feature_growth_rate.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyam/compute.py b/pyam/compute.py index 9b7161c1f..a3884834e 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -59,8 +59,8 @@ def growth_rate(self, mapping, append=False): value = empty_series(remove_from_list(self._df.dimensions, "unit")) else: # drop level "unit" and reinsert below, replace "variable" - value.index = ( - replace_index_values(value.index.droplevel("unit"), "variable", mapping) + value.index = replace_index_values( + value.index.droplevel("unit"), "variable", mapping ) return self._df._finalize(value, append=append, unit="") diff --git a/tests/test_feature_growth_rate.py b/tests/test_feature_growth_rate.py index 1cb70f32f..c03fd5ce4 100644 --- a/tests/test_feature_growth_rate.py +++ b/tests/test_feature_growth_rate.py @@ -66,4 +66,4 @@ def test_growth_rate_timeseries_fails(value): """Check that a timeseries reaching/crossing 0 raises""" with pytest.raises(ValueError, match="Cannot compute growth rate when*."): - growth_rate(pd.Series([1., value])) + growth_rate(pd.Series([1.0, value])) From 90537bf7b293f9bce5b66f9576b3f12b0ba0d8b0 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Sun, 23 Jan 2022 13:17:35 +0100 Subject: [PATCH 10/13] Add "See also" section --- pyam/compute.py | 5 +++++ pyam/timeseries.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/pyam/compute.py b/pyam/compute.py index a3884834e..2df944ea6 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -49,6 +49,11 @@ def growth_rate(self, mapping, append=False): ------ ValueError Math domain error when timeseries crosses 0. + + See Also + -------- + pyam.timeseries.growth_rate + """ value = ( self._df._data[self._df._apply_filters(variable=mapping)] diff --git a/pyam/timeseries.py b/pyam/timeseries.py index a11f2109f..6476a8fb1 100644 --- a/pyam/timeseries.py +++ b/pyam/timeseries.py @@ -150,6 +150,11 @@ def growth_rate(x): ------ ValueError Math domain error when timeseries crosses 0. + + See Also + -------- + pyam.IamComputeAccessor.growth_rate + """ if not (all([v > 0 for v in x.values]) or all([v < 0 for v in x.values])): From ddf52fca201014971e19ee007ddc99b5ca4801c0 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Sun, 23 Jan 2022 13:25:12 +0100 Subject: [PATCH 11/13] Change the title of the compute-docs --- doc/source/api.rst | 2 +- doc/source/api/compute.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index bc832f8ab..61d657b49 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -14,9 +14,9 @@ and methods. api/iamdataframe api/database api/filtering + api/compute api/plotting api/iiasa - api/compute api/statistics api/timeseries api/variables diff --git a/doc/source/api/compute.rst b/doc/source/api/compute.rst index 523104f3d..3ee86f9a2 100644 --- a/doc/source/api/compute.rst +++ b/doc/source/api/compute.rst @@ -1,7 +1,7 @@ .. currentmodule:: pyam -Computing indicators -==================== +Advanced timeseries indicators +============================== .. autoclass:: IamComputeAccessor :members: From 1be4512ec6d613416e60e73f42b3bb06f0289805 Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Sun, 23 Jan 2022 13:25:21 +0100 Subject: [PATCH 12/13] More minor fixes --- doc/source/api/compute.rst | 4 ++-- pyam/compute.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/api/compute.rst b/doc/source/api/compute.rst index 3ee86f9a2..8191b2135 100644 --- a/doc/source/api/compute.rst +++ b/doc/source/api/compute.rst @@ -1,7 +1,7 @@ .. currentmodule:: pyam -Advanced timeseries indicators -============================== +Derived timeseries data +======================= .. autoclass:: IamComputeAccessor :members: diff --git a/pyam/compute.py b/pyam/compute.py index 2df944ea6..dee80d268 100644 --- a/pyam/compute.py +++ b/pyam/compute.py @@ -35,7 +35,7 @@ def growth_rate(self, mapping, append=False): .. code-block:: python - {"current variable": "name of growth-rate variable", ...} + {"variable": "name of growth-rate variable", ...} append : bool, optional Whether to append computed timeseries data to this instance. From b91feff54cd564caf74182c021d394759258159b Mon Sep 17 00:00:00 2001 From: Daniel Huppmann Date: Sun, 23 Jan 2022 13:39:05 +0100 Subject: [PATCH 13/13] Pin pandas<1.4 as a quickfix --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 1af6d17bf..7bc347cad 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,7 +29,7 @@ install_requires = numpy >= 1.19.0 requests openpyxl - pandas >= 1.1.1 + pandas >= 1.1.1, < 1.4 pint <= 0.17 PyYAML matplotlib >= 3.2.0