From 2457149a3c67949192ed7e61383f9cbc8f92a64b Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 13 May 2022 09:03:55 +0200 Subject: [PATCH 1/6] add offset method to compute offsets relative to base-year values --- pyam/core.py | 40 ++++++++++++++++++++++++++++++++++++++++ tests/test_core.py | 19 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/pyam/core.py b/pyam/core.py index 55a86af0d..2315d33ef 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -1269,6 +1269,46 @@ def normalize(self, inplace=False, **kwargs): if not inplace: return ret + def offset(self, padding=0, inplace=False, **kwargs): + """Compute new data which is offset from a specific data point + + For example, offsetting from `year=2005` will provide data + *relative* to `year=2005` such that the value in 2005 is 0 and + all other values `value[year] - value[2005]`. + + Conceptually this operation performs as: + ``` + df - df.filter(**kwargs) + padding + ``` + + Note: Currently only supports normalizing to a specific time. 
+ + Parameters + ---------- + padding : float, optional + an additional offset padding + inplace : bool, optional + if :obj:`True`, do operation inplace and return None + kwargs + the column and value on which to offset (e.g., `year=2005`) + """ + if len(kwargs) > 1 or self.time_col not in kwargs: + raise ValueError("Only time(year)-based normalization supported") + ret = self.copy() if not inplace else self + df = ret.data + # change all below if supporting more in the future + cols = self.time_col + value = kwargs[self.time_col] + x = df.set_index(IAMC_IDX) + x["value"] -= x[x[cols] == value]["value"] + x["value"] += padding + + x = x.reset_index() + ret._data = x.set_index(self.dimensions).value + + if not inplace: + return ret + def aggregate( self, variable, diff --git a/tests/test_core.py b/tests/test_core.py index f87d9c173..ee429cbc1 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -789,3 +789,22 @@ def test_normalize(test_df): def test_normalize_not_time(test_df): pytest.raises(ValueError, test_df.normalize, variable="foo") pytest.raises(ValueError, test_df.normalize, year=2015, variable="foo") + +@pytest.mark.parametrize("padding", [0, 2]) +def test_offset(test_df, padding): + exp = test_df.data.copy().reset_index(drop=True) + exp.loc[1::2, "value"] -= exp["value"][::2].values - padding + exp.loc[::2, "value"] -= exp["value"][::2].values - padding + # only call with kwarg if padding != 0 (the default) + kwargs = {'padding': padding} if padding else {} + if "year" in test_df.data: + obs = test_df.offset(year=2005, **kwargs).data.reset_index(drop=True) + else: + obs = test_df.offset(time=datetime.datetime(2005, 6, 17), **kwargs).data.reset_index( + drop=True + ) + pd.testing.assert_frame_equal(obs, exp) + +def test_offset_not_time(test_df): + pytest.raises(ValueError, test_df.offset, variable="foo") + pytest.raises(ValueError, test_df.offset, year=2015, variable="foo") \ No newline at end of file From e5a8ceb110c7c8c2600f4af343bb8bdbcc6ef12a Mon 
Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 13 May 2022 09:08:31 +0200 Subject: [PATCH 2/6] blacking --- pyam/core.py | 2 +- tests/test_core.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index 2315d33ef..c140a3700 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -1273,7 +1273,7 @@ def offset(self, padding=0, inplace=False, **kwargs): """Compute new data which is offset from a specific data point For example, offsetting from `year=2005` will provide data - *relative* to `year=2005` such that the value in 2005 is 0 and + *relative* to `year=2005` such that the value in 2005 is 0 and all other values `value[year] - value[2005]`. Conceptually this operation performs as: diff --git a/tests/test_core.py b/tests/test_core.py index ee429cbc1..f13973b8c 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -790,21 +790,23 @@ def test_normalize_not_time(test_df): pytest.raises(ValueError, test_df.normalize, variable="foo") pytest.raises(ValueError, test_df.normalize, year=2015, variable="foo") + @pytest.mark.parametrize("padding", [0, 2]) def test_offset(test_df, padding): exp = test_df.data.copy().reset_index(drop=True) exp.loc[1::2, "value"] -= exp["value"][::2].values - padding exp.loc[::2, "value"] -= exp["value"][::2].values - padding # only call with kwarg if padding != 0 (the default) - kwargs = {'padding': padding} if padding else {} + kwargs = {"padding": padding} if padding else {} if "year" in test_df.data: obs = test_df.offset(year=2005, **kwargs).data.reset_index(drop=True) else: - obs = test_df.offset(time=datetime.datetime(2005, 6, 17), **kwargs).data.reset_index( - drop=True - ) + obs = test_df.offset( + time=datetime.datetime(2005, 6, 17), **kwargs + ).data.reset_index(drop=True) pd.testing.assert_frame_equal(obs, exp) + def test_offset_not_time(test_df): pytest.raises(ValueError, test_df.offset, variable="foo") - pytest.raises(ValueError, test_df.offset, year=2015, 
variable="foo") \ No newline at end of file + pytest.raises(ValueError, test_df.offset, year=2015, variable="foo") From e23d99950e63c4c2a7741e0ce8281bb2982d8b89 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 20 May 2022 10:35:51 +0200 Subject: [PATCH 3/6] apply faster set_index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jonas Hörsch --- pyam/core.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index c140a3700..076dc7e99 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -1295,16 +1295,10 @@ def offset(self, padding=0, inplace=False, **kwargs): if len(kwargs) > 1 or self.time_col not in kwargs: raise ValueError("Only time(year)-based normalization supported") ret = self.copy() if not inplace else self - df = ret.data - # change all below if supporting more in the future - cols = self.time_col + data = ret._data value = kwargs[self.time_col] - x = df.set_index(IAMC_IDX) - x["value"] -= x[x[cols] == value]["value"] - x["value"] += padding - - x = x.reset_index() - ret._data = x.set_index(self.dimensions).value + base_value = data.loc[data.index.isin([value], level=self.time_col)].droplevel(self.time_col) + ret._data = data - base_value + padding if not inplace: return ret From 0aaeb811ca0b68ec11589da1fe9c44574b4ccda5 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 20 May 2022 10:39:26 +0200 Subject: [PATCH 4/6] provide a fill_value --- pyam/core.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index 076dc7e99..90b00371d 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -1269,7 +1269,7 @@ def normalize(self, inplace=False, **kwargs): if not inplace: return ret - def offset(self, padding=0, inplace=False, **kwargs): + def offset(self, padding=0, fill_value=None, inplace=False, **kwargs): """Compute new data which is offset from a specific data point For example, offsetting 
from `year=2005` will provide data @@ -1287,6 +1287,9 @@ def offset(self, padding=0, inplace=False, **kwargs): ---------- padding : float, optional an additional offset padding + fill_value : float or None, optional + Applied on subtraction. Fills exisiting missing (NaN) values. + See https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.subtract.html inplace : bool, optional if :obj:`True`, do operation inplace and return None kwargs @@ -1297,8 +1300,10 @@ def offset(self, padding=0, inplace=False, **kwargs): ret = self.copy() if not inplace else self data = ret._data value = kwargs[self.time_col] - base_value = data.loc[data.index.isin([value], level=self.time_col)].droplevel(self.time_col) - ret._data = data - base_value + padding + base_value = data.loc[data.index.isin([value], level=self.time_col)].droplevel( + self.time_col + ) + ret._data = data.subtract(base_value, fill_value=fill_value) + padding if not inplace: return ret From a23385a50c4a003e5796dca9d9874b56635c60eb Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 20 May 2022 10:40:37 +0200 Subject: [PATCH 5/6] add to release notes --- RELEASE_NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 6654c69cc..b3b370dd6 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -1,5 +1,6 @@ # Next release +- [#659](https://github.com/IAMconsortium/pyam/pull/659) Add an `offset` method - [#657](https://github.com/IAMconsortium/pyam/pull/657) Add an `IamSlice` class # Release v1.4.0 From b203f5669bad2768dff8445eea441cfe2ba6b55b Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 20 May 2022 10:42:23 +0200 Subject: [PATCH 6/6] stickler --- pyam/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index 90b00371d..543c6cc8a 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -1288,8 +1288,8 @@ def offset(self, padding=0, fill_value=None, inplace=False, **kwargs): padding : float, optional an 
additional offset padding fill_value : float or None, optional - Applied on subtraction. Fills exisiting missing (NaN) values. See - https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.subtract.html + Applied on subtraction. Fills existing missing (NaN) values. See + https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.subtract.html inplace : bool, optional if :obj:`True`, do operation inplace and return None kwargs