From 2457149a3c67949192ed7e61383f9cbc8f92a64b Mon Sep 17 00:00:00 2001
From: Matthew Gidden <matthew.gidden@gmail.com>
Date: Fri, 13 May 2022 09:03:55 +0200
Subject: [PATCH 1/6] add offset method to compute offsets relative to
 base-year values

---
 pyam/core.py       | 40 ++++++++++++++++++++++++++++++++++++++++
 tests/test_core.py | 19 +++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/pyam/core.py b/pyam/core.py
index 55a86af0d..2315d33ef 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -1269,6 +1269,46 @@ def normalize(self, inplace=False, **kwargs):
         if not inplace:
             return ret
 
+    def offset(self, padding=0, inplace=False, **kwargs):
+        """Compute new data which is offset from a specific data point
+
+        For example, offsetting from `year=2005` will provide data
+        *relative* to `year=2005` such that the value in 2005 is 0 and 
+        all other values `value[year] - value[2005]`.
+
+        Conceptually this operation performs as:
+        ```
+        df - df.filter(**kwargs) + padding
+        ```
+
+        Note: Currently only supports offsetting from a specific time.
+
+        Parameters
+        ----------
+        padding : float, optional
+            an additional offset padding
+        inplace : bool, optional
+            if :obj:`True`, do operation inplace and return None
+        kwargs
+            the column and value on which to offset (e.g., `year=2005`)
+        """
+        if len(kwargs) > 1 or self.time_col not in kwargs:
+            raise ValueError("Only time(year)-based normalization supported")
+        ret = self.copy() if not inplace else self
+        df = ret.data
+        # change all below if supporting more in the future
+        cols = self.time_col
+        value = kwargs[self.time_col]
+        x = df.set_index(IAMC_IDX)
+        x["value"] -= x[x[cols] == value]["value"]
+        x["value"] += padding
+
+        x = x.reset_index()
+        ret._data = x.set_index(self.dimensions).value
+
+        if not inplace:
+            return ret
+
     def aggregate(
         self,
         variable,
diff --git a/tests/test_core.py b/tests/test_core.py
index f87d9c173..ee429cbc1 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -789,3 +789,22 @@ def test_normalize(test_df):
 def test_normalize_not_time(test_df):
     pytest.raises(ValueError, test_df.normalize, variable="foo")
     pytest.raises(ValueError, test_df.normalize, year=2015, variable="foo")
+
+@pytest.mark.parametrize("padding", [0, 2])
+def test_offset(test_df, padding):
+    exp = test_df.data.copy().reset_index(drop=True)
+    exp.loc[1::2, "value"] -= exp["value"][::2].values - padding
+    exp.loc[::2, "value"] -= exp["value"][::2].values - padding
+    # only call with kwarg if padding != 0 (the default)
+    kwargs = {'padding': padding} if padding else {} 
+    if "year" in test_df.data:
+        obs = test_df.offset(year=2005, **kwargs).data.reset_index(drop=True)
+    else:
+        obs = test_df.offset(time=datetime.datetime(2005, 6, 17), **kwargs).data.reset_index(
+            drop=True
+        )
+    pd.testing.assert_frame_equal(obs, exp)
+
+def test_offset_not_time(test_df):
+    pytest.raises(ValueError, test_df.offset, variable="foo")
+    pytest.raises(ValueError, test_df.offset, year=2015, variable="foo")
\ No newline at end of file

From e5a8ceb110c7c8c2600f4af343bb8bdbcc6ef12a Mon Sep 17 00:00:00 2001
From: Matthew Gidden <matthew.gidden@gmail.com>
Date: Fri, 13 May 2022 09:08:31 +0200
Subject: [PATCH 2/6] blacking

---
 pyam/core.py       |  2 +-
 tests/test_core.py | 12 +++++++-----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index 2315d33ef..c140a3700 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -1273,7 +1273,7 @@ def offset(self, padding=0, inplace=False, **kwargs):
         """Compute new data which is offset from a specific data point
 
         For example, offsetting from `year=2005` will provide data
-        *relative* to `year=2005` such that the value in 2005 is 0 and 
+        *relative* to `year=2005` such that the value in 2005 is 0 and
         all other values `value[year] - value[2005]`.
 
         Conceptually this operation performs as:
diff --git a/tests/test_core.py b/tests/test_core.py
index ee429cbc1..f13973b8c 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -790,21 +790,23 @@ def test_normalize_not_time(test_df):
     pytest.raises(ValueError, test_df.normalize, variable="foo")
     pytest.raises(ValueError, test_df.normalize, year=2015, variable="foo")
 
+
 @pytest.mark.parametrize("padding", [0, 2])
 def test_offset(test_df, padding):
     exp = test_df.data.copy().reset_index(drop=True)
     exp.loc[1::2, "value"] -= exp["value"][::2].values - padding
     exp.loc[::2, "value"] -= exp["value"][::2].values - padding
     # only call with kwarg if padding != 0 (the default)
-    kwargs = {'padding': padding} if padding else {} 
+    kwargs = {"padding": padding} if padding else {}
     if "year" in test_df.data:
         obs = test_df.offset(year=2005, **kwargs).data.reset_index(drop=True)
     else:
-        obs = test_df.offset(time=datetime.datetime(2005, 6, 17), **kwargs).data.reset_index(
-            drop=True
-        )
+        obs = test_df.offset(
+            time=datetime.datetime(2005, 6, 17), **kwargs
+        ).data.reset_index(drop=True)
     pd.testing.assert_frame_equal(obs, exp)
 
+
 def test_offset_not_time(test_df):
     pytest.raises(ValueError, test_df.offset, variable="foo")
-    pytest.raises(ValueError, test_df.offset, year=2015, variable="foo")
\ No newline at end of file
+    pytest.raises(ValueError, test_df.offset, year=2015, variable="foo")

From e23d99950e63c4c2a7741e0ce8281bb2982d8b89 Mon Sep 17 00:00:00 2001
From: Matthew Gidden <matthew.gidden@gmail.com>
Date: Fri, 20 May 2022 10:35:51 +0200
Subject: [PATCH 3/6] apply faster set_index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jonas Hörsch <coroa@posteo.de>
---
 pyam/core.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index c140a3700..076dc7e99 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -1295,16 +1295,10 @@ def offset(self, padding=0, inplace=False, **kwargs):
         if len(kwargs) > 1 or self.time_col not in kwargs:
             raise ValueError("Only time(year)-based normalization supported")
         ret = self.copy() if not inplace else self
-        df = ret.data
-        # change all below if supporting more in the future
-        cols = self.time_col
+        data = ret._data
         value = kwargs[self.time_col]
-        x = df.set_index(IAMC_IDX)
-        x["value"] -= x[x[cols] == value]["value"]
-        x["value"] += padding
-
-        x = x.reset_index()
-        ret._data = x.set_index(self.dimensions).value
+        base_value = data.loc[data.index.isin([value], level=self.time_col)].droplevel(self.time_col)
+        ret._data = data - base_value + padding
 
         if not inplace:
             return ret

From 0aaeb811ca0b68ec11589da1fe9c44574b4ccda5 Mon Sep 17 00:00:00 2001
From: Matthew Gidden <matthew.gidden@gmail.com>
Date: Fri, 20 May 2022 10:39:26 +0200
Subject: [PATCH 4/6] provide a fill_value

---
 pyam/core.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index 076dc7e99..90b00371d 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -1269,7 +1269,7 @@ def normalize(self, inplace=False, **kwargs):
         if not inplace:
             return ret
 
-    def offset(self, padding=0, inplace=False, **kwargs):
+    def offset(self, padding=0, fill_value=None, inplace=False, **kwargs):
         """Compute new data which is offset from a specific data point
 
         For example, offsetting from `year=2005` will provide data
@@ -1287,6 +1287,9 @@ def offset(self, padding=0, inplace=False, **kwargs):
         ----------
         padding : float, optional
             an additional offset padding
+        fill_value : float or None, optional
+            Applied on subtraction. Fills exisiting missing (NaN) values.
+            See https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.subtract.html
         inplace : bool, optional
             if :obj:`True`, do operation inplace and return None
         kwargs
@@ -1297,8 +1300,10 @@ def offset(self, padding=0, inplace=False, **kwargs):
         ret = self.copy() if not inplace else self
         data = ret._data
         value = kwargs[self.time_col]
-        base_value = data.loc[data.index.isin([value], level=self.time_col)].droplevel(self.time_col)
-        ret._data = data - base_value + padding
+        base_value = data.loc[data.index.isin([value], level=self.time_col)].droplevel(
+            self.time_col
+        )
+        ret._data = data.subtract(base_value, fill_value=fill_value) + padding
 
         if not inplace:
             return ret

From a23385a50c4a003e5796dca9d9874b56635c60eb Mon Sep 17 00:00:00 2001
From: Matthew Gidden <matthew.gidden@gmail.com>
Date: Fri, 20 May 2022 10:40:37 +0200
Subject: [PATCH 5/6] add to release notes

---
 RELEASE_NOTES.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 6654c69cc..b3b370dd6 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -1,5 +1,6 @@
 # Next release
 
+- [#659](https://github.com/IAMconsortium/pyam/pull/659) Add an `offset` method
 - [#657](https://github.com/IAMconsortium/pyam/pull/657) Add an `IamSlice` class
 
 # Release v1.4.0

From b203f5669bad2768dff8445eea441cfe2ba6b55b Mon Sep 17 00:00:00 2001
From: Matthew Gidden <matthew.gidden@gmail.com>
Date: Fri, 20 May 2022 10:42:23 +0200
Subject: [PATCH 6/6] stickler

---
 pyam/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyam/core.py b/pyam/core.py
index 90b00371d..543c6cc8a 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -1288,8 +1288,8 @@ def offset(self, padding=0, fill_value=None, inplace=False, **kwargs):
         padding : float, optional
             an additional offset padding
         fill_value : float or None, optional
-            Applied on subtraction. Fills exisiting missing (NaN) values.
-            See https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.subtract.html
+            Applied on subtraction. Fills existing missing (NaN) values. See
+            https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.subtract.html
         inplace : bool, optional
             if :obj:`True`, do operation inplace and return None
         kwargs