diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 29344748d..e9874dce0 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -7,7 +7,8 @@ the attribute `_LONG_IDX` as deprecated. Please use `dimensions` instead.
 
 ## Individual updates
 
-- [#559](https://github.com/IAMconsortium/pyam/pull/559) Add attribute dimensions, fix compatibility with pandas v1.3
+- [#560](https://github.com/IAMconsortium/pyam/pull/560) Add a feature to `swap_year_for_time()`
+- [#559](https://github.com/IAMconsortium/pyam/pull/559) Add attribute `dimensions`, fix compatibility with pandas v1.3
 - [#557](https://github.com/IAMconsortium/pyam/pull/557) Swap time for year keeping subannual resolution
 - [#556](https://github.com/IAMconsortium/pyam/pull/556) Set explicit minimum numpy version (1.19.0)
 
diff --git a/doc/source/conf.py b/doc/source/conf.py
index e6a0c3e70..b64265cd8 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -324,6 +324,7 @@
 # Intersphinx configuration.
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3", None),
+    "dateutil": ("https://dateutil.readthedocs.io/en/stable/", None),
     "numpy": ("https://numpy.org/doc/stable/", None),
     "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None),
     "pint": ("https://pint.readthedocs.io/en/stable", None),
diff --git a/pyam/__init__.py b/pyam/__init__.py
index 86d95d45f..1aee585f2 100644
--- a/pyam/__init__.py
+++ b/pyam/__init__.py
@@ -10,6 +10,7 @@
 from pyam.iiasa import read_iiasa
 from pyam.datareader import read_worldbank
 from pyam.unfccc import read_unfccc
+from pyam.testing import assert_iamframe_equal
 
 from pyam.logging import defer_logging_config
 
diff --git a/pyam/core.py b/pyam/core.py
index 758a57149..2be98e10f 100755
--- a/pyam/core.py
+++ b/pyam/core.py
@@ -11,9 +11,6 @@
 from pathlib import Path
 from tempfile import TemporaryDirectory
 
-from pyam._debiasing import _compute_bias
-from pyam.time import swap_time_for_year
-
 try:
     from datapackage import Package
 
@@ -57,7 +54,7 @@
     _raise_data_error,
 )
 from pyam.read_ixmp import read_ix
-from pyam.plotting import PlotAccessor, mpl_args_to_meta_cols
+from pyam.plotting import PlotAccessor
 from pyam._compare import _compare
 from pyam.aggregation import (
     _aggregate,
@@ -75,6 +72,8 @@
     verify_index_integrity,
     replace_index_values,
 )
+from pyam.time import swap_time_for_year, swap_year_for_time
+from pyam._debiasing import _compute_bias
 from pyam.logging import deprecation_warning
 
 logger = logging.getLogger(__name__)
@@ -613,25 +612,67 @@ def interpolate(self, time, inplace=False, **kwargs):
         return ret
 
     def swap_time_for_year(self, subannual=False, inplace=False):
-        """Convert the `time` column to `year`.
+        """Convert the `time` dimension to `year` (as integer).
 
         Parameters
         ----------
         subannual : bool, str or func, optional
             Merge non-year components of the "time" domain as new column "subannual".
-            Apply `strftime()` on the values of the "time" domain using `subannual`
-            as format (if a string) or using "%m-%d %H:%M%z" (if True).
+            Apply :meth:`strftime() <datetime.date.strftime>` on the values of the
+            "time" domain using `subannual` (if a string) or "%m-%d %H:%M%z" (if True).
             If it is a function, apply the function on the values of the "time" domain.
         inplace : bool, optional
             If True, do operation inplace and return None.
 
+        Returns
+        -------
+        :class:`IamDataFrame` or **None**
+            Object with altered time domain or None if `inplace=True`.
+
         Raises
         ------
         ValueError
             "time" is not a column of `self.data`
+
+        See Also
+        --------
+        swap_year_for_time
+
         """
         return swap_time_for_year(self, subannual=subannual, inplace=inplace)
 
+    def swap_year_for_time(self, inplace=False):
+        """Convert the `year` and `subannual` dimensions to `time` (as datetime).
+
+        The method applies :func:`dateutil.parser.parse` on each combination of the
+        columns `year` and `subannual`:
+
+        .. code-block:: python
+
+            [dateutil.parser.parse(f"{y}-{s}") for y, s in zip(year, subannual)]
+
+        Parameters
+        ----------
+        inplace : bool, optional
+            If True, do operation inplace and return None.
+
+        Returns
+        -------
+        :class:`IamDataFrame` or **None**
+            Object with altered time domain or None if `inplace=True`.
+
+        Raises
+        ------
+        ValueError
+            "year" or "subannual" are not a column of `self.data`
+
+        See Also
+        --------
+        swap_time_for_year
+
+        """
+        return swap_year_for_time(self, inplace=inplace)
+
     def as_pandas(self, meta_cols=True):
         """Return object as a pandas.DataFrame
 
diff --git a/pyam/time.py b/pyam/time.py
index f5a3dac78..7e7b67268 100644
--- a/pyam/time.py
+++ b/pyam/time.py
@@ -1,3 +1,4 @@
+import dateutil.parser
 import pandas as pd
 from pyam.index import append_index_col
 from pyam.utils import _raise_data_error
@@ -28,6 +29,7 @@ def swap_time_for_year(df, inplace, subannual=False):
             _subannual = time.apply(subannual)
 
         index = append_index_col(index, _subannual, "subannual")
+        ret.extra_cols.append("subannual")
 
     rows = index.duplicated()
     if any(rows):
@@ -42,3 +44,36 @@ def swap_time_for_year(df, inplace, subannual=False):
 
     if not inplace:
         return ret
+
+
+def swap_year_for_time(df, inplace):
+    """Internal implementation to swap 'year' domain to 'time' (as datetime)"""
+
+    if df.time_col != "year":
+        raise ValueError("Time domain must be 'year' to use this method")
+    if "subannual" not in df.extra_cols:
+        raise ValueError("Data must have a dimension 'subannual' to use this method")
+
+    ret = df.copy() if not inplace else df
+    index = ret._data.index
+    # keep the level order, with "time" replacing "year" and without "subannual"
+    # (filter in the comprehension, because `list.remove()` returns None)
+    order = [v if v != "year" else "time" for v in index.names if v != "subannual"]
+
+    # parse each "<year>-<subannual>" string to a datetime
+    time_cols = ["year", "subannual"]
+    time_values = zip(*[index.get_level_values(c) for c in time_cols])
+    time = list(map(dateutil.parser.parse, [f"{y}-{s}" for y, s in time_values]))
+
+    index = index.droplevel(["year", "subannual"])
+    index = append_index_col(index, time, "time", order=order)
+
+    # assign data and other attributes
+    ret._data.index = index
+    ret.extra_cols.remove("subannual")
+    ret.time_col = "time"
+    ret._set_attributes()
+    delattr(ret, "year")
+
+    if not inplace:
+        return ret
diff --git a/tests/test_time.py b/tests/test_time.py
index 4d6a56ca4..acb2e2d7a 100644
--- a/tests/test_time.py
+++ b/tests/test_time.py
@@ -69,11 +69,21 @@ def test_swap_time_to_year_subannual(test_pd_df, columns, subannual, dates, inpl
     """Swap time column for year (int) keeping subannual resolution as extra-column"""
 
     test_pd_df.rename({2005: columns[0], 2010: columns[1]}, axis=1, inplace=True)
-    obs = IamDataFrame(test_pd_df).swap_time_for_year(subannual=subannual)
+
+    # check swapping time for year
+    df = IamDataFrame(test_pd_df)
+    obs = df.swap_time_for_year(subannual=subannual, inplace=inplace)
+
+    if inplace:
+        assert obs is None
+        obs = df
 
     exp = get_subannual_df(dates[0], dates[1])
     assert_iamframe_equal(obs, exp)
 
+    # check that reverting using `swap_year_for_time` yields the original data
+    assert_iamframe_equal(obs.swap_year_for_time(), IamDataFrame(test_pd_df))
+
 
 def test_swap_time_to_year_errors(test_df):
     """Assert that swapping time column for year (int) raises the expected errors"""