Skip to content

Commit

Permalink
fix: Bump pandas to the latest version
Browse files Browse the repository at this point in the history
Bump pandas and update tests to handle the latest version of pandas. This also allows the use of newer versions of Python during CI.

BREAKING CHANGE: Removes support for Python 3.7
  • Loading branch information
lewisjared committed Apr 3, 2023
1 parent 0e63108 commit 49254de
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 40 deletions.
19 changes: 7 additions & 12 deletions .github/workflows/ci-cd-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: [3.7, 3.8, 3.9]
exclude:
# netCDF4>=1.6.0 doesn't include prebuilt wheels for windows/Python3.7
# Older netCDF4 or self-built versions will likely work
- os: windows-latest
python-version: 3.7
python-version: [3.8, 3.9, '3.10', 3.11]
steps:
- name: Checkout repository
uses: actions/checkout@v3
Expand Down Expand Up @@ -92,7 +87,7 @@ jobs:
run: |
pytest tests -r a --cov=scmdata --cov-report=term-missing --cov-fail-under=$env:MIN_COVERAGE
- name: Upload coverage to Codecov
if: startsWith(runner.os, 'Linux') && matrix.python-version == 3.7
if: startsWith(runner.os, 'Linux') && matrix.python-version == 3.11
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
Expand All @@ -101,7 +96,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7]
python-version: [3.9]
pandas-version: [1.0.5, 1.1]

steps:
Expand Down Expand Up @@ -134,7 +129,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ 3.7 ]
python-version: [ 3.9 ]
xarray-version: [ 0.16.2, 0.17.0, 0.18.2 ]

steps:
Expand Down Expand Up @@ -170,7 +165,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7]
python-version: [3.9]

steps:
- name: Checkout repository
Expand Down Expand Up @@ -201,7 +196,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, 3.11]

steps:
- name: Checkout repository
Expand All @@ -226,7 +221,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, '3.10', 3.11]

steps:
- name: Checkout repository
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-conda-install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, '3.10', 3.11]

steps:
- name: Setup conda
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-pypi-install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, '3.10', 3.11]

steps:
- name: Setup python
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Changelog
master
------

- (`#223 <https://github.com/openscm/scmdata/pull/223>`_) Loosen the pandas requirement to cover pandas>=1.4.3. Also officially support Python 3.10 and 3.11
- (`#222 <https://github.com/openscm/scmdata/pull/222>`_) Decrease the minimum number of time points for interpolation to 2
- (`#221 <https://github.com/openscm/scmdata/pull/221>`_) Add option to :func:`scmdata.ScmRun.interpolate` to allow for interpolation which ignores leap-years. This also fixes a bug where :func:`scmdata.ScmRun.interpolate` converts integer values into unix time. This functionality isn't consistent with the behaviour of the TimePoints class where integers are converted into years.
- (`#218 <https://github.com/openscm/scmdata/pull/218>`_) Replaced internal calls to :func:`scmdata.groupby.RunGroupby.map` with :func:`scmdata.groupby.RunGroupby.apply`
Expand Down
5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ classifiers =
License :: OSI Approved :: BSD License
Intended Audience :: Developers
Operating System :: OS Independent
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11

[options]
packages = find:
Expand All @@ -34,7 +35,7 @@ install_requires =
numpy
openscm-units
packaging
pandas>=1.0.4, <=1.4.2
pandas>=1.0.4, <2
pint<0.20
pint-pandas
python-dateutil
Expand Down
4 changes: 2 additions & 2 deletions src/scmdata/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -1715,7 +1715,7 @@ def process_over(
ts.index = pd.MultiIndex.from_frame(ts_idx.fillna(na_override))

group_cols = list(set(ts.index.names) - set(cols))
grouper = ts.groupby(group_cols)
grouper = ts.groupby(group_cols, group_keys=False)

# This is a subset of the available functions
# https://pandas.pydata.org/pandas-docs/stable/reference/groupby.html
Expand Down Expand Up @@ -2537,7 +2537,7 @@ def run_append(
# Convert back from str
for c in nan_cols[nan_cols].index:
new_meta[c].replace("nan", np.nan, inplace=True)
for c, dtype in orig_dtypes.iteritems():
for c, dtype in orig_dtypes.items():
new_meta[c] = new_meta[c].astype(dtype)

ret._meta = pd.MultiIndex.from_frame(new_meta.astype("category"))
Expand Down
16 changes: 11 additions & 5 deletions tests/integration/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,9 @@ def test_exceedance_probabilities_over_time(
exp = pd.DataFrame(
np.array(exp_vals)[np.newaxis, :],
index=exp_idx,
columns=test_processing_scm_df.time_points.to_index(),
# This forces the coercion to a DateTimeIndex which now happens automatically for
# pandas>=1.4.3
columns=pd.Index(test_processing_scm_df.time_points.to_index()),
)
exp.index = exp.index.set_levels(
[_get_calculate_exeedance_probs_expected_name(output_name, threshold)],
Expand All @@ -424,7 +426,7 @@ def test_exceedance_probabilities_over_time(
level="unit",
)

pdt.assert_frame_equal(res, exp, check_like=True, check_column_type=False)
pdt.assert_frame_equal(res, exp, check_like=True, check_names=False)


def test_exceedance_probabilities_over_time_multiple_res(
Expand All @@ -444,7 +446,11 @@ def test_exceedance_probabilities_over_time_multiple_res(
start.meta.drop(["ensemble_member"], axis="columns").drop_duplicates()
)

exp = pd.DataFrame(exp_vals, index=exp_idx, columns=start.time_points.to_index())
exp = pd.DataFrame(
exp_vals,
index=exp_idx,
columns=pd.Index(start.time_points.to_index()),
)
exp.index = exp.index.set_levels(
[_get_calculate_exeedance_probs_expected_name(None, threshold)],
level="variable",
Expand All @@ -453,7 +459,7 @@ def test_exceedance_probabilities_over_time_multiple_res(
level="unit",
)

pdt.assert_frame_equal(res, exp, check_like=True, check_column_type=False)
pdt.assert_frame_equal(res, exp, check_like=True, check_names=False)


def test_exceedance_probabilities_over_time_multiple_grouping(
Expand All @@ -478,7 +484,7 @@ def test_exceedance_probabilities_over_time_multiple_grouping(
exp = pd.DataFrame(
exp_vals[np.newaxis, :],
index=exp_idx,
columns=start.time_points.to_index(),
columns=pd.Index(start.time_points.to_index()),
)
exp.index = exp.index.set_levels(
[_get_calculate_exeedance_probs_expected_name(None, threshold)],
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_no_scipy(scm_run):
with pytest.raises(
ImportError, match="scipy is not installed. Run 'pip install scipy'"
):
scm_run.integrate()
scm_run.cumtrapz()


@OPS_MARK
Expand Down Expand Up @@ -222,7 +222,7 @@ def perform_pint_op(base, pint_obj, op):
base_ts = base_ts.pint.quantify(level=unit_level)

out = []
for _, series in base_ts.iteritems():
for _, series in base_ts.items():
if op == "add":
op_series = series + pint_obj

Expand Down
51 changes: 36 additions & 15 deletions tests/unit/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def test_init_df_formats(test_pd_run_df, in_format):
res_df[test_pd_run_df.columns.tolist()],
test_pd_run_df,
check_like=True,
check_names=False,
)


Expand Down Expand Up @@ -289,7 +290,9 @@ def test_init_with_ts(test_ts, test_pd_df):
)

tdf = get_test_pd_df_with_datetime_columns(test_pd_df)
pd.testing.assert_frame_equal(df.timeseries().reset_index(), tdf, check_like=True)
pd.testing.assert_frame_equal(
df.timeseries().reset_index(), tdf, check_like=True, check_names=False
)

b = ScmRun(test_pd_df)

Expand Down Expand Up @@ -329,7 +332,9 @@ def test_init_with_years_as_str(test_pd_df, years):
def test_init_with_year_columns(test_pd_df):
df = ScmRun(test_pd_df)
tdf = get_test_pd_df_with_datetime_columns(test_pd_df)
pd.testing.assert_frame_equal(df.timeseries().reset_index(), tdf, check_like=True)
pd.testing.assert_frame_equal(
df.timeseries().reset_index(), tdf, check_like=True, check_names=False
)


def test_init_with_decimal_years():
Expand Down Expand Up @@ -372,7 +377,9 @@ def test_init_df_with_extra_col(test_pd_df):

tdf = get_test_pd_df_with_datetime_columns(tdf)
assert extra_col in df.meta
pd.testing.assert_frame_equal(df.timeseries().reset_index(), tdf, check_like=True)
pd.testing.assert_frame_equal(
df.timeseries().reset_index(), tdf, check_like=True, check_names=False
)


def test_init_df_without_required_arguments(test_run_ts):
Expand Down Expand Up @@ -981,24 +988,24 @@ def with_nan_assertion(a, b):


def test_filter_index(scm_run):
pd.testing.assert_index_equal(scm_run.meta.index, pd.Int64Index([0, 1, 2]))
pd.testing.assert_index_equal(scm_run.meta.index, pd.Index([0, 1, 2], dtype=int))

run = scm_run.filter(variable="Primary Energy")
exp_index = pd.Int64Index([0, 2])
exp_index = pd.Index([0, 2], dtype=int)
pd.testing.assert_index_equal(run["variable"].index, exp_index)
pd.testing.assert_index_equal(run.meta.index, exp_index)
pd.testing.assert_index_equal(run._df.columns, exp_index)

run = scm_run.filter(variable="Primary Energy", keep=False)
exp_index = pd.Int64Index([1])
exp_index = pd.Index([1], dtype=int)
pd.testing.assert_index_equal(run["variable"].index, exp_index)
pd.testing.assert_index_equal(run.meta.index, exp_index)
pd.testing.assert_index_equal(run._df.columns, exp_index)


def test_append_index(scm_run):
def _check(res, reversed):
exp_index = pd.Int64Index([0, 1, 2])
exp_index = pd.Index([0, 1, 2], dtype=int)
pd.testing.assert_index_equal(res.meta.index, exp_index)

exp_order = ["Primary Energy", "Primary Energy", "Primary Energy|Coal"]
Expand Down Expand Up @@ -1036,13 +1043,13 @@ def test_append_index_extra(scm_run):
r = scm_run.filter(variable="Primary Energy")
r["run_id"] = i + 1

pd.testing.assert_index_equal(r.meta.index, pd.Int64Index([0, 2]))
pd.testing.assert_index_equal(r.meta.index, pd.Index([0, 2], dtype=int))
runs.append(r)

res = run_append(runs)

# note that the indexes are reset for subsequent appends and then increment
exp_index = pd.Int64Index([0, 1, 2, 3, 4, 5])
exp_index = pd.Index([0, 1, 2, 3, 4, 5], dtype=int)
pd.testing.assert_index_equal(res.meta.index, exp_index)
pd.testing.assert_series_equal(
res["run_id"],
Expand Down Expand Up @@ -1313,7 +1320,10 @@ def test_quantile_over_lower(test_processing_scm_df):
)
obs = test_processing_scm_df.process_over("scenario", "quantile", q=0)
pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_column_type=False
exp,
obs.reset_index(),
check_like=True,
check_names=False,
)


Expand All @@ -1335,7 +1345,10 @@ def test_quantile_over_upper(test_processing_scm_df):
)
obs = test_processing_scm_df.process_over(["model", "scenario"], "quantile", q=1)
pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_column_type=False
exp,
obs.reset_index(),
check_like=True,
check_names=False,
)


Expand Down Expand Up @@ -1376,7 +1389,10 @@ def test_mean_over(test_processing_scm_df):
)
obs = test_processing_scm_df.process_over("scenario", "mean")
pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_column_type=False
exp,
obs.reset_index(),
check_like=True,
check_names=False,
)


Expand Down Expand Up @@ -1407,8 +1423,9 @@ def test_median_over(test_processing_scm_df):
],
)
obs = test_processing_scm_df.process_over("scenario", "median")

pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_column_type=False
exp, obs.reset_index(), check_like=True, check_names=False
)


Expand Down Expand Up @@ -1672,7 +1689,9 @@ def test_quantiles_over(test_processing_scm_df):
cols=["model", "scenario"],
quantiles=[0, 0.5, 1, "mean", "median"],
)
pd.testing.assert_frame_equal(exp.set_index(obs.index.names), obs, check_like=True)
pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_names=False
)


def test_quantiles_over_operation_in_kwargs(test_processing_scm_df):
Expand Down Expand Up @@ -1902,7 +1921,9 @@ def test_append_duplicate_times(test_append_scm_runs, duplicate_msg):

error_msg = exc_info.value.args[0]
assert error_msg.startswith(exp_msg)
pd.testing.assert_frame_equal(base.meta.append(other.meta), exc_info.value.meta)
pd.testing.assert_frame_equal(
pd.concat([base.meta, other.meta]), exc_info.value.meta
)

return

Expand Down

0 comments on commit 49254de

Please sign in to comment.