Skip to content

Commit

Permalink
fix: Bump pandas to the latest version
Browse files Browse the repository at this point in the history
Bump pandas and update tests to handle the latest version of pandas. This also allows the use of newer versions of Python during CI.

BREAKING CHANGE: Removes support for Python 3.7
  • Loading branch information
lewisjared committed Apr 3, 2023
1 parent 0e63108 commit 49254de
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 40 deletions.
19 changes: 7 additions & 12 deletions .github/workflows/ci-cd-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: [3.7, 3.8, 3.9]
exclude:
# netCDF4>=1.6.0 doesn't include prebuilt wheels for windows/Python3.7
# Older netCDF4 or self-built versions will likely work
- os: windows-latest
python-version: 3.7
python-version: [3.8, 3.9, '3.10', 3.11]
steps:
- name: Checkout repository
uses: actions/checkout@v3
Expand Down Expand Up @@ -92,7 +87,7 @@ jobs:
run: |
pytest tests -r a --cov=scmdata --cov-report=term-missing --cov-fail-under=$env:MIN_COVERAGE
- name: Upload coverage to Codecov
if: startsWith(runner.os, 'Linux') && matrix.python-version == 3.7
if: startsWith(runner.os, 'Linux') && matrix.python-version == 3.11
uses: codecov/codecov-action@v3
with:
file: ./coverage.xml
Expand All @@ -101,7 +96,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7]
python-version: [3.9]
pandas-version: [1.0.5, 1.1]

steps:
Expand Down Expand Up @@ -134,7 +129,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ 3.7 ]
python-version: [ 3.9 ]
xarray-version: [ 0.16.2, 0.17.0, 0.18.2 ]

steps:
Expand Down Expand Up @@ -170,7 +165,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7]
python-version: [3.9]

steps:
- name: Checkout repository
Expand Down Expand Up @@ -201,7 +196,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, 3.11]

steps:
- name: Checkout repository
Expand All @@ -226,7 +221,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, '3.10', 3.11]

steps:
- name: Checkout repository
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-conda-install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, '3.10', 3.11]

steps:
- name: Setup conda
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-pypi-install.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
strategy:
matrix:
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
python-version: [3.7, 3.8, 3.9]
python-version: [3.8, 3.9, '3.10', 3.11]

steps:
- name: Setup python
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Changelog
master
------

- (`#223 <https://github.com/openscm/scmdata/pull/223>`_) Loosen the pandas requirement to cover pandas>=1.4.3. Also officially support Python 3.10 and 3.11
- (`#222 <https://github.com/openscm/scmdata/pull/222>`_) Decrease the minimum number of time points for interpolation to 2
- (`#221 <https://github.com/openscm/scmdata/pull/221>`_) Add option to :func:`scmdata.ScmRun.interpolate` to allow for interpolation which ignores leap-years. This also fixes a bug where :func:`scmdata.ScmRun.interpolate` converts integer values into unix time. This functionality isn't consistent with the behaviour of the TimePoints class where integers are converted into years.
- (`#218 <https://github.com/openscm/scmdata/pull/218>`_) Replaced internal calls to :func:`scmdata.groupby.RunGroupby.map` with :func:`scmdata.groupby.RunGroupby.apply`
Expand Down
5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ classifiers =
License :: OSI Approved :: BSD License
Intended Audience :: Developers
Operating System :: OS Independent
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11

[options]
packages = find:
Expand All @@ -34,7 +35,7 @@ install_requires =
numpy
openscm-units
packaging
pandas>=1.0.4, <=1.4.2
pandas>=1.0.4, <2
pint<0.20
pint-pandas
python-dateutil
Expand Down
4 changes: 2 additions & 2 deletions src/scmdata/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -1715,7 +1715,7 @@ def process_over(
ts.index = pd.MultiIndex.from_frame(ts_idx.fillna(na_override))

group_cols = list(set(ts.index.names) - set(cols))
grouper = ts.groupby(group_cols)
grouper = ts.groupby(group_cols, group_keys=False)

# This is a subset of the available functions
# https://pandas.pydata.org/pandas-docs/stable/reference/groupby.html
Expand Down Expand Up @@ -2537,7 +2537,7 @@ def run_append(
# Convert back from str
for c in nan_cols[nan_cols].index:
new_meta[c].replace("nan", np.nan, inplace=True)
for c, dtype in orig_dtypes.iteritems():
for c, dtype in orig_dtypes.items():
new_meta[c] = new_meta[c].astype(dtype)

ret._meta = pd.MultiIndex.from_frame(new_meta.astype("category"))
Expand Down
16 changes: 11 additions & 5 deletions tests/integration/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,9 @@ def test_exceedance_probabilities_over_time(
exp = pd.DataFrame(
np.array(exp_vals)[np.newaxis, :],
index=exp_idx,
columns=test_processing_scm_df.time_points.to_index(),
# This forces the coercion to a DateTimeIndex which now happens automatically for
# pandas>=1.4.3
columns=pd.Index(test_processing_scm_df.time_points.to_index()),
)
exp.index = exp.index.set_levels(
[_get_calculate_exeedance_probs_expected_name(output_name, threshold)],
Expand All @@ -424,7 +426,7 @@ def test_exceedance_probabilities_over_time(
level="unit",
)

pdt.assert_frame_equal(res, exp, check_like=True, check_column_type=False)
pdt.assert_frame_equal(res, exp, check_like=True, check_names=False)


def test_exceedance_probabilities_over_time_multiple_res(
Expand All @@ -444,7 +446,11 @@ def test_exceedance_probabilities_over_time_multiple_res(
start.meta.drop(["ensemble_member"], axis="columns").drop_duplicates()
)

exp = pd.DataFrame(exp_vals, index=exp_idx, columns=start.time_points.to_index())
exp = pd.DataFrame(
exp_vals,
index=exp_idx,
columns=pd.Index(start.time_points.to_index()),
)
exp.index = exp.index.set_levels(
[_get_calculate_exeedance_probs_expected_name(None, threshold)],
level="variable",
Expand All @@ -453,7 +459,7 @@ def test_exceedance_probabilities_over_time_multiple_res(
level="unit",
)

pdt.assert_frame_equal(res, exp, check_like=True, check_column_type=False)
pdt.assert_frame_equal(res, exp, check_like=True, check_names=False)


def test_exceedance_probabilities_over_time_multiple_grouping(
Expand All @@ -478,7 +484,7 @@ def test_exceedance_probabilities_over_time_multiple_grouping(
exp = pd.DataFrame(
exp_vals[np.newaxis, :],
index=exp_idx,
columns=start.time_points.to_index(),
columns=pd.Index(start.time_points.to_index()),
)
exp.index = exp.index.set_levels(
[_get_calculate_exeedance_probs_expected_name(None, threshold)],
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def test_no_scipy(scm_run):
with pytest.raises(
ImportError, match="scipy is not installed. Run 'pip install scipy'"
):
scm_run.integrate()
scm_run.cumtrapz()


@OPS_MARK
Expand Down Expand Up @@ -222,7 +222,7 @@ def perform_pint_op(base, pint_obj, op):
base_ts = base_ts.pint.quantify(level=unit_level)

out = []
for _, series in base_ts.iteritems():
for _, series in base_ts.items():
if op == "add":
op_series = series + pint_obj

Expand Down
51 changes: 36 additions & 15 deletions tests/unit/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def test_init_df_formats(test_pd_run_df, in_format):
res_df[test_pd_run_df.columns.tolist()],
test_pd_run_df,
check_like=True,
check_names=False,
)


Expand Down Expand Up @@ -289,7 +290,9 @@ def test_init_with_ts(test_ts, test_pd_df):
)

tdf = get_test_pd_df_with_datetime_columns(test_pd_df)
pd.testing.assert_frame_equal(df.timeseries().reset_index(), tdf, check_like=True)
pd.testing.assert_frame_equal(
df.timeseries().reset_index(), tdf, check_like=True, check_names=False
)

b = ScmRun(test_pd_df)

Expand Down Expand Up @@ -329,7 +332,9 @@ def test_init_with_years_as_str(test_pd_df, years):
def test_init_with_year_columns(test_pd_df):
df = ScmRun(test_pd_df)
tdf = get_test_pd_df_with_datetime_columns(test_pd_df)
pd.testing.assert_frame_equal(df.timeseries().reset_index(), tdf, check_like=True)
pd.testing.assert_frame_equal(
df.timeseries().reset_index(), tdf, check_like=True, check_names=False
)


def test_init_with_decimal_years():
Expand Down Expand Up @@ -372,7 +377,9 @@ def test_init_df_with_extra_col(test_pd_df):

tdf = get_test_pd_df_with_datetime_columns(tdf)
assert extra_col in df.meta
pd.testing.assert_frame_equal(df.timeseries().reset_index(), tdf, check_like=True)
pd.testing.assert_frame_equal(
df.timeseries().reset_index(), tdf, check_like=True, check_names=False
)


def test_init_df_without_required_arguments(test_run_ts):
Expand Down Expand Up @@ -981,24 +988,24 @@ def with_nan_assertion(a, b):


def test_filter_index(scm_run):
pd.testing.assert_index_equal(scm_run.meta.index, pd.Int64Index([0, 1, 2]))
pd.testing.assert_index_equal(scm_run.meta.index, pd.Index([0, 1, 2], dtype=int))

run = scm_run.filter(variable="Primary Energy")
exp_index = pd.Int64Index([0, 2])
exp_index = pd.Index([0, 2], dtype=int)
pd.testing.assert_index_equal(run["variable"].index, exp_index)
pd.testing.assert_index_equal(run.meta.index, exp_index)
pd.testing.assert_index_equal(run._df.columns, exp_index)

run = scm_run.filter(variable="Primary Energy", keep=False)
exp_index = pd.Int64Index([1])
exp_index = pd.Index([1], dtype=int)
pd.testing.assert_index_equal(run["variable"].index, exp_index)
pd.testing.assert_index_equal(run.meta.index, exp_index)
pd.testing.assert_index_equal(run._df.columns, exp_index)


def test_append_index(scm_run):
def _check(res, reversed):
exp_index = pd.Int64Index([0, 1, 2])
exp_index = pd.Index([0, 1, 2], dtype=int)
pd.testing.assert_index_equal(res.meta.index, exp_index)

exp_order = ["Primary Energy", "Primary Energy", "Primary Energy|Coal"]
Expand Down Expand Up @@ -1036,13 +1043,13 @@ def test_append_index_extra(scm_run):
r = scm_run.filter(variable="Primary Energy")
r["run_id"] = i + 1

pd.testing.assert_index_equal(r.meta.index, pd.Int64Index([0, 2]))
pd.testing.assert_index_equal(r.meta.index, pd.Index([0, 2], dtype=int))
runs.append(r)

res = run_append(runs)

# note that the indexes are reset for subsequent appends and then increment
exp_index = pd.Int64Index([0, 1, 2, 3, 4, 5])
exp_index = pd.Index([0, 1, 2, 3, 4, 5], dtype=int)
pd.testing.assert_index_equal(res.meta.index, exp_index)
pd.testing.assert_series_equal(
res["run_id"],
Expand Down Expand Up @@ -1313,7 +1320,10 @@ def test_quantile_over_lower(test_processing_scm_df):
)
obs = test_processing_scm_df.process_over("scenario", "quantile", q=0)
pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_column_type=False
exp,
obs.reset_index(),
check_like=True,
check_names=False,
)


Expand All @@ -1335,7 +1345,10 @@ def test_quantile_over_upper(test_processing_scm_df):
)
obs = test_processing_scm_df.process_over(["model", "scenario"], "quantile", q=1)
pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_column_type=False
exp,
obs.reset_index(),
check_like=True,
check_names=False,
)


Expand Down Expand Up @@ -1376,7 +1389,10 @@ def test_mean_over(test_processing_scm_df):
)
obs = test_processing_scm_df.process_over("scenario", "mean")
pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_column_type=False
exp,
obs.reset_index(),
check_like=True,
check_names=False,
)


Expand Down Expand Up @@ -1407,8 +1423,9 @@ def test_median_over(test_processing_scm_df):
],
)
obs = test_processing_scm_df.process_over("scenario", "median")

pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_column_type=False
exp, obs.reset_index(), check_like=True, check_names=False
)


Expand Down Expand Up @@ -1672,7 +1689,9 @@ def test_quantiles_over(test_processing_scm_df):
cols=["model", "scenario"],
quantiles=[0, 0.5, 1, "mean", "median"],
)
pd.testing.assert_frame_equal(exp.set_index(obs.index.names), obs, check_like=True)
pd.testing.assert_frame_equal(
exp.set_index(obs.index.names), obs, check_like=True, check_names=False
)


def test_quantiles_over_operation_in_kwargs(test_processing_scm_df):
Expand Down Expand Up @@ -1902,7 +1921,9 @@ def test_append_duplicate_times(test_append_scm_runs, duplicate_msg):

error_msg = exc_info.value.args[0]
assert error_msg.startswith(exp_msg)
pd.testing.assert_frame_equal(base.meta.append(other.meta), exc_info.value.meta)
pd.testing.assert_frame_equal(
pd.concat([base.meta, other.meta]), exc_info.value.meta
)

return

Expand Down

0 comments on commit 49254de

Please sign in to comment.