Remove 'xls' as by-default-supported file format #708

Merged: 16 commits, Oct 4, 2022
4 changes: 2 additions & 2 deletions .github/workflows/pytest-dependency.yml
@@ -20,12 +20,12 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.7'
python-version: '3.7.1'

- name: Install specific out-dated version of dependencies
# Update the package requirements when changing minimum dependency versions
# Please also add a section "Dependency changes" to the release notes
run: pip install pandas==1.1.1 numpy==1.19.0 matplotlib==3.5.0 iam-units==2020.4.21 xlrd==2.0 pint==0.13
run: pip install pandas==1.2.0 numpy==1.19.0 matplotlib==3.5.0 iam-units==2020.4.21 xlrd==2.0 pint==0.13

- name: Install other dependencies and package
run: pip install .[tests,optional_plotting,optional_io_formats,tutorials]
9 changes: 9 additions & 0 deletions RELEASE_NOTES.md
@@ -1,5 +1,14 @@
# Next Release

## Dependency changes

Remove **xlrd** as a dependency; please install it explicitly for reading `.xls` files.
Bump minimum version of **pandas** to v1.2.0 to support automatic engine selection.

## Individual updates

- [#708](https://github.com/IAMconsortium/pyam/pull/708) Remove 'xls' as by-default-supported file format

# Release v1.6.0

## Highlights
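The dependency change above means legacy `.xls` files are no longer readable with a default installation; **xlrd** has to be installed explicitly (e.g. via the `optional_io_formats` extra). A minimal usage sketch with hypothetical file names:

```python
# Requires `pip install xlrd` (or the optional_io_formats extra) for the .xls case;
# .xlsx and .csv keep working with a default installation.
import pyam

df_xlsx = pyam.IamDataFrame("scenarios.xlsx")  # engine resolved by pandas (openpyxl)
df_xls = pyam.IamDataFrame("scenarios.xls")    # only works if xlrd is installed
```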
67 changes: 38 additions & 29 deletions pyam/core.py
@@ -190,8 +190,8 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs):
self.set_meta(meta)

# if initializing from xlsx, try to load `meta` table from file
if meta_sheet and isinstance(data, Path) and data.suffix == ".xlsx":
excel_file = pd.ExcelFile(data, engine="openpyxl")
if meta_sheet and isinstance(data, Path) and data.suffix in [".xlsx", ".xls"]:
excel_file = pd.ExcelFile(data)
if meta_sheet in excel_file.sheet_names:
self.load_meta(excel_file, sheet_name=meta_sheet, ignore_conflict=True)
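The hunk above drops the hard-coded `openpyxl` engine and leaves engine selection to pandas. A brief illustration of that behaviour, assuming pandas >= 1.2.0 and a hypothetical file:

```python
import pandas as pd

# Without an explicit `engine` argument, pandas >= 1.2 infers the engine from
# the file extension: openpyxl for .xlsx, xlrd (if installed) for legacy .xls.
excel_file = pd.ExcelFile("scenarios.xlsx")
print(excel_file.sheet_names)
```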

@@ -1344,7 +1344,7 @@ def check_aggregate(
List of variables to aggregate, defaults to sub-categories of `variable`.
method : func or str, optional
Method to use for aggregation,
e.g. :any:`numpy.mean`, :aby:`numpy.sum`, 'min', 'max'.
e.g. :any:`numpy.mean`, :any:`numpy.sum`, 'min', 'max'.
exclude_on_fail : bool, optional
Flag scenarios failing validation as `exclude: True`.
multiplier : number, optional
@@ -2261,18 +2261,23 @@ def _to_file_format(self, iamc_index):
df = df.rename(columns={c: str(c).title() for c in df.columns})
return df

def to_csv(self, path, iamc_index=False, **kwargs):
"""Write timeseries data of this object to a csv file
def to_csv(self, path=None, iamc_index=False, **kwargs):
"""Write :meth:`IamDataFrame.timeseries` to a comma-separated values (csv) file

Parameters
----------
path : str or path object
file path or :class:`pathlib.Path`
iamc_index : bool, default False
if True, use `['model', 'scenario', 'region', 'variable', 'unit']`;
else, use all 'data' columns
path : str, path or file-like, optional
File path as string or :class:`pathlib.Path`, or file-like object.
If *None*, the result is returned as a csv-formatted string.
See :meth:`pandas.DataFrame.to_csv` for details.
iamc_index : bool, optional
If True, use `['model', 'scenario', 'region', 'variable', 'unit']`;
else, use all :attr:`dimensions`.
See :meth:`IamDataFrame.timeseries` for details.
**kwargs
Passed to :meth:`pandas.DataFrame.to_csv`.
"""
self._to_file_format(iamc_index).to_csv(path, index=False, **kwargs)
return self._to_file_format(iamc_index).to_csv(path, index=False, **kwargs)
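A short, self-contained sketch of the behaviour documented above (the data are made up for illustration): passing no path now returns the csv-formatted string, mirroring :meth:`pandas.DataFrame.to_csv`.

```python
import pandas as pd
import pyam

# Minimal IamDataFrame in IAMC wide format, for illustration only.
df = pyam.IamDataFrame(
    pd.DataFrame(
        [["model_a", "scen_a", "World", "Primary Energy", "EJ/yr", 1.0, 6.0]],
        columns=["model", "scenario", "region", "variable", "unit", 2005, 2010],
    )
)

df.to_csv("output.csv")  # writes the file and returns None
csv_text = df.to_csv()   # no path given: returns a csv-formatted string
print(csv_text)
```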

def to_excel(
self,
@@ -2286,30 +2291,33 @@ def to_excel(

Parameters
----------
excel_writer : str, path object or ExcelWriter object
any valid string path, :class:`pathlib.Path`
or :class:`pandas.ExcelWriter`
excel_writer : path-like, file-like, or ExcelWriter object
File path as string or :class:`pathlib.Path`,
or existing :class:`pandas.ExcelWriter`.
sheet_name : string
name of sheet which will contain :meth:`timeseries` data
iamc_index : bool, default False
if True, use `['model', 'scenario', 'region', 'variable', 'unit']`;
else, use all 'data' columns
include_meta : boolean or string
if True, write 'meta' to an Excel sheet name 'meta' (default);
if this is a string, use it as sheet name
Name of sheet which will contain :meth:`IamDataFrame.timeseries` data.
iamc_index : bool, optional
If True, use `['model', 'scenario', 'region', 'variable', 'unit']`;
else, use all :attr:`dimensions`.
See :meth:`IamDataFrame.timeseries` for details.
include_meta : boolean or string, optional
If True, write :any:`IamDataFrame.meta` to a sheet 'meta' (default);
if this is a string, use it as sheet name.
**kwargs
Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like)
"""
# open a new ExcelWriter instance (if necessary)
close = False
if not isinstance(excel_writer, pd.ExcelWriter):
close = True
excel_writer = pd.ExcelWriter(excel_writer, engine="xlsxwriter")
excel_writer = pd.ExcelWriter(excel_writer, **kwargs)

# write data table
write_sheet(excel_writer, sheet_name, self._to_file_format(iamc_index))

# write meta table unless `include_meta=False`
if include_meta:
meta_rename = dict([(i, i.capitalize()) for i in META_IDX])
meta_rename = dict([(i, i.capitalize()) for i in self.index.names])
write_sheet(
excel_writer,
"meta" if include_meta is True else include_meta,
@@ -2320,20 +2328,21 @@ def to_excel(
if close:
excel_writer.close()

def export_meta(self, excel_writer, sheet_name="meta"):
"""Write the 'meta' indicators of this object to an Excel sheet
def export_meta(self, excel_writer, sheet_name="meta", **kwargs):
"""Write the 'meta' indicators of this object to an Excel spreadsheet

Parameters
----------
excel_writer : str, path object or ExcelWriter object
any valid string path, :class:`pathlib.Path`
or :class:`pandas.ExcelWriter`
File path, :class:`pathlib.Path`, or existing :class:`pandas.ExcelWriter`.
sheet_name : str
name of sheet which will contain dataframe of 'meta' indicators
Name of sheet which will contain 'meta'.
**kwargs
Passed to :class:`pandas.ExcelWriter` (if *excel_writer* is path-like)
"""
close = False
if not isinstance(excel_writer, pd.ExcelWriter):
excel_writer = pd.ExcelWriter(excel_writer, engine="xlsxwriter")
excel_writer = pd.ExcelWriter(excel_writer, **kwargs)
close = True
write_sheet(excel_writer, sheet_name, self.meta, index=True)
if close:
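A hedged usage sketch covering the `**kwargs` pass-through in `to_excel` and `export_meta` above (file names and data are illustrative, not part of this diff):

```python
import pandas as pd
import pyam

# Same minimal IamDataFrame as in the to_csv sketch, for illustration only.
df = pyam.IamDataFrame(
    pd.DataFrame(
        [["model_a", "scen_a", "World", "Primary Energy", "EJ/yr", 1.0, 6.0]],
        columns=["model", "scenario", "region", "variable", "unit", 2005, 2010],
    )
)

# Keyword arguments are forwarded to pandas.ExcelWriter when a path is given,
# so an engine can still be pinned explicitly.
df.to_excel("output.xlsx", engine="xlsxwriter")

# Both methods also accept an existing ExcelWriter; kwargs are then not needed.
with pd.ExcelWriter("combined.xlsx") as writer:
    df.to_excel(writer, sheet_name="data", include_meta=False)
    df.export_meta(writer, sheet_name="meta")
```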
5 changes: 2 additions & 3 deletions pyam/iiasa.py
@@ -615,7 +615,7 @@ def lazy_read_iiasa(
----------
file : str or :class:`pathlib.Path`
The location to test for valid data and save the data if not up-to-date. Must be
either xls, xlsx or csv.
either xlsx or csv.
name : str
| Name of an IIASA Scenario Explorer database instance.
| See :attr:`pyam.iiasa.Connection.valid_connections`.
@@ -642,8 +642,7 @@
assert file.suffix in [
".csv",
".xlsx",
".xls",
], "We will only read and write to csv, xls and xlsx format."
], "We will only read and write to csv and xlsx format."
if os.path.exists(file):
date_set = pd.to_datetime(os.path.getmtime(file), unit="s")
version_info = Connection(name, creds, base_url).properties()
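A usage sketch of `lazy_read_iiasa` with the narrowed file-format check; the cache file name, database instance and filter below are hypothetical:

```python
from pyam.iiasa import lazy_read_iiasa

# The local cache file must now be .csv or .xlsx; .xls is no longer accepted.
df = lazy_read_iiasa(
    "cached_scenarios.xlsx",   # local cache location (hypothetical)
    "some_explorer_instance",  # Scenario Explorer database name (hypothetical)
    model="model_a",           # filter arguments passed on to the query
)
```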
12 changes: 4 additions & 8 deletions pyam/utils.py
@@ -136,19 +136,15 @@ def write_sheet(writer, name, df, index=False):

def read_pandas(path, sheet_name=["data*", "Data*"], *args, **kwargs):
"""Read a file and return a pandas.DataFrame"""

if isinstance(path, Path) and path.suffix == ".csv":
return pd.read_csv(path, *args, **kwargs)
else:
if isinstance(path, pd.ExcelFile):
xl = path
else:
xl = pd.ExcelFile(
path, engine="xlrd" if path.suffix == ".xls" else "openpyxl"
)

sheet_names = pd.Series(xl.sheet_names)
else:
xl = path if isinstance(path, pd.ExcelFile) else pd.ExcelFile(path)

# reading multiple sheets
sheet_names = pd.Series(xl.sheet_names)
if len(sheet_names) > 1:
sheets = kwargs.pop("sheet_name", sheet_name)
# apply pattern-matching for sheet names (use * as wildcard)
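The refactored `read_pandas` above leaves Excel-engine selection to pandas and keeps the wildcard matching of sheet names. An illustrative sketch of that idea, not the library's exact implementation (file name hypothetical):

```python
from fnmatch import fnmatch

import pandas as pd

# Engine selection is left to pandas (>= 1.2); sheet names are matched against
# the "data*" / "Data*" patterns, with "*" acting as wildcard.
xl = pd.ExcelFile("scenarios.xlsx")
patterns = ["data*", "Data*"]
data_sheets = [s for s in xl.sheet_names if any(fnmatch(s, p) for p in patterns)]
df = pd.concat(pd.read_excel(xl, sheet_name=s) for s in data_sheets)
```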
6 changes: 3 additions & 3 deletions setup.cfg
@@ -19,7 +19,7 @@ classifiers =
[options]
packages = pyam
include_package_data = True
python_requires = >=3.7, <3.11
python_requires = >=3.7.1, <3.11

# NOTE TO DEVS
# If you change a minimum version below, please explicitly implement the change
@@ -32,14 +32,13 @@ install_requires =
pyjwt
httpx[http2]
openpyxl
pandas >= 1.1.1
pandas >= 1.2.0
scipy
pint >= 0.13
PyYAML
matplotlib >= 3.2.0
seaborn
six
xlrd >= 2.0
setuptools >= 41
setuptools_scm
# required explicitly for Python 3.7
@@ -61,6 +60,7 @@ optional_plotting =
optional_io_formats =
datapackage
pandas-datareader
xlrd
unfccc_di_api >= 3.0.1
tutorials =
pypandoc
Binary file added tests/data/test_df.xls
5 changes: 1 addition & 4 deletions tests/test_iiasa.py
@@ -392,10 +392,7 @@ def test_lazy_read(tmpdir):
df_newfilt = lazy_read_iiasa(tmp_file, TEST_API, model="model_b")
assert df_newfilt.model == ["model_b"]
assert os.path.getmtime(tmp_file) > writetime
# file can also be xls or xlsx
# file can also be xlsx
xlsx_file = tmpdir / "test_database.xlsx"
df_xlsx = lazy_read_iiasa(xlsx_file, TEST_API, model="model_b")
assert df_newfilt.equals(df_xlsx)
xls_file = tmpdir / "test_database.xls"
df_xls = lazy_read_iiasa(xls_file, TEST_API, model="model_b")
assert df_xls.equals(df_xlsx)
31 changes: 30 additions & 1 deletion tests/test_io.py
@@ -9,6 +9,14 @@

from .conftest import TEST_DATA_DIR, META_DF

try:
import xlrd # noqa: F401

has_xlrd = True
except ModuleNotFoundError: # pragma: no cover
has_xlrd = False


FILTER_ARGS = dict(scenario="scen_a")


@@ -40,7 +48,7 @@ def test_io_list():
IamDataFrame([1, 2])


def test_io_csv(test_df, tmpdir):
def test_io_csv_to_file(test_df, tmpdir):
# write to csv
file = tmpdir / "testing_io_write_read.csv"
test_df.to_csv(file)
@@ -50,6 +58,21 @@ def test_io_csv(test_df, tmpdir):
pd.testing.assert_frame_equal(test_df.data, import_df.data)


def test_io_csv_none(test_df_year):
# parse data as csv and return as string
exp = (
"Model,Scenario,Region,Variable,Unit,2005,2010\n"
"model_a,scen_a,World,Primary Energy,EJ/yr,1.0,6.0\n"
"model_a,scen_a,World,Primary Energy|Coal,EJ/yr,0.5,3.0\n"
"model_a,scen_b,World,Primary Energy,EJ/yr,2.0,7.0\n"
)
try:
assert test_df_year.to_csv(lineterminator="\n") == exp
# special treatment for Python 3.7 and pandas < 1.5
except TypeError:
assert test_df_year.to_csv(line_terminator="\n") == exp


@pytest.mark.parametrize(
"meta_args", [[{}, {}], [dict(include_meta="foo"), dict(meta_sheet_name="foo")]]
)
@@ -92,6 +115,12 @@ def test_io_xlsx_multiple_data_sheets(test_df, sheets, sheetname, tmpdir):
assert_iamframe_equal(test_df, import_df)


@pytest.mark.skipif(not has_xlrd, reason="Package 'xlrd' not installed.")
def test_read_xls(test_df_year):
import_df = IamDataFrame(TEST_DATA_DIR / "test_df.xls")
assert_iamframe_equal(test_df_year, import_df)


def test_init_df_with_na_unit(test_pd_df, tmpdir):
# missing values in the unit column are replaced by an empty string
test_pd_df.loc[1, "unit"] = np.nan