From 6fb272c0fde4bfaca9b6322b18ac2cf962e26ee3 Mon Sep 17 00:00:00 2001 From: crusaderky Date: Tue, 8 Oct 2019 22:23:46 +0100 Subject: [PATCH] Rolling minimum dependency versions policy (#3358) * - Downgrade numpy to 1.14, pandas to 0.20, scipy to 0.19 (24 months old) - Downgrade dask to 1.1 (6 months old) - Don't pin patch versions * Apply rolling policy (see #3222) * Automated tool to verify the minimum versions * Drop Python 3.5 * lint * Trivial cosmetic * Cosmetic * (temp) debug CI failure * Parallelize versions check script * Remove hacks for legacy dask * Documentation * Assorted cleanup * Assorted cleanup * Fix regression * Cleanup * type annotations upgraded to Python 3.6 * count_not_none backport * pd.Index.equals on legacy pandas returned False when comparing vs. a ndarray * Documentation * pathlib cleanup * Slide deprecations from 0.14 to 0.15 * More cleanups * More cleanups * Fix min_deps_check * Fix min_deps_check * Set policy of 12 months for pandas and scipy * Cleanup * Cleanup * Sphinx fix * Overhaul readthedocs environment * Fix test crash * Fix test crash * Prune readthedocs environment * Cleanup * Hack around versioneer bug on readthedocs CI * Code review * Prevent random timeouts in the readthedocs CI * What's New polish * Merge from Master * Trivial cosmetic * Reimplement pandas.core.common.count_not_none --- azure-pipelines.yml | 22 +- ci/min_deps_check.py | 187 ++++++++++++++ ci/requirements/doc.yml | 21 ++ ci/requirements/py35-bare-minimum.yml | 15 -- ci/requirements/py36-bare-minimum.yml | 11 + ci/requirements/py36-min-all-deps.yml | 61 ++--- ci/requirements/py36-min-nep18.yml | 12 +- ci/requirements/py36.yml | 13 +- ci/requirements/py37-windows.yml | 13 +- ci/requirements/py37.yml | 15 +- doc/conf.py | 2 +- doc/contributing.rst | 2 +- doc/environment.yml | 28 --- doc/groupby.rst | 4 +- doc/installing.rst | 74 ++++-- doc/io.rst | 1 + doc/pandas.rst | 3 +- doc/plotting.rst | 1 + doc/whats-new.rst | 40 ++- readthedocs.yml | 4 +- setup.py | 5 +- xarray/backends/api.py | 6 +- xarray/backends/file_manager.py | 2 +- xarray/backends/locks.py | 29 +-- xarray/backends/netCDF4_.py | 12 - xarray/backends/rasterio_.py | 13 +- xarray/backends/scipy_.py | 14 -- xarray/backends/zarr.py | 20 -- xarray/coding/cftime_offsets.py | 24 +- xarray/coding/cftimeindex.py | 5 - xarray/coding/times.py | 7 +- xarray/coding/variables.py | 18 +- xarray/core/alignment.py | 2 +- xarray/core/combine.py | 4 +- xarray/core/common.py | 33 +-- xarray/core/computation.py | 40 +-- xarray/core/dask_array_compat.py | 173 ------------- xarray/core/dask_array_ops.py | 34 +-- xarray/core/dataarray.py | 27 +- xarray/core/dataset.py | 78 ++---- xarray/core/duck_array_ops.py | 14 +- xarray/core/formatting.py | 7 +- xarray/core/indexing.py | 29 +-- xarray/core/merge.py | 4 +- xarray/core/missing.py | 8 +- xarray/core/npcompat.py | 284 ---------------------- xarray/core/pdcompat.py | 81 +----- xarray/core/rolling.py | 21 -- xarray/core/rolling_exp.py | 7 +- xarray/core/utils.py | 2 +- xarray/plot/utils.py | 18 +- xarray/testing.py | 6 +- xarray/tests/__init__.py | 31 +-- xarray/tests/test_accessor_str.py | 2 +- xarray/tests/test_backends.py | 33 +-- xarray/tests/test_cftimeindex.py | 122 +++++----- xarray/tests/test_cftimeindex_resample.py | 1 - xarray/tests/test_coding_times.py | 14 +- xarray/tests/test_combine.py | 2 +- xarray/tests/test_computation.py | 9 +- xarray/tests/test_dask.py | 33 +-- xarray/tests/test_dataarray.py | 11 +- xarray/tests/test_dataset.py | 39 +-- xarray/tests/test_distributed.py | 4 +- xarray/tests/test_duck_array_ops.py | 12 +- xarray/tests/test_indexing.py | 3 +- xarray/tests/test_plot.py | 11 +- xarray/tests/test_ufuncs.py | 15 +- xarray/tests/test_utils.py | 8 +- xarray/tests/test_variable.py | 8 - 70 files changed, 633 insertions(+), 1281 deletions(-) create mode 100755 ci/min_deps_check.py create mode 100644 ci/requirements/doc.yml delete mode 100644 ci/requirements/py35-bare-minimum.yml create mode 100644 ci/requirements/py36-bare-minimum.yml delete mode 100644 doc/environment.yml delete mode 100644 xarray/core/dask_array_compat.py diff --git a/azure-pipelines.yml b/azure-pipelines.yml index d023aa317c7..c7f9de73cf4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -8,8 +8,8 @@ jobs: - job: Linux strategy: matrix: - py35-bare-minimum: - conda_env: py35-bare-minimum + py36-bare-minimum: + conda_env: py36-bare-minimum py36-min-all-deps: conda_env: py36-min-all-deps py36-min-nep18: @@ -82,13 +82,29 @@ jobs: mypy . displayName: mypy type checks +- job: MinimumVersionsPolicy + pool: + vmImage: 'ubuntu-16.04' + steps: + - template: ci/azure/add-conda-to-path.yml + - bash: | + conda install -y pyyaml + python ci/min_deps_check.py ci/requirements/py36-bare-minimum.yml + python ci/min_deps_check.py ci/requirements/py36-min-all-deps.yml + displayName: minimum versions policy + - job: Docs pool: vmImage: 'ubuntu-16.04' steps: - template: ci/azure/install.yml parameters: - env_file: doc/environment.yml + env_file: ci/requirements/doc.yml + - bash: | + source activate xarray-tests + # Replicate the exact environment created by the readthedocs CI + conda install --yes --quiet -c pkgs/main mock pillow sphinx sphinx_rtd_theme + displayName: Replicate readthedocs CI environment - bash: | source activate xarray-tests cd doc diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py new file mode 100755 index 00000000000..3bdd48ca76d --- /dev/null +++ b/ci/min_deps_check.py @@ -0,0 +1,187 @@ +"""Fetch from conda database all available versions of the xarray dependencies and their +publication date. Compare it against requirements/py36-min-all-deps.yml to verify the +policy on obsolete dependencies is being followed. Print a pretty report :) +""" +import subprocess +import sys +from concurrent.futures import ThreadPoolExecutor +from datetime import datetime, timedelta +from typing import Dict, Iterator, Tuple + +import yaml + +IGNORE_DEPS = { + "black", + "coveralls", + "flake8", + "hypothesis", + "mypy", + "pip", + "pytest", + "pytest-cov", + "pytest-env", +} + +POLICY_MONTHS = {"python": 42, "numpy": 24, "pandas": 12, "scipy": 12} +POLICY_MONTHS_DEFAULT = 6 + +has_errors = False + + +def error(msg: str) -> None: + global has_errors + has_errors = True + print("ERROR:", msg) + + +def parse_requirements(fname) -> Iterator[Tuple[str, int, int]]: + """Load requirements/py36-min-all-deps.yml + + Yield (package name, major version, minor version) + """ + global has_errors + + with open(fname) as fh: + contents = yaml.safe_load(fh) + for row in contents["dependencies"]: + if isinstance(row, dict) and list(row) == ["pip"]: + continue + pkg, eq, version = row.partition("=") + if pkg.rstrip("<>") in IGNORE_DEPS: + continue + if pkg.endswith("<") or pkg.endswith(">") or eq != "=": + error("package should be pinned with exact version: " + row) + continue + try: + major, minor = version.split(".") + except ValueError: + error("expected major.minor (without patch): " + row) + continue + try: + yield pkg, int(major), int(minor) + except ValueError: + error("failed to parse version: " + row) + + +def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]: + """Query the conda repository for a specific package + + Return map of {(major version, minor version): publication date} + """ + stdout = subprocess.check_output( + ["conda", "search", pkg, "--info", "-c", "defaults", "-c", "conda-forge"] + ) + out = {} # type: Dict[Tuple[int, int], datetime] + major = None + minor = None + + for row in stdout.decode("utf-8").splitlines(): + label, _, value = row.partition(":") + label = label.strip() + if label == "file name": + value = value.strip()[len(pkg) :] + major, minor = value.split("-")[1].split(".")[:2] + major = int(major) + minor = int(minor) + if label == "timestamp": + assert major is not None + assert minor is not None + ts = datetime.strptime(value.split()[0].strip(), "%Y-%m-%d") + + if (major, minor) in out: + out[major, minor] = min(out[major, minor], ts) + else: + out[major, minor] = ts + + # Hardcoded fix to work around incorrect dates in conda + if pkg == "python": + out.update( + { + (2, 7): datetime(2010, 6, 3), + (3, 5): datetime(2015, 9, 13), + (3, 6): datetime(2016, 12, 23), + (3, 7): datetime(2018, 6, 27), + (3, 8): datetime(2019, 10, 14), + } + ) + + return out + + +def process_pkg( + pkg: str, req_major: int, req_minor: int +) -> Tuple[str, int, int, str, int, int, str, str]: + """Compare package version from requirements file to available versions in conda. + Return row to build pandas dataframe: + + - package name + - major version in requirements file + - minor version in requirements file + - publication date of version in requirements file (YYYY-MM-DD) + - major version suggested by policy + - minor version suggested by policy + - publication date of version suggested by policy (YYYY-MM-DD) + - status ("<", "=", "> (!)") + """ + print("Analyzing %s..." % pkg) + versions = query_conda(pkg) + + try: + req_published = versions[req_major, req_minor] + except KeyError: + error("not found in conda: " + pkg) + return pkg, req_major, req_minor, "-", 0, 0, "-", "(!)" + + policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT) + policy_published = datetime.now() - timedelta(days=policy_months * 30) + + policy_major = req_major + policy_minor = req_minor + policy_published_actual = req_published + for (major, minor), published in reversed(sorted(versions.items())): + if published < policy_published: + break + policy_major = major + policy_minor = minor + policy_published_actual = published + + if (req_major, req_minor) < (policy_major, policy_minor): + status = "<" + elif (req_major, req_minor) > (policy_major, policy_minor): + status = "> (!)" + error("Package is too new: " + pkg) + else: + status = "=" + + return ( + pkg, + req_major, + req_minor, + req_published.strftime("%Y-%m-%d"), + policy_major, + policy_minor, + policy_published_actual.strftime("%Y-%m-%d"), + status, + ) + + +def main() -> None: + fname = sys.argv[1] + with ThreadPoolExecutor(8) as ex: + futures = [ + ex.submit(process_pkg, pkg, major, minor) + for pkg, major, minor in parse_requirements(fname) + ] + rows = [f.result() for f in futures] + + print("Package Required Policy Status") + print("------------- ----------------- ----------------- ------") + fmt = "{:13} {:>1d}.{:<2d} ({:10}) {:>1d}.{:<2d} ({:10}) {}" + for row in rows: + print(fmt.format(*row)) + + assert not has_errors + + +if __name__ == "__main__": + main() diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml new file mode 100644 index 00000000000..e521ee4a4b8 --- /dev/null +++ b/ci/requirements/doc.yml @@ -0,0 +1,21 @@ +name: xarray-docs +channels: + # Don't change to pkgs/main, as it causes random timeouts in readthedocs + - conda-forge +dependencies: + - python=3.7 + - bottleneck + - cartopy + - h5netcdf + - ipython + - iris + - netcdf4 + - numpy + - numpydoc + - pandas<0.25 # Hack around https://github.com/pydata/xarray/issues/3369 + - rasterio + - seaborn + - sphinx + - sphinx-gallery + - sphinx_rtd_theme + - zarr diff --git a/ci/requirements/py35-bare-minimum.yml b/ci/requirements/py35-bare-minimum.yml deleted file mode 100644 index 7651a1bdcf1..00000000000 --- a/ci/requirements/py35-bare-minimum.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: xarray-tests -channels: - - conda-forge -dependencies: - - python=3.5.3 - - pytest - - flake8 - - mock - - pip - - numpy=1.12 - - pandas=0.19.2 - - pip: - - pytest-env - - pytest-cov - - coveralls diff --git a/ci/requirements/py36-bare-minimum.yml b/ci/requirements/py36-bare-minimum.yml new file mode 100644 index 00000000000..05186bc8748 --- /dev/null +++ b/ci/requirements/py36-bare-minimum.yml @@ -0,0 +1,11 @@ +name: xarray-tests +channels: + - conda-forge +dependencies: + - python=3.6 + - coveralls + - pytest + - pytest-cov + - pytest-env + - numpy=1.14 + - pandas=0.24 diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml index affbf8637fd..4e4f8550e16 100644 --- a/ci/requirements/py36-min-all-deps.yml +++ b/ci/requirements/py36-min-all-deps.yml @@ -2,42 +2,47 @@ name: xarray-tests channels: - conda-forge dependencies: - - python=3.6.7 + # MINIMUM VERSIONS POLICY: see doc/installing.rst + # Run ci/min_deps_check.py to verify that this file respects the policy. + # When upgrading python, numpy, or pandas, must also change + # doc/installing.rst and setup.py. + - python=3.6 - black - - boto3=1.9.235 - - bottleneck=1.2.1 - - cdms2=3.1.3 - - cfgrib=0.9.7.2 - - cftime=1.0.3.4 + - boto3=1.9 + - bottleneck=1.2 + - cartopy=0.17 + - cdms2=3.1 + - cfgrib=0.9 + - cftime=1.0 - coveralls - - dask=2.4.0 - - distributed=2.4.0 + - dask=1.2 + - distributed=1.27 - flake8 - - h5netcdf=0.7.4 - - h5py=2.10.0 - - hdf5=1.10.5 + - h5netcdf=0.7 + - h5py=2.9 # Policy allows for 2.10, but it's a conflict-fest + - hdf5=1.10 - hypothesis - - iris=2.2.0 - - lxml=4.4.1 # optional dep of pydap - - matplotlib=3.1.1 - - mypy==0.730 # Must match .pre-commit-config.yaml - - nc-time-axis=1.2.0 - - netcdf4=1.5.1.2 - - numba=0.45.1 - - numpy=1.17.2 - - pandas=0.25.1 + - iris=2.2 + - lxml=4.4 # Optional dep of pydap + - matplotlib=3.1 + - mypy=0.730 # Must match .pre-commit-config.yaml + - nc-time-axis=1.2 + - netcdf4=1.4 + - numba=0.44 + - numpy=1.14 + - pandas=0.24 - pip - - pseudonetcdf=3.0.2 - - pydap=3.2.2 - - pynio=1.5.5 + - pseudonetcdf=3.0 + - pydap=3.2 + - pynio=1.5 - pytest - pytest-cov - pytest-env - - rasterio=1.0.28 - - scipy=1.3.1 - - seaborn=0.9.0 + - rasterio=1.0 + - scipy=1.0 # Policy allows for 1.2, but scipy>=1.1 breaks numpy=1.14 + - seaborn=0.9 # - sparse # See py36-min-nep18.yml - - toolz=0.10.0 - - zarr=2.3.2 + - toolz=0.10 + - zarr=2.3 - pip: - numbagg==0.1 diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml index 8680e412a99..5b291cf554c 100644 --- a/ci/requirements/py36-min-nep18.yml +++ b/ci/requirements/py36-min-nep18.yml @@ -4,14 +4,14 @@ channels: dependencies: # Optional dependencies that require NEP18, such as sparse, # require drastically newer packages than everything else - - python=3.6.7 + - python=3.6 - coveralls - - dask=2.4.0 - - distributed=2.4.0 + - dask=2.4 + - distributed=2.4 - numpy=1.17 - - pandas=0.25 + - pandas=0.24 - pytest - pytest-cov - pytest-env - - scipy=1.3 - - sparse=0.8.0 + - scipy=1.2 + - sparse=0.8 diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml index bdb649f6f1b..cc91e8a12da 100644 --- a/ci/requirements/py36.yml +++ b/ci/requirements/py36.yml @@ -6,8 +6,9 @@ dependencies: - black - boto3 - bottleneck + - cartopy - cdms2 - - cfgrib>=0.9.2 + - cfgrib - cftime - coveralls - dask @@ -17,17 +18,17 @@ dependencies: - h5py - hdf5 - hypothesis - - iris>=1.10 + - iris - lxml # optional dep of pydap - matplotlib - - mypy==0.730 # Must match .pre-commit-config.yaml + - mypy=0.730 # Must match .pre-commit-config.yaml - nc-time-axis - netcdf4 - numba - - numpy>=1.12 - - pandas>=0.19 + - numpy + - pandas - pip - - pseudonetcdf>=3.0.1 + - pseudonetcdf - pydap - pynio - pytest diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml index 79b54030bc6..bf485b59a49 100644 --- a/ci/requirements/py37-windows.yml +++ b/ci/requirements/py37-windows.yml @@ -6,6 +6,7 @@ dependencies: - black - boto3 - bottleneck + - cartopy # - cdms2 # Not available on Windows # - cfgrib>=0.9.2 # Causes Python interpreter crash on Windows - cftime @@ -17,17 +18,17 @@ dependencies: - h5py - hdf5 - hypothesis - - iris>=1.10 - - lxml # optional dep of pydap + - iris + - lxml # Optional dep of pydap - matplotlib - - mypy==0.730 # Must match .pre-commit-config.yaml + - mypy=0.730 # Must match .pre-commit-config.yaml - nc-time-axis - netcdf4 - numba - - numpy>=1.12 - - pandas>=0.19 + - numpy + - pandas - pip - - pseudonetcdf>=3.0.1 + - pseudonetcdf - pydap # - pynio # Not available on Windows - pytest diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml index a4fe2d82a6f..5c9a1cec5b5 100644 --- a/ci/requirements/py37.yml +++ b/ci/requirements/py37.yml @@ -6,8 +6,9 @@ dependencies: - black - boto3 - bottleneck + - cartopy - cdms2 - - cfgrib>=0.9.2 + - cfgrib - cftime - coveralls - dask @@ -17,17 +18,17 @@ dependencies: - h5py - hdf5 - hypothesis - - iris>=1.10 - - lxml # optional dep of pydap + - iris + - lxml # Optional dep of pydap - matplotlib - - mypy==0.730 # Must match .pre-commit-config.yaml + - mypy=0.730 # Must match .pre-commit-config.yaml - nc-time-axis - netcdf4 - numba - - numpy>=1.12 - - pandas>=0.19 + - numpy + - pandas - pip - - pseudonetcdf>=3.0.1 + - pseudonetcdf - pydap - pynio - pytest diff --git a/doc/conf.py b/doc/conf.py index a80e470af26..7c1557a1e66 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -201,7 +201,7 @@ # Sometimes the savefig directory doesn't exist and needs to be created # https://github.com/ipython/ipython/issues/8733 -# becomes obsolete when we can pin ipython>=5.2; see doc/environment.yml +# becomes obsolete when we can pin ipython>=5.2; see ci/requirements/doc.yml ipython_savefig_dir = os.path.join( os.path.dirname(os.path.abspath(__file__)), "_build", "html", "_static" ) diff --git a/doc/contributing.rst b/doc/contributing.rst index 53b8cb51f60..66e8377600e 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -696,7 +696,7 @@ environment by:: or, to use a specific Python interpreter,:: - asv run -e -E existing:python3.5 + asv run -e -E existing:python3.6 This will display stderr from the benchmarks, and use your local ``python`` that comes from your ``$PATH``. diff --git a/doc/environment.yml b/doc/environment.yml deleted file mode 100644 index e1b5c5475f7..00000000000 --- a/doc/environment.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: xarray-docs -channels: - - conda-forge -dependencies: - - python=3.7 - - numpy=1.16.0 - - pandas=0.23.3 - - scipy=1.2.0 - - matplotlib=3.0.2 - - seaborn=0.9.0 - - dask=1.1.0 - - ipython=7.2.0 - - netCDF4=1.4.2 - - h5netcdf=0.7.4 - - cartopy=0.17.0 - - rasterio=1.0.24 - - zarr=2.2.0 - - iris=2.2.0 - - flake8=3.6.0 - - cftime=1.0.3.4 - - bottleneck=1.2.1 - - sphinx=1.8.2 - - numpydoc=0.8.0 - - sphinx-gallery=0.2.0 - - pillow=5.4.1 - - sphinx_rtd_theme=0.4.2 - - mock=2.0.0 - - pip diff --git a/doc/groupby.rst b/doc/groupby.rst index 03c0881d836..cc2682f2ee3 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -77,7 +77,7 @@ a customized coordinate, but xarray facilitates this via the x_bins = [0,25,50] ds.groupby_bins('x', x_bins).groups -The binning is implemented via `pandas.cut`__, whose documentation details how +The binning is implemented via :func:`pandas.cut`, whose documentation details how the bins are assigned. As seen in the example above, by default, the bins are labeled with strings using set notation to precisely identify the bin limits. To override this behavior, you can specify the bin labels explicitly. Here we @@ -88,8 +88,6 @@ choose `float` labels which identify the bin centers: x_bin_labels = [12.5,37.5] ds.groupby_bins('x', x_bins, labels=x_bin_labels).groups -__ http://pandas.pydata.org/pandas-docs/version/0.17.1/generated/pandas.cut.html - Apply ~~~~~ diff --git a/doc/installing.rst b/doc/installing.rst index a81f6c23328..b1bf072dbe1 100644 --- a/doc/installing.rst +++ b/doc/installing.rst @@ -6,9 +6,9 @@ Installation Required dependencies --------------------- -- Python (3.5.3 or later) -- `numpy `__ (1.12 or later) -- `pandas `__ (0.19.2 or later) +- Python (3.6 or later) +- `numpy `__ (1.14 or later) +- `pandas `__ (0.24 or later) Optional dependencies --------------------- @@ -32,7 +32,7 @@ For netCDF and IO for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files (ffi1001) and many other. - `rasterio `__: for reading GeoTiffs and - other gridded raster datasets. (version 1.0 or later) + other gridded raster datasets. - `iris `__: for conversion to and from iris' Cube objects - `cfgrib `__: for reading GRIB files via the @@ -41,30 +41,67 @@ For netCDF and IO For accelerating xarray ~~~~~~~~~~~~~~~~~~~~~~~ -- `scipy `__: necessary to enable the interpolation features for xarray objects +- `scipy `__: necessary to enable the interpolation features for + xarray objects - `bottleneck `__: speeds up NaN-skipping and rolling window aggregations by a large factor - (1.1 or later) - `numbagg `_: for exponential rolling window operations For parallel computing ~~~~~~~~~~~~~~~~~~~~~~ -- `dask.array `__ (0.16 or later): required for - :ref:`dask`. +- `dask.array `__: required for :ref:`dask`. For plotting ~~~~~~~~~~~~ - `matplotlib `__: required for :ref:`plotting` - (1.5 or later) -- `cartopy `__: recommended for - :ref:`plot-maps` +- `cartopy `__: recommended for :ref:`plot-maps` - `seaborn `__: for better color palettes - `nc-time-axis `__: for plotting - cftime.datetime objects (1.2.0 or later) + cftime.datetime objects + +Alternative data containers +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- `sparse `_: for sparse arrays +- Any numpy-like objects that support + `NEP-18 `_. + Note that while such libraries theoretically should work, they are untested. + Integration tests are in the process of being written for individual libraries. + + +.. _mindeps_policy: + +Minimum dependency versions +--------------------------- +xarray adopts a rolling policy regarding the minimum supported version of its +dependencies: + +- **Python:** 42 months + (`NEP-29 `_) +- **numpy:** 24 months + (`NEP-29 `_) +- **pandas:** 12 months +- **scipy:** 12 months +- **sparse** and other libraries that rely on + `NEP-18 `_ + for integration: very latest available versions only, until the technology will have + matured. This extends to dask when used in conjunction with any of these libraries. + numpy >=1.17. +- **all other libraries:** 6 months + +The above should be interpreted as *the minor version (X.Y) initially published no more +than N months ago*. Patch versions (x.y.Z) are not pinned, and only the latest available +at the moment of publishing the xarray release is guaranteed to work. + +You can see the actual minimum tested versions: + +- `For NEP-18 libraries + `_ +- `For everything else + `_ Instructions @@ -93,13 +130,9 @@ pandas) installed first. Then, install xarray with pip:: Testing ------- -To run the test suite after installing xarray, first install (via pypi or conda) - -- `py.test `__: Simple unit testing library -- `mock `__: additional testing library required for python version 2 - -and run -``py.test --pyargs xarray``. +To run the test suite after installing xarray, install (via pypi or conda) `py.test +`__ and run ``pytest`` in the root directory of the xarray +repository. Performance Monitoring @@ -110,7 +143,8 @@ A fixed-point performance monitoring of (a part of) our codes can be seen on To run these benchmark tests in a local machine, first install -- `airspeed-velocity `__: a tool for benchmarking Python packages over their lifetime. +- `airspeed-velocity `__: a tool for benchmarking + Python packages over their lifetime. and run ``asv run # this will install some conda environments in ./.asv/envs`` diff --git a/doc/io.rst b/doc/io.rst index 7f0c2333ce5..dab2a195e90 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -451,6 +451,7 @@ This feature is availabe through :py:func:`DataArray.to_netcdf` and and currently raises a warning unless ``invalid_netcdf=True`` is set: .. ipython:: python + :okwarning: # Writing complex valued data da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j]) diff --git a/doc/pandas.rst b/doc/pandas.rst index 4fa73eec18c..4f3088b4c34 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -65,8 +65,7 @@ For datasets containing dask arrays where the data should be lazily loaded, see To create a ``Dataset`` from a ``DataFrame``, use the :py:meth:`~xarray.Dataset.from_dataframe` class method or the equivalent -:py:meth:`pandas.DataFrame.to_xarray ` method (pandas -v0.18 or later): +:py:meth:`pandas.DataFrame.to_xarray ` method: .. ipython:: python diff --git a/doc/plotting.rst b/doc/plotting.rst index 3e61e85f78c..e9d30fb63c8 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -249,6 +249,7 @@ As an alternative, also a step plot similar to matplotlib's ``plt.step`` can be made using 1D data. .. ipython:: python + :okwarning: @savefig plotting_example_step.png width=4in air1d[:20].plot.step(where='mid') diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7103d7b8ab3..5b73059b34c 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -13,32 +13,59 @@ What's New import xarray as xr np.random.seed(123456) -.. _whats-new.0.13.1: +.. _whats-new.0.14.0: -v0.13.1 (unreleased) +v0.14.0 (unreleased) -------------------- +Breaking changes +~~~~~~~~~~~~~~~~ +- This release introduces a rolling policy for minimum dependency versions: + :ref:`mindeps_policy`. + + Several minimum versions have been increased: + + ============ ================== ==== + Package Old New + ============ ================== ==== + Python 3.5.3 3.6 + numpy 1.12 1.14 + pandas 0.19.2 0.24 + dask 0.16 (tested: 2.4) 1.2 + bottleneck 1.1 (tested: 1.2) 1.2 + matplotlib 1.5 (tested: 3.1) 3.1 + ============ ================== ==== + + Obsolete patch versions (x.y.Z) are not tested anymore. + The oldest supported versions of all optional dependencies are now covered by + automated tests (before, only the very latest versions were tested). + + (:issue:`3222`, :issue:`3293`, :issue:`3340`, :issue:`3346`, :issue:`3358`). + By `Guido Imperiale `_. + New functions/methods ~~~~~~~~~~~~~~~~~~~~~ Enhancements ~~~~~~~~~~~~ -- Add a repr for :py:class:`~xarray.core.GroupBy` objects. By `Deepak Cherian `_. +- Add a repr for :py:class:`~xarray.core.GroupBy` objects (:issue:`3344`). Example:: >>> da.groupby("time.season") DataArrayGroupBy, grouped over 'season' 4 groups with labels 'DJF', 'JJA', 'MAM', 'SON' + By `Deepak Cherian `_. + Bug fixes ~~~~~~~~~ - Reintroduce support for :mod:`weakref` (broken in v0.13.0). Support has been reinstated for :class:`DataArray` and :class:`Dataset` objects only. Internal xarray - objects remain unaddressable by weakref in order to save memory. - (:issue:`3317`) by `Guido Imperiale `_. + objects remain unaddressable by weakref in order to save memory + (:issue:`3317`). By `Guido Imperiale `_. - Line plots with the ``x`` or ``y`` argument set to a 1D non-dimensional coord - now plot the correct data for 2D DataArrays. + now plot the correct data for 2D DataArrays (:issue:`3334`). By `Tom Nicholas `_. Documentation @@ -58,6 +85,7 @@ Documentation (:pull:`3353`). By `Gregory Gundersen `_. + .. _whats-new.0.13.0: v0.13.0 (17 Sep 2019) diff --git a/readthedocs.yml b/readthedocs.yml index 8e9c09c9414..6429780e7d7 100644 --- a/readthedocs.yml +++ b/readthedocs.yml @@ -1,8 +1,8 @@ build: image: latest conda: - file: doc/environment.yml + file: ci/requirements/doc.yml python: - version: 3.6 + version: 3.7 setup_py_install: true formats: [] diff --git a/setup.py b/setup.py index 5cfa4d9f9df..08d4f54764f 100644 --- a/setup.py +++ b/setup.py @@ -16,14 +16,13 @@ "Intended Audience :: Science/Research", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Topic :: Scientific/Engineering", ] -PYTHON_REQUIRES = ">=3.5.3" -INSTALL_REQUIRES = ["numpy >= 1.12", "pandas >= 0.19.2"] +PYTHON_REQUIRES = ">=3.6" +INSTALL_REQUIRES = ["numpy >= 1.14", "pandas >= 0.24"] needs_pytest = {"pytest", "test", "ptr"}.intersection(sys.argv) SETUP_REQUIRES = ["pytest-runner >= 4.2"] if needs_pytest else [] TESTS_REQUIRE = ["pytest >= 2.7.1"] diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 458a2d0cc42..8f6881b804a 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -912,7 +912,7 @@ def open_mfdataset( # Remove this after deprecation cycle from #2616 is complete basic_msg = dedent( """\ - In xarray version 0.14 the default behaviour of `open_mfdataset` + In xarray version 0.15 the default behaviour of `open_mfdataset` will change. To retain the existing behavior, pass combine='nested'. To use future default behavior, pass combine='by_coords'. See @@ -963,11 +963,11 @@ def open_mfdataset( return combined -WRITEABLE_STORES = { +WRITEABLE_STORES: Dict[str, Callable] = { "netcdf4": backends.NetCDF4DataStore.open, "scipy": backends.ScipyDataStore, "h5netcdf": backends.H5NetCDFStore, -} # type: Dict[str, Callable] +} def to_netcdf( diff --git a/xarray/backends/file_manager.py b/xarray/backends/file_manager.py index eac28852281..0ff574b5d81 100644 --- a/xarray/backends/file_manager.py +++ b/xarray/backends/file_manager.py @@ -13,7 +13,7 @@ assert FILE_CACHE.maxsize, "file cache must be at least size one" -REF_COUNTS = {} # type: Dict[Any, int] +REF_COUNTS: Dict[Any, int] = {} _DEFAULT_MODE = utils.ReprObject("") diff --git a/xarray/backends/locks.py b/xarray/backends/locks.py index 865ce1ddccd..d0bf790f074 100644 --- a/xarray/backends/locks.py +++ b/xarray/backends/locks.py @@ -21,7 +21,7 @@ NETCDFC_LOCK = SerializableLock() -_FILE_LOCKS = weakref.WeakValueDictionary() # type: MutableMapping[Any, threading.Lock] +_FILE_LOCKS: MutableMapping[Any, threading.Lock] = weakref.WeakValueDictionary() def _get_threaded_lock(key): @@ -72,17 +72,11 @@ def _get_scheduler(get=None, collection=None): dask.base.get_scheduler """ try: - # dask 0.18.1 and later - from dask.base import get_scheduler - - actual_get = get_scheduler(get, collection) + import dask # noqa: F401 except ImportError: - try: - from dask.utils import effective_get + return None - actual_get = effective_get(get, collection) - except ImportError: - return None + actual_get = dask.base.get_scheduler(get, collection) try: from dask.distributed import Client @@ -90,15 +84,12 @@ def _get_scheduler(get=None, collection=None): if isinstance(actual_get.__self__, Client): return "distributed" except (ImportError, AttributeError): - try: - import dask.multiprocessing - - if actual_get == dask.multiprocessing.get: - return "multiprocessing" - else: - return "threaded" - except ImportError: - return "threaded" + pass + + if actual_get is dask.multiprocessing.get: + return "multiprocessing" + else: + return "threaded" def get_write_lock(key): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 813942c2f32..203a2157e70 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -1,9 +1,7 @@ import functools import operator -import warnings from collections import OrderedDict from contextlib import suppress -from distutils.version import LooseVersion import numpy as np @@ -354,16 +352,6 @@ def open( ): import netCDF4 - if len(filename) == 88 and LooseVersion(netCDF4.__version__) < "1.3.1": - warnings.warn( - "A segmentation fault may occur when the " - "file path has exactly 88 characters as it does " - "in this case. The issue is known to occur with " - "version 1.2.4 of netCDF4 and can be addressed by " - "upgrading netCDF4 to at least version 1.3.1. " - "More details can be found here: " - "https://github.com/pydata/xarray/issues/1745" - ) if format is None: format = "NETCDF4" diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index 316f13470b7..deff2eaed66 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -1,7 +1,6 @@ import os import warnings from collections import OrderedDict -from distutils.version import LooseVersion import numpy as np @@ -253,18 +252,14 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc coords["band"] = np.asarray(riods.indexes) # Get coordinates - if LooseVersion(rasterio.__version__) < "1.0": - transform = riods.affine - else: - transform = riods.transform - if transform.is_rectilinear: + if riods.transform.is_rectilinear: # 1d coordinates parse = True if parse_coordinates is None else parse_coordinates if parse: nx, ny = riods.width, riods.height # xarray coordinates are pixel centered - x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * transform - _, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * transform + x, _ = (np.arange(nx) + 0.5, np.zeros(nx) + 0.5) * riods.transform + _, y = (np.zeros(ny) + 0.5, np.arange(ny) + 0.5) * riods.transform coords["y"] = y coords["x"] = x else: @@ -287,7 +282,7 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc # For serialization store as tuple of 6 floats, the last row being # always (0, 0, 1) per definition (see # https://github.com/sgillies/affine) - attrs["transform"] = tuple(transform)[:6] + attrs["transform"] = tuple(riods.transform)[:6] if hasattr(riods, "crs") and riods.crs: # CRS is a dict-like object specific to rasterio # If CRS is not None, we convert it back to a PROJ4 string using diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index c4f9666f0c1..7f93ca237b1 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -1,6 +1,4 @@ -import warnings from collections import OrderedDict -from distutils.version import LooseVersion from io import BytesIO import numpy as np @@ -113,18 +111,6 @@ class ScipyDataStore(WritableCFDataStore): def __init__( self, filename_or_obj, mode="r", format=None, group=None, mmap=None, lock=None ): - import scipy - import scipy.io - - if mode != "r" and scipy.__version__ < LooseVersion("0.13"): # pragma: no cover - warnings.warn( - "scipy %s detected; " - "the minimal recommended version is 0.13. " - "Older version of this library do not reliably " - "read and write files." % scipy.__version__, - ImportWarning, - ) - if group is not None: raise ValueError( "cannot save to a group with the " "scipy.io.netcdf backend" diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 9a115de55ef..b550efe052e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1,6 +1,5 @@ import warnings from collections import OrderedDict -from distutils.version import LooseVersion import numpy as np @@ -254,25 +253,6 @@ def open_group( ): import zarr - min_zarr = "2.2" - - if LooseVersion(zarr.__version__) < min_zarr: # pragma: no cover - raise NotImplementedError( - "Zarr version %s or greater is " - "required by xarray. See zarr " - "installation " - "http://zarr.readthedocs.io/en/stable/" - "#installation" % min_zarr - ) - - if consolidated or consolidate_on_close: - if LooseVersion(zarr.__version__) <= "2.2.1.dev2": # pragma: no cover - raise NotImplementedError( - "Zarr version 2.2.1.dev2 or greater " - "is required by for consolidated " - "metadata." - ) - open_kwargs = dict(mode=mode, synchronizer=synchronizer, path=group) if consolidated: # TODO: an option to pass the metadata_key keyword diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index d7841fd43f8..515d309d75b 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -43,10 +43,11 @@ import re from datetime import timedelta from functools import partial -from typing import ClassVar +from typing import ClassVar, Optional import numpy as np +from ..core.pdcompat import count_not_none from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso from .times import format_cftime_datetime @@ -73,8 +74,8 @@ def get_date_type(calendar): class BaseCFTimeOffset: - _freq = None # type: ClassVar[str] - _day_option = None # type: ClassVar[str] + _freq: ClassVar[Optional[str]] = None + _day_option: ClassVar[Optional[str]] = None def __init__(self, n=1): if not isinstance(n, int): @@ -350,8 +351,8 @@ class QuarterOffset(BaseCFTimeOffset): """Quarter representation copied off of pandas/tseries/offsets.py """ - _freq = None # type: ClassVar[str] - _default_month = None # type: ClassVar[int] + _freq: ClassVar[str] + _default_month: ClassVar[int] def __init__(self, n=1, month=None): BaseCFTimeOffset.__init__(self, n) @@ -447,9 +448,9 @@ def rollback(self, date): class YearOffset(BaseCFTimeOffset): - _freq = None # type: ClassVar[str] - _day_option = None # type: ClassVar[str] - _default_month = None # type: ClassVar[int] + _freq: ClassVar[str] + _day_option: ClassVar[str] + _default_month: ClassVar[int] def __init__(self, n=1, month=None): BaseCFTimeOffset.__init__(self, n) @@ -774,11 +775,6 @@ def _generate_range(start, end, periods, offset): current = next_date -def _count_not_none(*args): - """Compute the number of non-None arguments.""" - return sum([arg is not None for arg in args]) - - def cftime_range( start=None, end=None, @@ -957,7 +953,7 @@ def cftime_range( pandas.date_range """ # Adapted from pandas.core.indexes.datetimes._generate_range. - if _count_not_none(start, end, periods, freq) != 3: + if count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the arguments 'start', 'end', 'periods', and 'freq', three " "must be specified at a time." diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 16ab258d32e..802dd94f06c 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -437,11 +437,6 @@ def __sub__(self, other): def __rsub__(self, other): return pd.TimedeltaIndex(other - np.array(self)) - def _add_delta(self, deltas): - # To support TimedeltaIndex + CFTimeIndex with older versions of - # pandas. No longer used as of pandas 0.23. - return self + deltas - def to_datetimeindex(self, unsafe=False): """If possible, convert this index to a pandas.DatetimeIndex. diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 7b5a7c56a53..1508fb50b38 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd +from pandas.errors import OutOfBoundsDatetime from ..core import indexing from ..core.common import contains_cftime_datetimes @@ -21,12 +22,6 @@ unpack_for_encoding, ) -try: - from pandas.errors import OutOfBoundsDatetime -except ImportError: - # pandas < 0.20 - from pandas.tslib import OutOfBoundsDatetime - # standard calendars recognized by cftime _STANDARD_CALENDARS = {"standard", "gregorian", "proleptic_gregorian"} diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 7adaca4e9bc..f78502d81be 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -1,7 +1,7 @@ """Coders for individual Variable objects.""" import warnings from functools import partial -from typing import Any +from typing import Any, Hashable import numpy as np import pandas as pd @@ -33,14 +33,18 @@ class VariableCoder: variables in the underlying store. """ - def encode(self, variable, name=None): # pragma: no cover - # type: (Variable, Any) -> Variable - """Convert an encoded variable to a decoded variable.""" + def encode( + self, variable: Variable, name: Hashable = None + ) -> Variable: # pragma: no cover + """Convert an encoded variable to a decoded variable + """ raise NotImplementedError() - def decode(self, variable, name=None): # pragma: no cover - # type: (Variable, Any) -> Variable - """Convert an decoded variable to a encoded variable.""" + def decode( + self, variable: Variable, name: Hashable = None + ) -> Variable: # pragma: no cover + """Convert an decoded variable to a encoded variable + """ raise NotImplementedError() diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 3bc60db0a0b..b4fee1773b8 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -549,7 +549,7 @@ def reindex_variables( if dim in variables: var = variables[dim] - args = (var.attrs, var.encoding) # type: tuple + args: tuple = (var.attrs, var.encoding) else: args = () reindexed[dim] = IndexVariable((dim,), target, *args) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 38befd5698f..8c3555941c4 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -789,7 +789,7 @@ def auto_combine( if not from_openmfds: basic_msg = dedent( """\ - In xarray version 0.14 `auto_combine` will be deprecated. See + In xarray version 0.15 `auto_combine` will be deprecated. See http://xarray.pydata.org/en/stable/combining.html#combining-multi""" ) warnings.warn(basic_msg, FutureWarning, stacklevel=2) @@ -831,7 +831,7 @@ def auto_combine( message += dedent( """\ The datasets supplied require both concatenation and merging. From - xarray version 0.14 this will operation will require either using the + xarray version 0.15 this will operation will require either using the new `combine_nested` function (or the `combine='nested'` option to open_mfdataset), with a nested list structure such that you can combine along the dimensions {}. Alternatively if your datasets have global diff --git a/xarray/core/common.py b/xarray/core/common.py index bf15e9907c4..b1a513e05a0 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -193,10 +193,9 @@ def __init_subclass__(cls): """Verify that all subclasses explicitly define ``__slots__``. If they don't, raise error in the core xarray module and a FutureWarning in third-party extensions. - This check is only triggered in Python 3.6+. """ if not hasattr(object.__new__(cls), "__dict__"): - cls.__setattr__ = cls._setattr_slots + pass elif cls.__module__.startswith("xarray."): raise AttributeError("%s must explicitly define __slots__" % cls.__name__) else: @@ -230,12 +229,11 @@ def __getattr__(self, name: str) -> Any: "%r object has no attribute %r" % (type(self).__name__, name) ) - # This complicated three-method design boosts overall performance of simple - # operations - particularly DataArray methods that perform a _to_temp_dataset() - # round-trip - by a whopping 8% compared to a single method that checks - # hasattr(self, "__dict__") at runtime before every single assignment (like - # _setattr_py35 does). All of this is just temporary until the FutureWarning can be - # changed into a hard crash. + # This complicated two-method design boosts overall performance of simple operations + # - particularly DataArray methods that perform a _to_temp_dataset() round-trip - by + # a whopping 8% compared to a single method that checks hasattr(self, "__dict__") at + # runtime before every single assignment. All of this is just temporary until the + # FutureWarning can be changed into a hard crash. def _setattr_dict(self, name: str, value: Any) -> None: """Deprecated third party subclass (see ``__init_subclass__`` above) """ @@ -251,7 +249,7 @@ def _setattr_dict(self, name: str, value: Any) -> None: stacklevel=2, ) - def _setattr_slots(self, name: str, value: Any) -> None: + def __setattr__(self, name: str, value: Any) -> None: """Objects with ``__slots__`` raise AttributeError if you try setting an undeclared attribute. This is desirable, but the error message could use some improvement. @@ -269,14 +267,6 @@ def _setattr_slots(self, name: str, value: Any) -> None: % (name, type(self).__name__) ) from e - def _setattr_py35(self, name: str, value: Any) -> None: - if hasattr(self, "__dict__"): - return self._setattr_dict(name, value) - return self._setattr_slots(name, value) - - # Overridden in Python >=3.6 by __init_subclass__ - __setattr__ = _setattr_py35 - def __dir__(self) -> List[str]: """Provide method name lookup and completion. Only provide 'public' methods. @@ -392,7 +382,7 @@ def get_index(self, key: Hashable) -> pd.Index: def _calc_assign_results( self: C, kwargs: Mapping[Hashable, Union[T, Callable[[C], T]]] ) -> MutableMapping[Hashable, T]: - results = SortedKeysDict() # type: SortedKeysDict[Hashable, T] + results: MutableMapping[Hashable, T] = SortedKeysDict() for k, v in kwargs.items(): if callable(v): results[k] = v(self) @@ -1040,13 +1030,8 @@ def resample( grouper = CFTimeGrouper(freq, closed, label, base, loffset) else: - # TODO: to_offset() call required for pandas==0.19.2 grouper = pd.Grouper( - freq=freq, - closed=closed, - label=label, - base=base, - loffset=pd.tseries.frequencies.to_offset(loffset), + freq=freq, closed=closed, label=label, base=base, loffset=loffset ) group = DataArray( dim_coord, coords=dim_coord.coords, dims=dim_coord.dims, name=RESAMPLE_DIM diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 4b9428847f4..a55613dd4b4 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -5,12 +5,12 @@ import itertools import operator from collections import Counter, OrderedDict -from distutils.version import LooseVersion from typing import ( TYPE_CHECKING, AbstractSet, Any, Callable, + Hashable, Iterable, List, Mapping, @@ -33,7 +33,6 @@ from .coordinates import Coordinates # noqa from .dataset import Dataset -_DEFAULT_FROZEN_SET = frozenset() # type: frozenset _NO_FILL_VALUE = utils.ReprObject("") _DEFAULT_NAME = utils.ReprObject("") _JOINS_WITHOUT_FILL_VALUES = frozenset({"inner", "exact"}) @@ -492,8 +491,11 @@ def unified_dim_sizes( SLICE_NONE = slice(None) -def broadcast_compat_data(variable, broadcast_dims, core_dims): - # type: (Variable, tuple, tuple) -> Any +def broadcast_compat_data( + variable: Variable, + broadcast_dims: Tuple[Hashable, ...], + core_dims: Tuple[Hashable, ...], +) -> Any: data = variable.data old_dims = variable.dims @@ -654,7 +656,7 @@ def func(*arrays): def _apply_blockwise( func, args, input_dims, output_dims, signature, output_dtypes, output_sizes=None ): - from .dask_array_compat import blockwise + import dask.array if signature.num_outputs > 1: raise NotImplementedError( @@ -717,7 +719,7 @@ def _apply_blockwise( trimmed_dims = dims[-ndim:] if ndim else () blockwise_args.extend([arg, trimmed_dims]) - return blockwise( + return dask.array.blockwise( func, out_ind, *blockwise_args, @@ -995,13 +997,6 @@ def earth_mover_distance(first_samples, if vectorize: if signature.all_core_dims: - # we need the signature argument - if LooseVersion(np.__version__) < "1.12": # pragma: no cover - raise NotImplementedError( - "numpy 1.12 or newer required when using vectorize=True " - "in xarray.apply_ufunc with non-scalar output core " - "dimensions." - ) func = np.vectorize( func, otypes=output_dtypes, signature=signature.to_gufunc_string() ) @@ -1169,25 +1164,6 @@ def dot(*arrays, dims=None, **kwargs): ] output_core_dims = [tuple(d for d in all_dims if d not in dims + broadcast_dims)] - # older dask than 0.17.4, we use tensordot if possible. - if isinstance(arr.data, dask_array_type): - import dask - - if LooseVersion(dask.__version__) < LooseVersion("0.17.4"): - if len(broadcast_dims) == 0 and len(arrays) == 2: - axes = [ - [arr.get_axis_num(d) for d in arr.dims if d in dims] - for arr in arrays - ] - return apply_ufunc( - duck_array_ops.tensordot, - *arrays, - dask="allowed", - input_core_dims=input_core_dims, - output_core_dims=output_core_dims, - kwargs={"axes": axes} - ) - # construct einsum subscripts, such as '...abc,...ab->...c' # Note: input_core_dims are always moved to the last position subscripts_list = [ diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py deleted file mode 100644 index fe2cdc5c553..00000000000 --- a/xarray/core/dask_array_compat.py +++ /dev/null @@ -1,173 +0,0 @@ -from distutils.version import LooseVersion - -import dask.array as da -import numpy as np -from dask import __version__ as dask_version - -try: - blockwise = da.blockwise -except AttributeError: - blockwise = da.atop - - -try: - from dask.array import isin -except ImportError: # pragma: no cover - # Copied from dask v0.17.3. - # Used under the terms of Dask's license, see licenses/DASK_LICENSE. - - def _isin_kernel(element, test_elements, assume_unique=False): - values = np.in1d(element.ravel(), test_elements, assume_unique=assume_unique) - return values.reshape(element.shape + (1,) * test_elements.ndim) - - def isin(element, test_elements, assume_unique=False, invert=False): - element = da.asarray(element) - test_elements = da.asarray(test_elements) - element_axes = tuple(range(element.ndim)) - test_axes = tuple(i + element.ndim for i in range(test_elements.ndim)) - mapped = blockwise( - _isin_kernel, - element_axes + test_axes, - element, - element_axes, - test_elements, - test_axes, - adjust_chunks={axis: lambda _: 1 for axis in test_axes}, - dtype=bool, - assume_unique=assume_unique, - ) - result = mapped.any(axis=test_axes) - if invert: - result = ~result - return result - - -if LooseVersion(dask_version) > LooseVersion("0.19.2"): - gradient = da.gradient - -else: # pragma: no cover - # Copied from dask v0.19.2 - # Used under the terms of Dask's license, see licenses/DASK_LICENSE. - import math - from numbers import Integral, Real - - try: - AxisError = np.AxisError - except AttributeError: - try: - np.array([0]).sum(axis=5) - except Exception as e: - AxisError = type(e) - - def validate_axis(axis, ndim): - """ Validate an input to axis= keywords """ - if isinstance(axis, (tuple, list)): - return tuple(validate_axis(ax, ndim) for ax in axis) - if not isinstance(axis, Integral): - raise TypeError("Axis value must be an integer, got %s" % axis) - if axis < -ndim or axis >= ndim: - raise AxisError( - "Axis %d is out of bounds for array of dimension " "%d" % (axis, ndim) - ) - if axis < 0: - axis += ndim - return axis - - def _gradient_kernel(x, block_id, coord, axis, array_locs, grad_kwargs): - """ - x: nd-array - array of one block - coord: 1d-array or scalar - coordinate along which the gradient is computed. - axis: int - axis along which the gradient is computed - array_locs: - actual location along axis. None if coordinate is scalar - grad_kwargs: - keyword to be passed to np.gradient - """ - block_loc = block_id[axis] - if array_locs is not None: - coord = coord[array_locs[0][block_loc] : array_locs[1][block_loc]] - grad = np.gradient(x, coord, axis=axis, **grad_kwargs) - return grad - - def gradient(f, *varargs, axis=None, **kwargs): - f = da.asarray(f) - - kwargs["edge_order"] = math.ceil(kwargs.get("edge_order", 1)) - if kwargs["edge_order"] > 2: - raise ValueError("edge_order must be less than or equal to 2.") - - drop_result_list = False - if axis is None: - axis = tuple(range(f.ndim)) - elif isinstance(axis, Integral): - drop_result_list = True - axis = (axis,) - - axis = validate_axis(axis, f.ndim) - - if len(axis) != len(set(axis)): - raise ValueError("duplicate axes not allowed") - - axis = tuple(ax % f.ndim for ax in axis) - - if varargs == (): - varargs = (1,) - if len(varargs) == 1: - varargs = len(axis) * varargs - if len(varargs) != len(axis): - raise TypeError( - "Spacing must either be a single scalar, or a scalar / " - "1d-array per axis" - ) - - if issubclass(f.dtype.type, (np.bool8, Integral)): - f = f.astype(float) - elif issubclass(f.dtype.type, Real) and f.dtype.itemsize < 4: - f = f.astype(float) - - results = [] - for i, ax in enumerate(axis): - for c in f.chunks[ax]: - if np.min(c) < kwargs["edge_order"] + 1: - raise ValueError( - "Chunk size must be larger than edge_order + 1. " - "Minimum chunk for aixs {} is {}. Rechunk to " - "proceed.".format(np.min(c), ax) - ) - - if np.isscalar(varargs[i]): - array_locs = None - else: - if isinstance(varargs[i], da.Array): - raise NotImplementedError( - "dask array coordinated is not supported." - ) - # coordinate position for each block taking overlap into - # account - chunk = np.array(f.chunks[ax]) - array_loc_stop = np.cumsum(chunk) + 1 - array_loc_start = array_loc_stop - chunk - 2 - array_loc_stop[-1] -= 1 - array_loc_start[0] = 0 - array_locs = (array_loc_start, array_loc_stop) - - results.append( - f.map_overlap( - _gradient_kernel, - dtype=f.dtype, - depth={j: 1 if j == ax else 0 for j in range(f.ndim)}, - boundary="none", - coord=varargs[i], - axis=ax, - array_locs=array_locs, - grad_kwargs=kwargs, - ) - ) - - if drop_result_list: - results = results[0] - - return results diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 11fdb86e9b0..37f261cc3ad 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -1,26 +1,13 @@ -from distutils.version import LooseVersion - import numpy as np from . import dtypes, nputils -try: - import dask - import dask.array as da - - # Note: dask has used `ghost` before 0.18.2 - if LooseVersion(dask.__version__) <= LooseVersion("0.18.2"): - overlap = da.ghost.ghost - trim_internal = da.ghost.trim_internal - else: - overlap = da.overlap.overlap - trim_internal = da.overlap.trim_internal -except ImportError: - pass - def dask_rolling_wrapper(moving_func, a, window, min_count=None, axis=-1): - """wrapper to apply bottleneck moving window funcs on dask arrays""" + """Wrapper to apply bottleneck moving window funcs on dask arrays + """ + import dask.array as da + dtype, fill_value = dtypes.maybe_promote(a.dtype) a = a.astype(dtype) # inputs for overlap @@ -30,18 +17,21 @@ def dask_rolling_wrapper(moving_func, a, window, min_count=None, axis=-1): depth[axis] = (window + 1) // 2 boundary = {d: fill_value for d in range(a.ndim)} # Create overlap array. - ag = overlap(a, depth=depth, boundary=boundary) + ag = da.overlap.overlap(a, depth=depth, boundary=boundary) # apply rolling func out = ag.map_blocks( moving_func, window, min_count=min_count, axis=axis, dtype=a.dtype ) # trim array - result = trim_internal(out, depth) + result = da.overlap.trim_internal(out, depth) return result def rolling_window(a, axis, window, center, fill_value): - """ Dask's equivalence to np.utils.rolling_window """ + """Dask's equivalence to np.utils.rolling_window + """ + import dask.array as da + orig_shape = a.shape if axis < 0: axis = a.ndim + axis @@ -59,7 +49,7 @@ def rolling_window(a, axis, window, center, fill_value): % (window, depth[axis], min(a.chunks[axis])) ) - # Although dask.overlap pads values to boundaries of the array, + # Although da.overlap pads values to boundaries of the array, # the size of the generated array is smaller than what we want # if center == False. if center: @@ -88,7 +78,7 @@ def rolling_window(a, axis, window, center, fill_value): boundary = {d: fill_value for d in range(a.ndim)} # create overlap arrays - ag = overlap(a, depth=depth, boundary=boundary) + ag = da.overlap.overlap(a, depth=depth, boundary=boundary) # apply rolling func def func(x, window, axis=-1): diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 7ad6f3cbae8..d536d0de2c5 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1,5 +1,4 @@ import functools -import sys import warnings from collections import OrderedDict from numbers import Number @@ -323,7 +322,7 @@ def __init__( if encoding is not None: warnings.warn( "The `encoding` argument to `DataArray` is deprecated, and . " - "will be removed in 0.14. " + "will be removed in 0.15. " "Instead, specify the encoding when writing to disk or " "set the `encoding` attribute directly.", FutureWarning, @@ -419,7 +418,7 @@ def _overwrite_indexes(self, indexes: Mapping[Hashable, Any]) -> "DataArray": obj = self._replace(coords=coords) # switch from dimension to level names, if necessary - dim_names = {} # type: Dict[Any, str] + dim_names: Dict[Any, str] = {} for dim, idx in indexes.items(): if not isinstance(idx, pd.MultiIndex) and idx.name != dim: dim_names[dim] = idx.name @@ -1184,12 +1183,11 @@ def reindex_like( * None (default): don't fill gaps * pad / ffill: propagate last valid index value forward * backfill / bfill: propagate next valid index value backward - * nearest: use nearest valid index value (requires pandas>=0.16) + * nearest: use nearest valid index value tolerance : optional Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. - Requires pandas>=0.17. copy : bool, optional If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -1250,7 +1248,7 @@ def reindex( * None (default): don't fill gaps * pad / ffill: propagate last valid index value forward * backfill / bfill: propagate next valid index value backward - * nearest: use nearest valid index value (requires pandas>=0.16) + * nearest: use nearest valid index value tolerance : optional Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must @@ -1504,9 +1502,7 @@ def expand_dims( with length 1. If provided as a dict, then the keys are the new dimensions and the values are either integers (giving the length of the new dimensions) or sequence/ndarray (giving the coordinates of - the new dimensions). **WARNING** for python 3.5, if ``dim`` is - dict-like, then it must be an ``OrderedDict``. This is to ensure - that the order in which the dims are given is maintained. + the new dimensions). axis : integer, list (or tuple) of integers, or None Axis position(s) where new axis is to be inserted (position(s) on the result array). If a list (or tuple) of integers is passed, @@ -1517,8 +1513,7 @@ def expand_dims( The keywords are arbitrary dimensions being inserted and the values are either the lengths of the new dims (if int is given), or their coordinates. Note, this is an alternative to passing a dict to the - dim kwarg and will only be used if dim is None. **WARNING** for - python 3.5 ``dim_kwargs`` is not available. + dim kwarg and will only be used if dim is None. Returns ------- @@ -1534,16 +1529,6 @@ def expand_dims( elif dim is not None and not isinstance(dim, Mapping): dim = OrderedDict(((cast(Hashable, dim), 1),)) - # TODO: get rid of the below code block when python 3.5 is no longer - # supported. - python36_plus = sys.version_info[0] == 3 and sys.version_info[1] > 5 - not_ordereddict = dim is not None and not isinstance(dim, OrderedDict) - if not python36_plus and not_ordereddict: - raise TypeError("dim must be an OrderedDict for python <3.6") - elif not python36_plus and dim_kwargs: - raise ValueError("dim_kwargs isn't available for python <3.6") - dim_kwargs = OrderedDict(dim_kwargs) - dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") ds = self._to_temp_dataset().expand_dims(dim, axis) return self._from_temp_dataset(ds) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d394e05b07a..1d9ef6f7a72 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3,7 +3,6 @@ import sys import warnings from collections import OrderedDict, defaultdict -from distutils.version import LooseVersion from numbers import Number from pathlib import Path from typing import ( @@ -41,7 +40,6 @@ formatting, groupby, ops, - pdcompat, resample, rolling, utils, @@ -132,8 +130,9 @@ def _get_virtual_variable( raise KeyError(key) split_key = key.split(".", 1) + var_name: Optional[str] if len(split_key) == 2: - ref_name, var_name = split_key # type: str, Optional[str] + ref_name, var_name = split_key elif len(split_key) == 1: ref_name, var_name = key, None else: @@ -165,7 +164,7 @@ def calculate_dimensions(variables: Mapping[Hashable, Variable]) -> "Dict[Any, i Returns dictionary mapping from dimension names to sizes. Raises ValueError if any of the dimension sizes conflict. """ - dims = {} # type: Dict[Any, int] + dims: Dict[Any, int] = {} last_used = {} scalar_vars = {k for k, v in variables.items() if not v.dims} for k, var in variables.items(): @@ -197,15 +196,17 @@ def merge_indexes( Not public API. Used in Dataset and DataArray set_index methods. """ - vars_to_replace = {} # Dict[Any, Variable] - vars_to_remove = [] # type: list + vars_to_replace: Dict[Hashable, Variable] = {} + vars_to_remove: List[Hashable] = [] error_msg = "{} is not the name of an existing variable." for dim, var_names in indexes.items(): if isinstance(var_names, str) or not isinstance(var_names, Sequence): var_names = [var_names] - names, codes, levels = [], [], [] # type: (list, list, list) + names: List[Hashable] = [] + codes: List[List[int]] = [] + levels: List[List[int]] = [] current_index_variable = variables.get(dim) for n in var_names: @@ -225,13 +226,8 @@ def merge_indexes( if current_index_variable is not None and append: current_index = current_index_variable.to_index() if isinstance(current_index, pd.MultiIndex): - try: - current_codes = current_index.codes - except AttributeError: - # fpr pandas<0.24 - current_codes = current_index.labels names.extend(current_index.names) - codes.extend(current_codes) + codes.extend(current_index.codes) levels.extend(current_index.levels) else: names.append("%s_level_0" % dim) @@ -490,7 +486,7 @@ def __init__( if compat is not None: warnings.warn( "The `compat` argument to Dataset is deprecated and will be " - "removed in 0.14." + "removed in 0.15." "Instead, use `merge` to control how variables are combined", FutureWarning, stacklevel=2, @@ -965,7 +961,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, pd.Index]) -> "Dataset": obj = self._replace(variables, indexes=new_indexes) # switch from dimension to level names, if necessary - dim_names = {} # type: Dict[Hashable, str] + dim_names: Dict[Hashable, str] = {} for dim, idx in indexes.items(): if not isinstance(idx, pd.MultiIndex) and idx.name != dim: dim_names[dim] = idx.name @@ -1130,7 +1126,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": if (var_name,) == var.dims: indexes[var_name] = var.to_index() - needed_dims = set() # type: set + needed_dims: Set[Hashable] = set() for v in variables.values(): needed_dims.update(v.dims) @@ -1669,7 +1665,7 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]: """Block dimensions for this dataset's data or None if it's not a dask array. """ - chunks = {} # type: Dict[Hashable, Tuple[int, ...]] + chunks: Dict[Hashable, Tuple[int, ...]] = {} for v in self.variables.values(): if v.chunks is not None: for dim, c in zip(v.dims, v.chunks): @@ -1714,13 +1710,7 @@ def chunk( ------- chunked : xarray.Dataset """ - try: - from dask.base import tokenize - except ImportError: - # raise the usual error if dask is entirely missing - import dask # noqa: F401 - - raise ImportError("xarray requires dask version 0.9 or newer") + from dask.base import tokenize if isinstance(chunks, Number): chunks = dict.fromkeys(self.dims, chunks) @@ -1770,7 +1760,7 @@ def _validate_indexers( raise ValueError("dimensions %r do not exist" % invalid) # all indexers should be int, slice, np.ndarrays, or Variable - indexers_list = [] # type: List[Tuple[Any, Union[slice, Variable]]] + indexers_list: List[Tuple[Any, Union[slice, Variable]]] = [] for k, v in indexers.items(): if isinstance(v, slice): indexers_list.append((k, v)) @@ -1964,7 +1954,7 @@ def sel( carried out. See :ref:`indexing` for the details. One of indexers or indexers_kwargs must be provided. method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional - Method to use for inexact matches (requires pandas>=0.16): + Method to use for inexact matches: * None (default): only exact matches * pad / ffill: propagate last valid index value forward @@ -1974,7 +1964,6 @@ def sel( Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. - Requires pandas>=0.17. drop : bool, optional If ``drop=True``, drop coordinates variables in `indexers` instead of making them scalar. @@ -2204,12 +2193,11 @@ def reindex_like( * None (default): don't fill gaps * pad / ffill: propagate last valid index value forward * backfill / bfill: propagate next valid index value backward - * nearest: use nearest valid index value (requires pandas>=0.16) + * nearest: use nearest valid index value tolerance : optional Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. - Requires pandas>=0.17. copy : bool, optional If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -2265,12 +2253,11 @@ def reindex( * None (default): don't fill gaps * pad / ffill: propagate last valid index value forward * backfill / bfill: propagate next valid index value backward - * nearest: use nearest valid index value (requires pandas>=0.16) + * nearest: use nearest valid index value tolerance : optional Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must satisfy the equation ``abs(index[indexer] - target) <= tolerance``. - Requires pandas>=0.17. copy : bool, optional If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed @@ -2925,14 +2912,6 @@ def expand_dims( expanded : same type as caller This object, but with an additional dimension(s). """ - # TODO: get rid of the below code block when python 3.5 is no longer - # supported. - if sys.version < "3.6": - if isinstance(dim, Mapping) and not isinstance(dim, OrderedDict): - raise TypeError("dim must be an OrderedDict for python <3.6") - if dim_kwargs: - raise ValueError("dim_kwargs isn't available for python <3.6") - if dim is None: pass elif isinstance(dim, Mapping): @@ -3186,13 +3165,6 @@ def _stack_once(self, dims, new_dim): # consider dropping levels that are unused? levels = [self.get_index(dim) for dim in dims] - if LooseVersion(pd.__version__) < LooseVersion("0.19.0"): - # RangeIndex levels in a MultiIndex are broken for appending in - # pandas before v0.19.0 - levels = [ - pd.Int64Index(level) if isinstance(level, pd.RangeIndex) else level - for level in levels - ] idx = utils.multiindex_from_product_levels(levels, names=dims) variables[new_dim] = IndexVariable(new_dim, idx) @@ -3360,12 +3332,7 @@ def ensure_stackable(val): def _unstack_once(self, dim: Hashable) -> "Dataset": index = self.get_index(dim) - # GH2619. For MultiIndex, we need to call remove_unused. - if LooseVersion(pd.__version__) >= "0.20": - index = index.remove_unused_levels() - else: # for pandas 0.19 - index = pdcompat.remove_unused_levels(index) - + index = index.remove_unused_levels() full_idx = pd.MultiIndex.from_product(index.levels, names=index.names) # take a shortcut in case the MultiIndex was not modified. @@ -4987,13 +4954,6 @@ def sortby(self, variables, ascending=True): for data_array in aligned_other_vars: if data_array.ndim != 1: raise ValueError("Input DataArray is not 1-D.") - if data_array.dtype == object and LooseVersion( - np.__version__ - ) < LooseVersion("1.11.0"): - raise NotImplementedError( - "sortby uses np.lexsort under the hood, which requires " - "numpy 1.11.0 or later to support object data-type." - ) (key,) = data_array.dims vars_by_dim[key].append(data_array) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index fcd0400566f..126168d418b 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -17,10 +17,8 @@ try: import dask.array as dask_array - from . import dask_array_compat except ImportError: dask_array = None # type: ignore - dask_array_compat = None # type: ignore def _dask_or_eager_func( @@ -120,9 +118,7 @@ def notnull(data): transpose = _dask_or_eager_func("transpose") _where = _dask_or_eager_func("where", array_args=slice(3)) -isin = _dask_or_eager_func( - "isin", eager_module=npcompat, dask_module=dask_array_compat, array_args=slice(2) -) +isin = _dask_or_eager_func("isin", array_args=slice(2)) take = _dask_or_eager_func("take") broadcast_to = _dask_or_eager_func("broadcast_to") @@ -133,15 +129,13 @@ def notnull(data): array_any = _dask_or_eager_func("any") tensordot = _dask_or_eager_func("tensordot", array_args=slice(2)) -einsum = _dask_or_eager_func( - "einsum", array_args=slice(1, None), requires_dask="0.17.3" -) +einsum = _dask_or_eager_func("einsum", array_args=slice(1, None)) def gradient(x, coord, axis, edge_order): if isinstance(x, dask_array_type): - return dask_array_compat.gradient(x, coord, axis=axis, edge_order=edge_order) - return npcompat.gradient(x, coord, axis=axis, edge_order=edge_order) + return dask_array.gradient(x, coord, axis=axis, edge_order=edge_order) + return np.gradient(x, coord, axis=axis, edge_order=edge_order) def trapz(y, x, axis): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index c6b2537c958..0c7f073819d 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -7,17 +7,12 @@ import numpy as np import pandas as pd +from pandas.errors import OutOfBoundsDatetime from .duck_array_ops import array_equiv from .options import OPTIONS from .pycompat import dask_array_type, sparse_array_type -try: - from pandas.errors import OutOfBoundsDatetime -except ImportError: - # pandas < 0.20 - from pandas.tslib import OutOfBoundsDatetime - def pretty_print(x, numchars): """Given an object `x`, call `str(x)` and format the returned string so diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 6d42c254438..010c4818ca5 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -96,28 +96,12 @@ def _is_nested_tuple(possible_tuple): ) -def _index_method_kwargs(method, tolerance): - # backwards compatibility for pandas<0.16 (method) or pandas<0.17 - # (tolerance) - kwargs = {} - if method is not None: - kwargs["method"] = method - if tolerance is not None: - kwargs["tolerance"] = tolerance - return kwargs - - -def get_loc(index, label, method=None, tolerance=None): - kwargs = _index_method_kwargs(method, tolerance) - return index.get_loc(label, **kwargs) - - def get_indexer_nd(index, labels, method=None, tolerance=None): - """ Call pd.Index.get_indexer(labels). """ - kwargs = _index_method_kwargs(method, tolerance) - + """Wrapper around :meth:`pandas.Index.get_indexer` supporting n-dimensional + labels + """ flat_labels = np.ravel(labels) - flat_indexer = index.get_indexer(flat_labels, **kwargs) + flat_indexer = index.get_indexer(flat_labels, method=method, tolerance=tolerance) indexer = flat_indexer.reshape(labels.shape) return indexer @@ -193,7 +177,9 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No if isinstance(index, pd.MultiIndex): indexer, new_index = index.get_loc_level(label.item(), level=0) else: - indexer = get_loc(index, label.item(), method, tolerance) + indexer = index.get_loc( + label.item(), method=method, tolerance=tolerance + ) elif label.dtype.kind == "b": indexer = label else: @@ -1382,7 +1368,6 @@ def __array__(self, dtype: DTypeLike = None) -> np.ndarray: @property def shape(self) -> Tuple[int]: - # .shape is broken on pandas prior to v0.15.2 return (len(self.array),) def __getitem__( diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 8159e8ebcf8..6eb0acd760e 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -342,8 +342,8 @@ def determine_coords( from .dataarray import DataArray from .dataset import Dataset - coord_names = set() # type: set - noncoord_names = set() # type: set + coord_names: Set[Hashable] = set() + noncoord_names: Set[Hashable] = set() for mapping in list_of_mappings: if isinstance(mapping, Dataset): diff --git a/xarray/core/missing.py b/xarray/core/missing.py index fdabdb156b6..dfe209e3f7e 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -17,10 +17,10 @@ class BaseInterpolator: """Generic interpolator class for normalizing interpolation methods """ - cons_kwargs = None # type: Dict[str, Any] - call_kwargs = None # type: Dict[str, Any] - f = None # type: Callable - method = None # type: str + cons_kwargs: Dict[str, Any] + call_kwargs: Dict[str, Any] + f: Callable + method: str def __call__(self, x): return self.f(x, **self.call_kwargs) diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index 22c14d9ff40..1018332df29 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -30,294 +30,10 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import builtins import operator -from distutils.version import LooseVersion from typing import Union import numpy as np -try: - from numpy import isin -except ImportError: - - def isin(element, test_elements, assume_unique=False, invert=False): - """ - Calculates `element in test_elements`, broadcasting over `element` - only. Returns a boolean array of the same shape as `element` that is - True where an element of `element` is in `test_elements` and False - otherwise. - - Parameters - ---------- - element : array_like - Input array. - test_elements : array_like - The values against which to test each value of `element`. - This argument is flattened if it is an array or array_like. - See notes for behavior with non-array-like parameters. - assume_unique : bool, optional - If True, the input arrays are both assumed to be unique, which - can speed up the calculation. Default is False. - invert : bool, optional - If True, the values in the returned array are inverted, as if - calculating `element not in test_elements`. Default is False. - ``np.isin(a, b, invert=True)`` is equivalent to (but faster - than) ``np.invert(np.isin(a, b))``. - - Returns - ------- - isin : ndarray, bool - Has the same shape as `element`. The values `element[isin]` - are in `test_elements`. - - See Also - -------- - in1d : Flattened version of this function. - numpy.lib.arraysetops : Module with a number of other functions for - performing set operations on arrays. - - Notes - ----- - - `isin` is an element-wise function version of the python keyword `in`. - ``isin(a, b)`` is roughly equivalent to - ``np.array([item in b for item in a])`` if `a` and `b` are 1-D - sequences. - - `element` and `test_elements` are converted to arrays if they are not - already. If `test_elements` is a set (or other non-sequence collection) - it will be converted to an object array with one element, rather than - an array of the values contained in `test_elements`. This is a - consequence of the `array` constructor's way of handling non-sequence - collections. Converting the set to a list usually gives the desired - behavior. - - .. versionadded:: 1.13.0 - - Examples - -------- - >>> element = 2*np.arange(4).reshape((2, 2)) - >>> element - array([[0, 2], - [4, 6]]) - >>> test_elements = [1, 2, 4, 8] - >>> mask = np.isin(element, test_elements) - >>> mask - array([[ False, True], - [ True, False]]) - >>> element[mask] - array([2, 4]) - >>> mask = np.isin(element, test_elements, invert=True) - >>> mask - array([[ True, False], - [ False, True]]) - >>> element[mask] - array([0, 6]) - - Because of how `array` handles sets, the following does not - work as expected: - - >>> test_set = {1, 2, 4, 8} - >>> np.isin(element, test_set) - array([[ False, False], - [ False, False]]) - - Casting the set to a list gives the expected result: - - >>> np.isin(element, list(test_set)) - array([[ False, True], - [ True, False]]) - """ - element = np.asarray(element) - return np.in1d( - element, test_elements, assume_unique=assume_unique, invert=invert - ).reshape(element.shape) - - -if LooseVersion(np.__version__) >= LooseVersion("1.13"): - gradient = np.gradient -else: - - def normalize_axis_tuple(axes, N): - if isinstance(axes, int): - axes = (axes,) - return tuple([N + a if a < 0 else a for a in axes]) - - def gradient(f, *varargs, axis=None, edge_order=1): - f = np.asanyarray(f) - N = f.ndim # number of dimensions - - axes = axis - del axis - - if axes is None: - axes = tuple(range(N)) - else: - axes = normalize_axis_tuple(axes, N) - - len_axes = len(axes) - n = len(varargs) - if n == 0: - # no spacing argument - use 1 in all axes - dx = [1.0] * len_axes - elif n == 1 and np.ndim(varargs[0]) == 0: - # single scalar for all axes - dx = varargs * len_axes - elif n == len_axes: - # scalar or 1d array for each axis - dx = list(varargs) - for i, distances in enumerate(dx): - if np.ndim(distances) == 0: - continue - elif np.ndim(distances) != 1: - raise ValueError("distances must be either scalars or 1d") - if len(distances) != f.shape[axes[i]]: - raise ValueError( - "when 1d, distances must match the " - "length of the corresponding dimension" - ) - diffx = np.diff(distances) - # if distances are constant reduce to the scalar case - # since it brings a consistent speedup - if (diffx == diffx[0]).all(): - diffx = diffx[0] - dx[i] = diffx - else: - raise TypeError("invalid number of arguments") - - if edge_order > 2: - raise ValueError("'edge_order' greater than 2 not supported") - - # use central differences on interior and one-sided differences on the - # endpoints. This preserves second order-accuracy over the full domain. - - outvals = [] - - # create slice objects --- initially all are [:, :, ..., :] - slice1 = [slice(None)] * N - slice2 = [slice(None)] * N - slice3 = [slice(None)] * N - slice4 = [slice(None)] * N - - otype = f.dtype.char - if otype not in ["f", "d", "F", "D", "m", "M"]: - otype = "d" - - # Difference of datetime64 elements results in timedelta64 - if otype == "M": - # Need to use the full dtype name because it contains unit - # information - otype = f.dtype.name.replace("datetime", "timedelta") - elif otype == "m": - # Needs to keep the specific units, can't be a general unit - otype = f.dtype - - # Convert datetime64 data into ints. Make dummy variable `y` - # that is a view of ints if the data is datetime64, otherwise - # just set y equal to the array `f`. - if f.dtype.char in ["M", "m"]: - y = f.view("int64") - else: - y = f - - for i, axis in enumerate(axes): - if y.shape[axis] < edge_order + 1: - raise ValueError( - "Shape of array too small to calculate a numerical " - "gradient, at least (edge_order + 1) elements are " - "required." - ) - # result allocation - out = np.empty_like(y, dtype=otype) - - uniform_spacing = np.ndim(dx[i]) == 0 - - # Numerical differentiation: 2nd order interior - slice1[axis] = slice(1, -1) - slice2[axis] = slice(None, -2) - slice3[axis] = slice(1, -1) - slice4[axis] = slice(2, None) - - if uniform_spacing: - out[slice1] = (f[slice4] - f[slice2]) / (2.0 * dx[i]) - else: - dx1 = dx[i][0:-1] - dx2 = dx[i][1:] - a = -(dx2) / (dx1 * (dx1 + dx2)) - b = (dx2 - dx1) / (dx1 * dx2) - c = dx1 / (dx2 * (dx1 + dx2)) - # fix the shape for broadcasting - shape = np.ones(N, dtype=int) - shape[axis] = -1 - a.shape = b.shape = c.shape = shape - # 1D equivalent -- - # out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:] - out[slice1] = a * f[slice2] + b * f[slice3] + c * f[slice4] - - # Numerical differentiation: 1st order edges - if edge_order == 1: - slice1[axis] = 0 - slice2[axis] = 1 - slice3[axis] = 0 - dx_0 = dx[i] if uniform_spacing else dx[i][0] - # 1D equivalent -- out[0] = (y[1] - y[0]) / (x[1] - x[0]) - out[slice1] = (y[slice2] - y[slice3]) / dx_0 - - slice1[axis] = -1 - slice2[axis] = -1 - slice3[axis] = -2 - dx_n = dx[i] if uniform_spacing else dx[i][-1] - # 1D equivalent -- out[-1] = (y[-1] - y[-2]) / (x[-1] - x[-2]) - out[slice1] = (y[slice2] - y[slice3]) / dx_n - - # Numerical differentiation: 2nd order edges - else: - slice1[axis] = 0 - slice2[axis] = 0 - slice3[axis] = 1 - slice4[axis] = 2 - if uniform_spacing: - a = -1.5 / dx[i] - b = 2.0 / dx[i] - c = -0.5 / dx[i] - else: - dx1 = dx[i][0] - dx2 = dx[i][1] - a = -(2.0 * dx1 + dx2) / (dx1 * (dx1 + dx2)) - b = (dx1 + dx2) / (dx1 * dx2) - c = -dx1 / (dx2 * (dx1 + dx2)) - # 1D equivalent -- out[0] = a * y[0] + b * y[1] + c * y[2] - out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4] - - slice1[axis] = -1 - slice2[axis] = -3 - slice3[axis] = -2 - slice4[axis] = -1 - if uniform_spacing: - a = 0.5 / dx[i] - b = -2.0 / dx[i] - c = 1.5 / dx[i] - else: - dx1 = dx[i][-2] - dx2 = dx[i][-1] - a = (dx2) / (dx1 * (dx1 + dx2)) - b = -(dx2 + dx1) / (dx1 * dx2) - c = (2.0 * dx2 + dx1) / (dx2 * (dx1 + dx2)) - # 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1] - out[slice1] = a * y[slice2] + b * y[slice3] + c * y[slice4] - - outvals.append(out) - - # reset the slice object in this dimension to ":" - slice1[axis] = slice(None) - slice2[axis] = slice(None) - slice3[axis] = slice(None) - slice4[axis] = slice(None) - - if len_axes == 1: - return outvals[0] - else: - return outvals - # Vendored from NumPy 1.12; we need a version that support duck typing, even # on dask arrays with __array_function__ enabled. diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index 91998482e3e..7591fff3abe 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -39,9 +39,9 @@ from distutils.version import LooseVersion -import numpy as np import pandas as pd + # allow ourselves to type checks for Panel even after it's removed if LooseVersion(pd.__version__) < "0.25.0": Panel = pd.Panel @@ -51,78 +51,9 @@ class Panel: # type: ignore pass -# for pandas 0.19 -def remove_unused_levels(self): - """ - create a new MultiIndex from the current that removing - unused levels, meaning that they are not expressed in the labels - The resulting MultiIndex will have the same outward - appearance, meaning the same .values and ordering. It will also - be .equals() to the original. - .. versionadded:: 0.20.0 - Returns - ------- - MultiIndex - Examples - -------- - >>> i = pd.MultiIndex.from_product([range(2), list('ab')]) - MultiIndex(levels=[[0, 1], ['a', 'b']], - codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) - >>> i[2:] - MultiIndex(levels=[[0, 1], ['a', 'b']], - codes=[[1, 1], [0, 1]]) - The 0 from the first level is not represented - and can be removed - >>> i[2:].remove_unused_levels() - MultiIndex(levels=[[1], ['a', 'b']], - codes=[[0, 0], [0, 1]]) - """ - import pandas.core.algorithms as algos - - new_levels = [] - new_labels = [] - - changed = False - for lev, lab in zip(self.levels, self.labels): - - # Since few levels are typically unused, bincount() is more - # efficient than unique() - however it only accepts positive values - # (and drops order): - uniques = np.where(np.bincount(lab + 1) > 0)[0] - 1 - has_na = int(len(uniques) and (uniques[0] == -1)) - - if len(uniques) != len(lev) + has_na: - # We have unused levels - changed = True - - # Recalculate uniques, now preserving order. - # Can easily be cythonized by exploiting the already existing - # "uniques" and stop parsing "lab" when all items are found: - uniques = algos.unique(lab) - if has_na: - na_idx = np.where(uniques == -1)[0] - # Just ensure that -1 is in first position: - uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]] +def count_not_none(*args) -> int: + """Compute the number of non-None arguments. - # labels get mapped from uniques to 0:len(uniques) - # -1 (if present) is mapped to last position - label_mapping = np.zeros(len(lev) + has_na) - # ... and reassigned value -1: - label_mapping[uniques] = np.arange(len(uniques)) - has_na - - lab = label_mapping[lab] - - # new levels are simple - lev = lev.take(uniques[has_na:]) - - new_levels.append(lev) - new_labels.append(lab) - - result = self._shallow_copy() - - if changed: - result._reset_identity() - result._set_levels(new_levels, validate=False) - result._set_labels(new_labels, validate=False) - - return result + Copied from pandas.core.common.count_not_none (not part of the public API) + """ + return sum([arg is not None for arg in args]) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index a812e7472ca..3e86ebbfd73 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,7 +1,5 @@ import functools -import warnings from collections import OrderedDict -from distutils.version import LooseVersion import numpy as np @@ -71,17 +69,6 @@ def __init__(self, obj, windows, min_periods=None, center=False): ------- rolling : type of input argument """ - - if bottleneck is not None and ( - LooseVersion(bottleneck.__version__) < LooseVersion("1.0") - ): - warnings.warn( - "xarray requires bottleneck version of 1.0 or " - "greater for rolling operations. Rolling " - "aggregation methods will use numpy instead" - "of bottleneck." - ) - if len(windows) != 1: raise ValueError("exactly one dim/window should be provided") @@ -332,14 +319,6 @@ def _bottleneck_reduce(self, func, **kwargs): padded = self.obj.variable if self.center: - if ( - LooseVersion(np.__version__) < LooseVersion("1.13") - and self.obj.dtype.kind == "b" - ): - # with numpy < 1.13 bottleneck cannot handle np.nan-Boolean - # mixed array correctly. We cast boolean array to float. - padded = padded.astype(float) - if isinstance(padded.data, dask_array_type): # Workaround to make the padded chunk size is larger than # self.window-1 diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 2139d246f46..ac6768e8a9c 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -1,5 +1,6 @@ import numpy as np +from .pdcompat import count_not_none from .pycompat import dask_array_type @@ -24,13 +25,11 @@ def move_exp_nanmean(array, *, axis, alpha): def _get_center_of_mass(comass, span, halflife, alpha): """ - Vendored from pandas.core.window._get_center_of_mass + Vendored from pandas.core.window.common._get_center_of_mass See licenses/PANDAS_LICENSE for the function's license """ - from pandas.core import common as com - - valid_count = com.count_not_none(comass, span, halflife, alpha) + valid_count = count_not_none(comass, span, halflife, alpha) if valid_count > 1: raise ValueError("comass, span, halflife, and alpha " "are mutually exclusive") diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 0d730edeaeb..12024ff8245 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -444,7 +444,7 @@ class OrderedSet(MutableSet[T]): __slots__ = ("_ordered_dict",) def __init__(self, values: AbstractSet[T] = None): - self._ordered_dict = OrderedDict() # type: MutableMapping[T, None] + self._ordered_dict: MutableMapping[T, None] = OrderedDict() if values is not None: # Disable type checking - both mypy and PyCharm believes that # we're altering the type of self in place (see signature of diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index f69a8af7a2f..e070ea16855 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -2,7 +2,6 @@ import textwrap import warnings from datetime import datetime -from distutils.version import LooseVersion from inspect import getfullargspec from typing import Any, Iterable, Mapping, Tuple, Union @@ -13,12 +12,9 @@ from ..core.utils import is_scalar try: - import nc_time_axis + import nc_time_axis # noqa: F401 - if LooseVersion(nc_time_axis.__version__) < LooseVersion("1.2.0"): - nc_time_axis_available = False - else: - nc_time_axis_available = True + nc_time_axis_available = True except ImportError: nc_time_axis_available = False @@ -52,15 +48,7 @@ def register_pandas_datetime_converter_if_needed(): # based on https://github.com/pandas-dev/pandas/pull/17710 global _registered if not _registered: - try: - from pandas.plotting import register_matplotlib_converters - - register_matplotlib_converters() - except ImportError: - # register_matplotlib_converters new in pandas 0.22 - from pandas.tseries import converter - - converter.register() + pd.plotting.register_matplotlib_converters() _registered = True diff --git a/xarray/testing.py b/xarray/testing.py index 787ec1aadb0..f01cbe896b9 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -1,6 +1,6 @@ """Testing functions exposed to the user API""" from collections import OrderedDict -from typing import Hashable, Union +from typing import Hashable, Set, Union import numpy as np import pandas as pd @@ -162,7 +162,7 @@ def _assert_indexes_invariants_checks(indexes, possible_coord_variables, dims): def _assert_variable_invariants(var: Variable, name: Hashable = None): if name is None: - name_or_empty = () # type: tuple + name_or_empty: tuple = () else: name_or_empty = (name,) assert isinstance(var._dims, tuple), name_or_empty + (var._dims,) @@ -212,7 +212,7 @@ def _assert_dataset_invariants(ds: Dataset): assert type(ds._dims) is dict, ds._dims assert all(isinstance(v, int) for v in ds._dims.values()), ds._dims - var_dims = set() # type: set + var_dims: Set[Hashable] = set() for v in ds._variables.values(): var_dims.update(v.dims) assert ds._dims.keys() == var_dims, (set(ds._dims), var_dims) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 4f5a3e37888..8b4d3073e1c 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -17,11 +17,7 @@ from xarray.core.options import set_options from xarray.plot.utils import import_seaborn -try: - from pandas.testing import assert_frame_equal -except ImportError: - # old location, for pandas < 0.20 - from pandas.util.testing import assert_frame_equal # noqa: F401 +from pandas.testing import assert_frame_equal # noqa: F401 # import mpl and change the backend before other mpl imports try: @@ -61,7 +57,6 @@ def LooseVersion(vstring): has_matplotlib, requires_matplotlib = _importorskip("matplotlib") -has_matplotlib2, requires_matplotlib2 = _importorskip("matplotlib", minversion="2") has_scipy, requires_scipy = _importorskip("scipy") has_pydap, requires_pydap = _importorskip("pydap.client") has_netCDF4, requires_netCDF4 = _importorskip("netCDF4") @@ -69,30 +64,17 @@ def LooseVersion(vstring): has_pynio, requires_pynio = _importorskip("Nio") has_pseudonetcdf, requires_pseudonetcdf = _importorskip("PseudoNetCDF") has_cftime, requires_cftime = _importorskip("cftime") -has_nc_time_axis, requires_nc_time_axis = _importorskip( - "nc_time_axis", minversion="1.2.0" -) -has_cftime_1_0_2_1, requires_cftime_1_0_2_1 = _importorskip( - "cftime", minversion="1.0.2.1" -) has_dask, requires_dask = _importorskip("dask") has_bottleneck, requires_bottleneck = _importorskip("bottleneck") +has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") has_rasterio, requires_rasterio = _importorskip("rasterio") -has_pathlib, requires_pathlib = _importorskip("pathlib") -has_zarr, requires_zarr = _importorskip("zarr", minversion="2.2") -has_np113, requires_np113 = _importorskip("numpy", minversion="1.13.0") +has_zarr, requires_zarr = _importorskip("zarr") has_iris, requires_iris = _importorskip("iris") has_cfgrib, requires_cfgrib = _importorskip("cfgrib") has_numbagg, requires_numbagg = _importorskip("numbagg") has_sparse, requires_sparse = _importorskip("sparse") # some special cases -has_h5netcdf07, requires_h5netcdf07 = _importorskip("h5netcdf", minversion="0.7") -has_h5py29, requires_h5py29 = _importorskip("h5py", minversion="2.9.0") -has_h5fileobj = has_h5netcdf07 and has_h5py29 -requires_h5fileobj = pytest.mark.skipif( - not has_h5fileobj, reason="requires h5py>2.9.0 & h5netcdf>0.7" -) has_scipy_or_netCDF4 = has_scipy or has_netCDF4 requires_scipy_or_netCDF4 = pytest.mark.skipif( not has_scipy_or_netCDF4, reason="requires scipy or netCDF4" @@ -101,8 +83,6 @@ def LooseVersion(vstring): requires_cftime_or_netCDF4 = pytest.mark.skipif( not has_cftime_or_netCDF4, reason="requires cftime or netCDF4" ) -if not has_pathlib: - has_pathlib, requires_pathlib = _importorskip("pathlib2") try: import_seaborn() has_seaborn = True @@ -116,10 +96,7 @@ def LooseVersion(vstring): if has_dask: import dask - if LooseVersion(dask.__version__) < "0.18": - dask.set_options(get=dask.get) - else: - dask.config.set(scheduler="single-threaded") + dask.config.set(scheduler="single-threaded") flaky = pytest.mark.flaky network = pytest.mark.network diff --git a/xarray/tests/test_accessor_str.py b/xarray/tests/test_accessor_str.py index 56bf6dbb3a2..5cd815eebf0 100644 --- a/xarray/tests/test_accessor_str.py +++ b/xarray/tests/test_accessor_str.py @@ -56,7 +56,7 @@ def dtype(request): def test_dask(): import dask.array as da - arr = da.from_array(["a", "b", "c"]) + arr = da.from_array(["a", "b", "c"], chunks=-1) xarr = xr.DataArray(arr) result = xarr.str.len().compute() diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 4645b4db796..0120e2ca0fe 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -14,6 +14,7 @@ import numpy as np import pandas as pd +from pandas.errors import OutOfBoundsDatetime import pytest import xarray as xr @@ -51,10 +52,8 @@ requires_cfgrib, requires_cftime, requires_dask, - requires_h5fileobj, requires_h5netcdf, requires_netCDF4, - requires_pathlib, requires_pseudonetcdf, requires_pydap, requires_pynio, @@ -80,13 +79,6 @@ except ImportError: pass -try: - from pandas.errors import OutOfBoundsDatetime -except ImportError: - # pandas < 0.20 - from pandas.tslib import OutOfBoundsDatetime - - ON_WINDOWS = sys.platform == "win32" @@ -233,8 +225,8 @@ class NetCDF3Only: class DatasetIOBase: - engine = None # type: Optional[str] - file_format = None # type: Optional[str] + engine: Optional[str] = None + file_format: Optional[str] = None def create_store(self): raise NotImplementedError() @@ -1355,19 +1347,6 @@ def test_unsorted_index_raises(self): except IndexError as err: assert "first by calling .load" in str(err) - def test_88_character_filename_segmentation_fault(self): - # should be fixed in netcdf4 v1.3.1 - with mock.patch("netCDF4.__version__", "1.2.4"): - with warnings.catch_warnings(): - message = ( - "A segmentation fault may occur when the " - "file path has exactly 88 characters" - ) - warnings.filterwarnings("error", message) - with pytest.raises(Warning): - # Need to construct 88 character filepath - xr.Dataset().to_netcdf("a" * (88 - len(os.getcwd()) - 1)) - def test_setncattr_string(self): list_of_strings = ["list", "of", "strings"] one_element_list_of_strings = ["one element"] @@ -2334,7 +2313,7 @@ def test_dump_encodings_h5py(self): assert actual.x.encoding["compression_opts"] is None -@requires_h5fileobj +@requires_h5netcdf class TestH5NetCDFFileObject(TestH5NetCDFData): engine = "h5netcdf" @@ -2754,7 +2733,6 @@ def test_open_mfdataset_2d(self): (2, 2, 2, 2), ) - @requires_pathlib def test_open_mfdataset_pathlib(self): original = Dataset({"foo": ("x", np.random.randn(10))}) with create_tmp_file() as tmp1: @@ -2768,7 +2746,6 @@ def test_open_mfdataset_pathlib(self): ) as actual: assert_identical(original, actual) - @requires_pathlib def test_open_mfdataset_2d_pathlib(self): original = Dataset({"foo": (["x", "y"], np.random.randn(10, 8))}) with create_tmp_file() as tmp1: @@ -2903,7 +2880,6 @@ def test_save_mfdataset_invalid_dataarray(self): with raises_regex(TypeError, "supports writing Dataset"): save_mfdataset([da], ["dataarray"]) - @requires_pathlib def test_save_mfdataset_pathlib_roundtrip(self): original = Dataset({"foo": ("x", np.random.randn(10))}) datasets = [original.isel(x=slice(5)), original.isel(x=slice(5, 10))] @@ -4231,7 +4207,6 @@ def test_dataarray_to_netcdf_return_bytes(self): output = data.to_netcdf() assert isinstance(output, bytes) - @requires_pathlib def test_dataarray_to_netcdf_no_name_pathlib(self): original_da = DataArray(np.arange(12).reshape((3, 4))) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index fcc9acf75bb..e49dc72abdd 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -15,13 +15,7 @@ ) from xarray.tests import assert_array_equal, assert_identical -from . import ( - has_cftime, - has_cftime_1_0_2_1, - has_cftime_or_netCDF4, - raises_regex, - requires_cftime, -) +from . import has_cftime, has_cftime_or_netCDF4, raises_regex, requires_cftime from .test_coding_times import ( _ALL_CALENDARS, _NON_STANDARD_CALENDARS, @@ -175,14 +169,14 @@ def index_with_name(date_type): return CFTimeIndex(dates, name="foo") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize(("name", "expected_name"), [("bar", "bar"), (None, "foo")]) def test_constructor_with_name(index_with_name, name, expected_name): result = CFTimeIndex(index_with_name, name=name).name assert result == expected_name -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_assert_all_valid_date_type(date_type, index): import cftime @@ -203,7 +197,7 @@ def test_assert_all_valid_date_type(date_type, index): assert_all_valid_date_type(np.array([date_type(1, 1, 1), date_type(1, 2, 1)])) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( ("field", "expected"), [ @@ -221,21 +215,21 @@ def test_cftimeindex_field_accessors(index, field, expected): assert_array_equal(result, expected) -@pytest.mark.skipif(not has_cftime_1_0_2_1, reason="cftime not installed") +@requires_cftime def test_cftimeindex_dayofyear_accessor(index): result = index.dayofyear expected = [date.dayofyr for date in index] assert_array_equal(result, expected) -@pytest.mark.skipif(not has_cftime_1_0_2_1, reason="cftime not installed") +@requires_cftime def test_cftimeindex_dayofweek_accessor(index): result = index.dayofweek expected = [date.dayofwk for date in index] assert_array_equal(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( ("string", "date_args", "reso"), [ @@ -255,7 +249,7 @@ def test_parse_iso8601_with_reso(date_type, string, date_args, reso): assert result_reso == expected_reso -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_parse_string_to_bounds_year(date_type, dec_days): parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 1, 1) @@ -265,7 +259,7 @@ def test_parse_string_to_bounds_year(date_type, dec_days): assert result_end == expected_end -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_parse_string_to_bounds_month_feb(date_type, feb_days): parsed = date_type(2, 2, 10, 6, 2, 8, 1) expected_start = date_type(2, 2, 1) @@ -275,7 +269,7 @@ def test_parse_string_to_bounds_month_feb(date_type, feb_days): assert result_end == expected_end -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_parse_string_to_bounds_month_dec(date_type, dec_days): parsed = date_type(2, 12, 1) expected_start = date_type(2, 12, 1) @@ -285,7 +279,7 @@ def test_parse_string_to_bounds_month_dec(date_type, dec_days): assert result_end == expected_end -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( ("reso", "ex_start_args", "ex_end_args"), [ @@ -307,13 +301,13 @@ def test_parsed_string_to_bounds_sub_monthly( assert result_end == expected_end -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_parsed_string_to_bounds_raises(date_type): with pytest.raises(KeyError): _parsed_string_to_bounds(date_type, "a", date_type(1, 1, 1)) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_get_loc(date_type, index): result = index.get_loc("0001") assert result == slice(0, 2) @@ -328,7 +322,7 @@ def test_get_loc(date_type, index): index.get_loc("1234") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("kind", ["loc", "getitem"]) def test_get_slice_bound(date_type, index, kind): result = index.get_slice_bound("0001", "left", kind) @@ -348,7 +342,7 @@ def test_get_slice_bound(date_type, index, kind): assert result == expected -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("kind", ["loc", "getitem"]) def test_get_slice_bound_decreasing_index(date_type, monotonic_decreasing_index, kind): result = monotonic_decreasing_index.get_slice_bound("0001", "left", kind) @@ -372,7 +366,7 @@ def test_get_slice_bound_decreasing_index(date_type, monotonic_decreasing_index, assert result == expected -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("kind", ["loc", "getitem"]) def test_get_slice_bound_length_one_index(date_type, length_one_index, kind): result = length_one_index.get_slice_bound("0001", "left", kind) @@ -392,19 +386,19 @@ def test_get_slice_bound_length_one_index(date_type, length_one_index, kind): assert result == expected -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_string_slice_length_one_index(length_one_index): da = xr.DataArray([1], coords=[length_one_index], dims=["time"]) result = da.sel(time=slice("0001", "0001")) assert_identical(result, da) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_date_type_property(date_type, index): assert index.date_type is date_type -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_contains(date_type, index): assert "0001-01-01" in index assert "0001" in index @@ -413,7 +407,7 @@ def test_contains(date_type, index): assert date_type(3, 1, 1) not in index -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_groupby(da): result = da.groupby("time.month").sum("time") expected = xr.DataArray([4, 6], coords=[[1, 2]], dims=["month"]) @@ -427,7 +421,7 @@ def test_groupby(da): } -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "sel_arg", list(SEL_STRING_OR_LIST_TESTS.values()), @@ -439,7 +433,7 @@ def test_sel_string_or_list(da, index, sel_arg): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_sel_date_slice_or_list(da, index, date_type): expected = xr.DataArray([1, 2], coords=[index[:2]], dims=["time"]) result = da.sel(time=slice(date_type(1, 1, 1), date_type(1, 12, 30))) @@ -449,14 +443,14 @@ def test_sel_date_slice_or_list(da, index, date_type): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_sel_date_scalar(da, date_type, index): expected = xr.DataArray(1).assign_coords(time=index[0]) result = da.sel(time=date_type(1, 1, 1)) assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "sel_kwargs", [{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}], @@ -471,7 +465,7 @@ def test_sel_date_scalar_nearest(da, date_type, index, sel_kwargs): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "sel_kwargs", [{"method": "pad"}, {"method": "pad", "tolerance": timedelta(days=365)}], @@ -486,7 +480,7 @@ def test_sel_date_scalar_pad(da, date_type, index, sel_kwargs): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "sel_kwargs", [{"method": "backfill"}, {"method": "backfill", "tolerance": timedelta(days=365)}], @@ -501,7 +495,7 @@ def test_sel_date_scalar_backfill(da, date_type, index, sel_kwargs): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "sel_kwargs", [ @@ -515,7 +509,7 @@ def test_sel_date_scalar_tolerance_raises(da, date_type, sel_kwargs): da.sel(time=date_type(1, 5, 1), **sel_kwargs) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "sel_kwargs", [{"method": "nearest"}, {"method": "nearest", "tolerance": timedelta(days=70)}], @@ -534,7 +528,7 @@ def test_sel_date_list_nearest(da, date_type, index, sel_kwargs): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "sel_kwargs", [{"method": "pad"}, {"method": "pad", "tolerance": timedelta(days=365)}], @@ -545,7 +539,7 @@ def test_sel_date_list_pad(da, date_type, index, sel_kwargs): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "sel_kwargs", [{"method": "backfill"}, {"method": "backfill", "tolerance": timedelta(days=365)}], @@ -556,7 +550,7 @@ def test_sel_date_list_backfill(da, date_type, index, sel_kwargs): assert_identical(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize( "sel_kwargs", [ @@ -570,7 +564,7 @@ def test_sel_date_list_tolerance_raises(da, date_type, sel_kwargs): da.sel(time=[date_type(1, 2, 1), date_type(1, 5, 1)], **sel_kwargs) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_isel(da, index): expected = xr.DataArray(1).assign_coords(time=index[0]) result = da.isel(time=0) @@ -597,7 +591,7 @@ def range_args(date_type): ] -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_indexing_in_series_getitem(series, index, scalar_args, range_args): for arg in scalar_args: assert series[arg] == 1 @@ -607,7 +601,7 @@ def test_indexing_in_series_getitem(series, index, scalar_args, range_args): assert series[arg].equals(expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_indexing_in_series_loc(series, index, scalar_args, range_args): for arg in scalar_args: assert series.loc[arg] == 1 @@ -617,7 +611,7 @@ def test_indexing_in_series_loc(series, index, scalar_args, range_args): assert series.loc[arg].equals(expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_indexing_in_series_iloc(series, index): expected = 1 assert series.iloc[0] == expected @@ -626,7 +620,7 @@ def test_indexing_in_series_iloc(series, index): assert series.iloc[:2].equals(expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_series_dropna(index): series = pd.Series([0.0, 1.0, np.nan, np.nan], index=index) expected = series.iloc[:2] @@ -634,7 +628,7 @@ def test_series_dropna(index): assert result.equals(expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): expected = pd.Series([1], name=index[0]) for arg in scalar_args: @@ -647,7 +641,7 @@ def test_indexing_in_dataframe_loc(df, index, scalar_args, range_args): assert result.equals(expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_indexing_in_dataframe_iloc(df, index): expected = pd.Series([1], name=index[0]) result = df.iloc[0] @@ -676,13 +670,13 @@ def test_concat_cftimeindex(date_type): assert not isinstance(da.indexes["time"], CFTimeIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_empty_cftimeindex(): index = CFTimeIndex([]) assert index.date_type is None -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_cftimeindex_add(index): date_type = index.date_type expected_dates = [ @@ -697,7 +691,7 @@ def test_cftimeindex_add(index): assert isinstance(result, CFTimeIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_add_timedeltaindex(calendar): a = xr.cftime_range("2000", periods=5, calendar=calendar) @@ -708,7 +702,7 @@ def test_cftimeindex_add_timedeltaindex(calendar): assert isinstance(result, CFTimeIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_cftimeindex_radd(index): date_type = index.date_type expected_dates = [ @@ -723,7 +717,7 @@ def test_cftimeindex_radd(index): assert isinstance(result, CFTimeIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_timedeltaindex_add_cftimeindex(calendar): a = xr.cftime_range("2000", periods=5, calendar=calendar) @@ -734,7 +728,7 @@ def test_timedeltaindex_add_cftimeindex(calendar): assert isinstance(result, CFTimeIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_cftimeindex_sub(index): date_type = index.date_type expected_dates = [ @@ -750,7 +744,7 @@ def test_cftimeindex_sub(index): assert isinstance(result, CFTimeIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_cftimeindex(calendar): a = xr.cftime_range("2000", periods=5, calendar=calendar) @@ -761,7 +755,7 @@ def test_cftimeindex_sub_cftimeindex(calendar): assert isinstance(result, pd.TimedeltaIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_cftime_datetime(calendar): a = xr.cftime_range("2000", periods=5, calendar=calendar) @@ -771,7 +765,7 @@ def test_cftimeindex_sub_cftime_datetime(calendar): assert isinstance(result, pd.TimedeltaIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftime_datetime_sub_cftimeindex(calendar): a = xr.cftime_range("2000", periods=5, calendar=calendar) @@ -781,7 +775,7 @@ def test_cftime_datetime_sub_cftimeindex(calendar): assert isinstance(result, pd.TimedeltaIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _CFTIME_CALENDARS) def test_cftimeindex_sub_timedeltaindex(calendar): a = xr.cftime_range("2000", periods=5, calendar=calendar) @@ -792,13 +786,13 @@ def test_cftimeindex_sub_timedeltaindex(calendar): assert isinstance(result, CFTimeIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_cftimeindex_rsub(index): with pytest.raises(TypeError): timedelta(days=1) - index -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("freq", ["D", timedelta(days=1)]) def test_cftimeindex_shift(index, freq): date_type = index.date_type @@ -814,14 +808,14 @@ def test_cftimeindex_shift(index, freq): assert isinstance(result, CFTimeIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_cftimeindex_shift_invalid_n(): index = xr.cftime_range("2000", periods=3) with pytest.raises(TypeError): index.shift("a", "D") -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime def test_cftimeindex_shift_invalid_freq(): index = xr.cftime_range("2000", periods=3) with pytest.raises(TypeError): @@ -850,18 +844,18 @@ def test_parse_array_of_cftime_strings(): np.testing.assert_array_equal(result, expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_strftime_of_cftime_array(calendar): date_format = "%Y%m%d%H%M" cf_values = xr.cftime_range("2000", periods=5, calendar=calendar) dt_values = pd.date_range("2000", periods=5) - expected = dt_values.strftime(date_format) + expected = pd.Index(dt_values.strftime(date_format)) result = cf_values.strftime(date_format) assert result.equals(expected) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) @pytest.mark.parametrize("unsafe", [False, True]) def test_to_datetimeindex(calendar, unsafe): @@ -879,7 +873,7 @@ def test_to_datetimeindex(calendar, unsafe): assert isinstance(result, pd.DatetimeIndex) -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", _ALL_CALENDARS) def test_to_datetimeindex_out_of_range(calendar): index = xr.cftime_range("0001", periods=5, calendar=calendar) @@ -887,7 +881,7 @@ def test_to_datetimeindex_out_of_range(calendar): index.to_datetimeindex() -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.parametrize("calendar", ["all_leap", "360_day"]) def test_to_datetimeindex_feb_29(calendar): index = xr.cftime_range("2001-02-28", periods=2, calendar=calendar) @@ -895,7 +889,7 @@ def test_to_datetimeindex_feb_29(calendar): index.to_datetimeindex() -@pytest.mark.skipif(not has_cftime, reason="cftime not installed") +@requires_cftime @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/24263") def test_multiindex(): index = xr.cftime_range("2001-01-01", periods=100, calendar="360_day") diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index bbc8dd82c95..c4f32795b59 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -8,7 +8,6 @@ from xarray.core.resample_cftime import CFTimeGrouper pytest.importorskip("cftime") -pytest.importorskip("pandas", minversion="0.24") # Create a list of pairs of similar-length initial and resample frequencies diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index a778ff8147f..406b9c1ba69 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -4,6 +4,8 @@ import numpy as np import pandas as pd import pytest +from pandas.errors import OutOfBoundsDatetime + from xarray import DataArray, Dataset, Variable, coding, decode_cf from xarray.coding.times import ( @@ -28,11 +30,6 @@ requires_cftime_or_netCDF4, ) -try: - from pandas.errors import OutOfBoundsDatetime -except ImportError: - # pandas < 0.20 - from pandas.tslib import OutOfBoundsDatetime _NON_STANDARD_CALENDARS_SET = { "noleap", @@ -119,7 +116,9 @@ def test_cf_datetime(num_dates, units, calendar): warnings.filterwarnings("ignore", "Unable to decode time axis") actual = coding.times.decode_cf_datetime(num_dates, units, calendar) - abs_diff = np.atleast_1d(abs(actual - expected)).astype(np.timedelta64) + abs_diff = np.asarray(abs(actual - expected)).ravel() + abs_diff = pd.to_timedelta(abs_diff.tolist()).to_numpy() + # once we no longer support versions of netCDF4 older than 1.1.5, # we could do this check with near microsecond accuracy: # https://github.com/Unidata/netcdf4-python/issues/355 @@ -829,8 +828,7 @@ def test_encode_cf_datetime_overflow(shape): def test_encode_cf_datetime_pandas_min(): - # Test that encode_cf_datetime does not fail for versions - # of pandas < 0.21.1 (GH 2623). + # GH 2623 dates = pd.date_range("2000", periods=3) num, units, calendar = encode_cf_datetime(dates) expected_num = np.array([0.0, 1.0, 2.0]) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 6037669ac07..0d1e5951b32 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -714,7 +714,7 @@ def test_check_for_impossible_ordering(self): @pytest.mark.filterwarnings( - "ignore:In xarray version 0.14 `auto_combine` " "will be deprecated" + "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated" ) @pytest.mark.filterwarnings("ignore:Also `open_mfdataset` will no longer") @pytest.mark.filterwarnings("ignore:The datasets supplied") diff --git a/xarray/tests/test_computation.py b/xarray/tests/test_computation.py index 784a988b7cc..3df84c0460b 100644 --- a/xarray/tests/test_computation.py +++ b/xarray/tests/test_computation.py @@ -2,7 +2,6 @@ import operator import pickle from collections import OrderedDict -from distutils.version import LooseVersion import numpy as np import pandas as pd @@ -942,12 +941,6 @@ def test_dot(use_dask): assert (actual.data == np.einsum("ij,ijk->k", a, b)).all() assert isinstance(actual.variable.data, type(da_a.variable.data)) - if use_dask: - import dask - - if LooseVersion(dask.__version__) < LooseVersion("0.17.3"): - pytest.skip("needs dask.array.einsum") - # for only a single array is passed without dims argument, just return # as is actual = xr.dot(da_a) @@ -1008,7 +1001,7 @@ def test_dot(use_dask): assert (actual.data == np.zeros(actual.shape)).all() # Invalid cases - if not use_dask or LooseVersion(dask.__version__) > LooseVersion("0.17.4"): + if not use_dask: with pytest.raises(TypeError): xr.dot(da_a, dims="a", invalid=None) with pytest.raises(TypeError): diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 76b3ed1a8d6..c142ca7643b 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -46,16 +46,9 @@ def __call__(self, dsk, keys, **kwargs): return dask.get(dsk, keys, **kwargs) -def _set_dask_scheduler(scheduler=dask.get): - """ Backwards compatible way of setting scheduler. """ - if LooseVersion(dask.__version__) >= LooseVersion("0.18.0"): - return dask.config.set(scheduler=scheduler) - return dask.set_options(get=scheduler) - - def raise_if_dask_computes(max_computes=0): scheduler = CountingScheduler(max_computes) - return _set_dask_scheduler(scheduler) + return dask.config.set(scheduler=scheduler) def test_raise_if_dask_computes(): @@ -67,9 +60,7 @@ def test_raise_if_dask_computes(): class DaskTestCase: def assertLazyAnd(self, expected, actual, test): - with _set_dask_scheduler(dask.get): - # dask.get is the syncronous scheduler, which get's set also by - # dask.config.set(scheduler="syncronous") in current versions. + with dask.config.set(scheduler="synchronous"): test(actual, expected) if isinstance(actual, Dataset): @@ -512,10 +503,7 @@ def counting_get(*args, **kwargs): count[0] += 1 return dask.get(*args, **kwargs) - if dask.__version__ < "0.19.4": - ds.load(get=counting_get) - else: - ds.load(scheduler=counting_get) + ds.load(scheduler=counting_get) assert count[0] == 1 @@ -543,7 +531,7 @@ def test_dataarray_repr_legacy(self): {!r} Coordinates: - y (x) int64 dask.array + y (x) int64 dask.array Dimensions without coordinates: x""".format( data ) @@ -838,8 +826,6 @@ def build_dask_array(name): ) -# test both the perist method and the dask.persist function -# the dask.persist function requires a new version of dask @pytest.mark.parametrize( "persist", [lambda x: x.persist(), lambda x: dask.persist(x)[0]] ) @@ -892,21 +878,12 @@ def test_dataarray_with_dask_coords(): def test_basic_compute(): ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk({"x": 2}) for get in [dask.threaded.get, dask.multiprocessing.get, dask.local.get_sync, None]: - with ( - dask.config.set(scheduler=get) - if LooseVersion(dask.__version__) >= LooseVersion("0.19.4") - else dask.config.set(scheduler=get) - if LooseVersion(dask.__version__) >= LooseVersion("0.18.0") - else dask.set_options(get=get) - ): + with dask.config.set(scheduler=get): ds.compute() ds.foo.compute() ds.foo.variable.compute() -@pytest.mark.skipif( - LooseVersion(dask.__version__) < LooseVersion("0.20.0"), reason="needs newer dask" -) def test_dask_layers_and_dependencies(): ds = Dataset({"foo": ("x", range(5)), "bar": ("x", range(5))}).chunk() diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 717025afb23..4bae0d864a3 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -26,7 +26,6 @@ requires_bottleneck, requires_dask, requires_iris, - requires_np113, requires_numbagg, requires_scipy, requires_sparse, @@ -159,9 +158,7 @@ def test_struct_array_dims(self): when dimension is a structured array. """ # GH837, GH861 - # checking array subraction when dims are the same - # note: names need to be in sorted order to align consistently with - # pandas < 0.24 and >= 0.24. + # checking array subtraction when dims are the same p_data = np.array( [("Abe", 180), ("Stacy", 150), ("Dick", 200)], dtype=[("name", "|S256"), ("height", object)], @@ -3372,7 +3369,7 @@ def test_to_pandas(self): # roundtrips for shape in [(3,), (3, 4), (3, 4, 5)]: - if len(shape) > 2 and not LooseVersion(pd.__version__) < "0.25.0": + if len(shape) > 2 and LooseVersion(pd.__version__) >= "0.25.0": continue dims = list("abc")[: len(shape)] da = DataArray(np.random.randn(*shape), dims=dims) @@ -4186,12 +4183,12 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods): assert_equal(actual, da["time"]) +@requires_dask @pytest.mark.parametrize("name", ("mean", "count")) @pytest.mark.parametrize("center", (True, False, None)) @pytest.mark.parametrize("min_periods", (1, None)) @pytest.mark.parametrize("window", (7, 8)) def test_rolling_wrapped_dask(da_dask, name, center, min_periods, window): - pytest.importorskip("dask.array") # dask version rolling_obj = da_dask.rolling(time=window, min_periods=min_periods, center=center) actual = getattr(rolling_obj, name)().load() @@ -4297,7 +4294,6 @@ def test_rolling_reduce(da, center, min_periods, window, name): assert actual.dims == expected.dims -@requires_np113 @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @@ -4658,7 +4654,6 @@ def test_no_dict(): d.__dict__ -@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") def test_subclass_slots(): """Test that DataArray subclasses must explicitly define ``__slots__``. diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 5d856c9f323..fdd5a419383 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -25,7 +25,7 @@ open_dataset, set_options, ) -from xarray.core import dtypes, indexing, npcompat, utils +from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like from xarray.core.npcompat import IS_NEP18_ACTIVE from xarray.core.pycompat import integer_types @@ -2142,9 +2142,7 @@ def test_drop_index_labels(self): expected = data.isel(x=slice(0, 0)) assert_identical(expected, actual) - # This exception raised by pandas changed from ValueError -> KeyError - # in pandas 0.23. - with pytest.raises((ValueError, KeyError)): + with pytest.raises(KeyError): # not contained in axis data.drop(["c"], dim="x") @@ -2492,13 +2490,8 @@ def test_expand_dims_error(self): ) with raises_regex(TypeError, "value of new dimension"): original.expand_dims(OrderedDict((("d", 3.2),))) - - # TODO: only the code under the if-statement is needed when python 3.5 - # is no longer supported. - python36_plus = sys.version_info[0] == 3 and sys.version_info[1] > 5 - if python36_plus: - with raises_regex(ValueError, "both keyword and positional"): - original.expand_dims(OrderedDict((("d", 4),)), e=4) + with raises_regex(ValueError, "both keyword and positional"): + original.expand_dims(OrderedDict((("d", 4),)), e=4) def test_expand_dims_int(self): original = Dataset( @@ -2605,21 +2598,6 @@ def test_expand_dims_mixed_int_and_coords(self): ) assert_identical(actual, expected) - @pytest.mark.skipif( - sys.version_info[:2] > (3, 5), - reason="we only raise these errors for Python 3.5", - ) - def test_expand_dims_kwargs_python35(self): - original = Dataset({"x": ("a", np.random.randn(3))}) - with raises_regex(ValueError, "dim_kwargs isn't"): - original.expand_dims(e=["l", "m", "n"]) - with raises_regex(TypeError, "must be an OrderedDict"): - original.expand_dims({"e": ["l", "m", "n"]}) - - @pytest.mark.skipif( - sys.version_info[:2] < (3, 6), - reason="keyword arguments are only ordered on Python 3.6+", - ) def test_expand_dims_kwargs_python36plus(self): original = Dataset( {"x": ("a", np.random.randn(3)), "y": (["b", "a"], np.random.randn(4, 3))}, @@ -5554,7 +5532,7 @@ def test_differentiate(dask, edge_order): # along x actual = da.differentiate("x", edge_order) expected_x = xr.DataArray( - npcompat.gradient(da, da["x"], axis=0, edge_order=edge_order), + np.gradient(da, da["x"], axis=0, edge_order=edge_order), dims=da.dims, coords=da.coords, ) @@ -5569,7 +5547,7 @@ def test_differentiate(dask, edge_order): # along y actual = da.differentiate("y", edge_order) expected_y = xr.DataArray( - npcompat.gradient(da, da["y"], axis=1, edge_order=edge_order), + np.gradient(da, da["y"], axis=1, edge_order=edge_order), dims=da.dims, coords=da.coords, ) @@ -5612,7 +5590,7 @@ def test_differentiate_datetime(dask): # along x actual = da.differentiate("x", edge_order=1, datetime_unit="D") expected_x = xr.DataArray( - npcompat.gradient( + np.gradient( da, da["x"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1 ), dims=da.dims, @@ -5649,7 +5627,7 @@ def test_differentiate_cftime(dask): da = da.chunk({"time": 4}) actual = da.differentiate("time", edge_order=1, datetime_unit="D") - expected_data = npcompat.gradient( + expected_data = np.gradient( da, da["time"].variable._to_numeric(datetime_unit="D"), axis=0, edge_order=1 ) expected = xr.DataArray(expected_data, coords=da.coords, dims=da.dims) @@ -5772,7 +5750,6 @@ def test_no_dict(): d.__dict__ -@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher") def test_subclass_slots(): """Test that Dataset subclasses must explicitly define ``__slots__``. diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index a3bea6db85f..b3c0ce37a54 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -3,8 +3,8 @@ import pytest -dask = pytest.importorskip("dask", minversion="0.18") # isort:skip -distributed = pytest.importorskip("distributed", minversion="1.21") # isort:skip +dask = pytest.importorskip("dask") # isort:skip +distributed = pytest.importorskip("distributed") # isort:skip from dask.distributed import Client, Lock from distributed.utils_test import cluster, gen_cluster diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index 766a391b57f..62ea19be97b 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -1,5 +1,4 @@ import warnings -from distutils.version import LooseVersion from textwrap import dedent import numpy as np @@ -28,7 +27,6 @@ arm_xfail, assert_array_equal, has_dask, - has_np113, raises_regex, requires_cftime, requires_dask, @@ -353,7 +351,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim): warnings.filterwarnings("ignore", "All-NaN slice") warnings.filterwarnings("ignore", "invalid value encountered in") - if has_np113 and da.dtype.kind == "O" and skipna: + if da.dtype.kind == "O" and skipna: # Numpy < 1.13 does not handle object-type array. try: if skipna: @@ -531,12 +529,8 @@ def test_min_count(dim_num, dtype, dask, func, aggdim): min_count = 3 actual = getattr(da, func)(dim=aggdim, skipna=True, min_count=min_count) - - if LooseVersion(pd.__version__) >= LooseVersion("0.22.0"): - # min_count is only implenented in pandas > 0.22 - expected = series_reduce(da, func, skipna=True, dim=aggdim, min_count=min_count) - assert_allclose(actual, expected) - + expected = series_reduce(da, func, skipna=True, dim=aggdim, min_count=min_count) + assert_allclose(actual, expected) assert_dask_array(actual, dask) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index ba108b2dbaf..ae405015659 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -83,8 +83,7 @@ def test_convert_label_indexer(self): indexing.convert_label_indexer(mindex, 0) with pytest.raises(ValueError): indexing.convert_label_indexer(index, {"three": 0}) - with pytest.raises((KeyError, IndexError)): - # pandas 0.21 changed this from KeyError to IndexError + with pytest.raises(IndexError): indexing.convert_label_indexer(mindex, (slice(None), 1, "no_level")) def test_convert_unsorted_datetime_index_raises(self): diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 99a72d68ad8..e3b29b86e4d 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -25,7 +25,6 @@ raises_regex, requires_cftime, requires_matplotlib, - requires_matplotlib2, requires_nc_time_axis, requires_seaborn, ) @@ -360,7 +359,6 @@ def test_convenient_facetgrid(self): d[0].plot(x="x", y="y", col="z", ax=plt.gca()) @pytest.mark.slow - @requires_matplotlib2 def test_subplot_kws(self): a = easy_array((10, 15, 4)) d = DataArray(a, dims=["y", "x", "z"]) @@ -1962,10 +1960,11 @@ def test_datetime_hue(self, hue_style): ds2.plot.scatter(x="A", y="B", hue="hue", hue_style=hue_style) def test_facetgrid_hue_style(self): - # Can't move this to pytest.mark.parametrize because py35-bare-minimum - # doesn't have mpl. - for hue_style, map_type in zip( - ["discrete", "continuous"], [list, mpl.collections.PathCollection] + # Can't move this to pytest.mark.parametrize because py36-bare-minimum + # doesn't have matplotlib. + for hue_style, map_type in ( + ("discrete", list), + ("continuous", mpl.collections.PathCollection), ): g = self.ds.plot.scatter( x="A", y="B", row="row", col="col", hue="hue", hue_style=hue_style diff --git a/xarray/tests/test_ufuncs.py b/xarray/tests/test_ufuncs.py index 1095cc360dd..26241152dfa 100644 --- a/xarray/tests/test_ufuncs.py +++ b/xarray/tests/test_ufuncs.py @@ -8,7 +8,7 @@ from . import assert_array_equal from . import assert_identical as assert_identical_ -from . import mock, raises_regex, requires_np113 +from . import mock, raises_regex def assert_identical(a, b): @@ -19,7 +19,6 @@ def assert_identical(a, b): assert_array_equal(a, b) -@requires_np113 def test_unary(): args = [ 0, @@ -32,7 +31,6 @@ def test_unary(): assert_identical(a + 1, np.cos(a)) -@requires_np113 def test_binary(): args = [ 0, @@ -49,7 +47,6 @@ def test_binary(): assert_identical(t2 + 1, np.maximum(t2 + 1, t1)) -@requires_np113 def test_binary_out(): args = [ 1, @@ -64,7 +61,6 @@ def test_binary_out(): assert_identical(actual_exponent, arg) -@requires_np113 def test_groupby(): ds = xr.Dataset({"a": ("x", [0, 0, 0])}, {"c": ("x", [0, 0, 1])}) ds_grouped = ds.groupby("c") @@ -87,7 +83,6 @@ def test_groupby(): np.maximum(ds.a.variable, ds_grouped) -@requires_np113 def test_alignment(): ds1 = xr.Dataset({"a": ("x", [1, 2])}, {"x": [0, 1]}) ds2 = xr.Dataset({"a": ("x", [2, 3]), "b": 4}, {"x": [1, 2]}) @@ -104,14 +99,12 @@ def test_alignment(): assert_identical_(actual, expected) -@requires_np113 def test_kwargs(): x = xr.DataArray(0) result = np.add(x, 1, dtype=np.float64) assert result.dtype == np.float64 -@requires_np113 def test_xarray_defers_to_unrecognized_type(): class Other: def __array_ufunc__(self, *args, **kwargs): @@ -123,7 +116,6 @@ def __array_ufunc__(self, *args, **kwargs): assert np.sin(xarray_obj, out=other) == "other" -@requires_np113 def test_xarray_handles_dask(): da = pytest.importorskip("dask.array") x = xr.DataArray(np.ones((2, 2)), dims=["x", "y"]) @@ -133,7 +125,6 @@ def test_xarray_handles_dask(): assert isinstance(result, xr.DataArray) -@requires_np113 def test_dask_defers_to_xarray(): da = pytest.importorskip("dask.array") x = xr.DataArray(np.ones((2, 2)), dims=["x", "y"]) @@ -143,14 +134,12 @@ def test_dask_defers_to_xarray(): assert isinstance(result, xr.DataArray) -@requires_np113 def test_gufunc_methods(): xarray_obj = xr.DataArray([1, 2, 3]) with raises_regex(NotImplementedError, "reduce method"): np.add.reduce(xarray_obj, 1) -@requires_np113 def test_out(): xarray_obj = xr.DataArray([1, 2, 3]) @@ -164,7 +153,6 @@ def test_out(): assert_identical(other, np.array([1, 2, 3])) -@requires_np113 def test_gufuncs(): xarray_obj = xr.DataArray([1, 2, 3]) fake_gufunc = mock.Mock(signature="(n)->()", autospec=np.sin) @@ -182,7 +170,6 @@ def test_xarray_ufuncs_deprecation(): assert len(record) == 0 -@requires_np113 @pytest.mark.filterwarnings("ignore::RuntimeWarning") @pytest.mark.parametrize( "name", diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 254983364f9..859306b88cb 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -73,9 +73,7 @@ def test_multiindex_from_product_levels(): [pd.Index(["b", "a"]), pd.Index([1, 3, 2])] ) np.testing.assert_array_equal( - # compat for pandas < 0.24 - result.codes if hasattr(result, "codes") else result.labels, - [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + result.codes, [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] ) np.testing.assert_array_equal(result.levels[0], ["b", "a"]) np.testing.assert_array_equal(result.levels[1], [1, 3, 2]) @@ -89,9 +87,7 @@ def test_multiindex_from_product_levels_non_unique(): [pd.Index(["b", "a"]), pd.Index([1, 1, 2])] ) np.testing.assert_array_equal( - # compat for pandas < 0.24 - result.codes if hasattr(result, "codes") else result.labels, - [[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1]], + result.codes, [[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1]] ) np.testing.assert_array_equal(result.levels[0], ["b", "a"]) np.testing.assert_array_equal(result.levels[1], [1, 2]) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 7f9538c9ea9..172a23d9a76 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2,7 +2,6 @@ from collections import OrderedDict from copy import copy, deepcopy from datetime import datetime, timedelta -from distutils.version import LooseVersion from textwrap import dedent import numpy as np @@ -1837,13 +1836,6 @@ def test_getitem_fancy(self): def test_getitem_1d_fancy(self): super().test_getitem_1d_fancy() - def test_equals_all_dtypes(self): - import dask - - if "0.18.2" <= LooseVersion(dask.__version__) < "0.19.1": - pytest.xfail("https://github.com/pydata/xarray/issues/2318") - super().test_equals_all_dtypes() - def test_getitem_with_mask_nd_indexer(self): import dask.array as da