Skip to content

Commit

Permalink
Rolling minimum dependency versions policy (#3358)
Browse files Browse the repository at this point in the history
* - Downgrade numpy to 1.14, pandas to 0.20, scipy to 0.19 (24 months old)
- Downgrade dask to 1.1 (6 months old)
- Don't pin patch versions

* Apply rolling policy (see #3222)

* Automated tool to verify the minimum versions

* Drop Python 3.5

* lint

* Trivial cosmetic

* Cosmetic

* (temp) debug CI failure

* Parallelize versions check script

* Remove hacks for legacy dask

* Documentation

* Assorted cleanup

* Assorted cleanup

* Fix regression

* Cleanup

* type annotations upgraded to Python 3.6

* count_not_none backport

* pd.Index.equals on legacy pandas returned False when comparing vs. a ndarray

* Documentation

* pathlib cleanup

* Slide deprecations from 0.14 to 0.15

* More cleanups

* More cleanups

* Fix min_deps_check

* Fix min_deps_check

* Set policy of 12 months for pandas and scipy

* Cleanup

* Cleanup

* Sphinx fix

* Overhaul readthedocs environment

* Fix test crash

* Fix test crash

* Prune readthedocs environment

* Cleanup

* Hack around versioneer bug on readthedocs CI

* Code review

* Prevent random timeouts in the readthedocs CI

* What's New polish

* Merge from Master

* Trivial cosmetic

* Reimplement pandas.core.common.count_not_none
  • Loading branch information
crusaderky authored and Joe Hamman committed Oct 8, 2019
1 parent 3e2a754 commit 6fb272c
Show file tree
Hide file tree
Showing 70 changed files with 633 additions and 1,281 deletions.
22 changes: 19 additions & 3 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ jobs:
- job: Linux
strategy:
matrix:
py35-bare-minimum:
conda_env: py35-bare-minimum
py36-bare-minimum:
conda_env: py36-bare-minimum
py36-min-all-deps:
conda_env: py36-min-all-deps
py36-min-nep18:
Expand Down Expand Up @@ -82,13 +82,29 @@ jobs:
mypy .
displayName: mypy type checks
- job: MinimumVersionsPolicy
pool:
vmImage: 'ubuntu-16.04'
steps:
- template: ci/azure/add-conda-to-path.yml
- bash: |
conda install -y pyyaml
python ci/min_deps_check.py ci/requirements/py36-bare-minimum.yml
python ci/min_deps_check.py ci/requirements/py36-min-all-deps.yml
displayName: minimum versions policy
- job: Docs
pool:
vmImage: 'ubuntu-16.04'
steps:
- template: ci/azure/install.yml
parameters:
env_file: doc/environment.yml
env_file: ci/requirements/doc.yml
- bash: |
source activate xarray-tests
# Replicate the exact environment created by the readthedocs CI
conda install --yes --quiet -c pkgs/main mock pillow sphinx sphinx_rtd_theme
displayName: Replicate readthedocs CI environment
- bash: |
source activate xarray-tests
cd doc
Expand Down
187 changes: 187 additions & 0 deletions ci/min_deps_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
"""Fetch from the conda database all available versions of the xarray dependencies and
their publication dates. Compare them against the requirements file passed on the
command line (e.g. requirements/py36-min-all-deps.yml) to verify that the policy on
obsolete dependencies is being followed. Print a pretty report :)
"""
import subprocess
import sys
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timedelta
from typing import Dict, Iterator, Tuple

import yaml

# Packages exempt from the minimum versions policy: parse_requirements() skips
# them without requiring a major.minor pin.
IGNORE_DEPS = {
    # code formatting, linting and static type checking
    "black",
    "flake8",
    "mypy",
    # test runner, its plugins and coverage reporting
    "coveralls",
    "hypothesis",
    "pytest",
    "pytest-cov",
    "pytest-env",
    # installer
    "pip",
}

# Age, in months, of the policy window for each package. process_pkg() converts
# months to days using 30 days per month.
POLICY_MONTHS = {
    "python": 42,
    "numpy": 24,
    "pandas": 12,
    "scipy": 12,
}
# Window applied to every package that is not listed in POLICY_MONTHS.
POLICY_MONTHS_DEFAULT = 6

# Set to True by error(); main() checks it after the report has been printed.
has_errors = False


def error(msg: str) -> None:
    """Report a problem and remember that the run has failed.

    Prints ``msg`` to stdout prefixed with ``ERROR:`` and sets the module-level
    ``has_errors`` flag to True. The flag is never reset by this function.
    """
    global has_errors
    print("ERROR:", msg)
    has_errors = True


def parse_requirements(fname) -> Iterator[Tuple[str, int, int]]:
    """Load a conda requirements file, e.g. requirements/py36-min-all-deps.yml

    Yield (package name, major version, minor version) for every dependency that
    is pinned as ``pkg=major.minor``.

    Entries that are skipped silently:

    - the nested ``pip:`` section
    - packages listed in IGNORE_DEPS

    Entries that are reported through error() and then skipped:

    - entries that are neither a string nor the ``pip:`` section
    - packages that are unpinned or pinned with ``<`` / ``>``
    - versions that are not exactly ``major.minor``
    - versions whose components are not integers
    """
    with open(fname) as fh:
        contents = yaml.safe_load(fh)

    for row in contents["dependencies"]:
        if isinstance(row, dict) and list(row) == ["pip"]:
            continue
        if not isinstance(row, str):
            # Anything else (e.g. a mapping other than "pip") cannot be parsed as
            # "pkg=version"; report it instead of crashing on row.partition.
            error("unexpected dependency entry: " + repr(row))
            continue

        pkg, eq, version = row.partition("=")
        if pkg.rstrip("<>") in IGNORE_DEPS:
            continue
        if pkg.endswith(("<", ">")) or eq != "=":
            error("package should be pinned with exact version: " + row)
            continue

        try:
            major, minor = version.split(".")
        except ValueError:
            error("expected major.minor (without patch): " + row)
            continue

        # Convert before yielding, so that the except clause only guards int()
        # and never intercepts an exception raised at the yield point.
        try:
            parsed = int(major), int(minor)
        except ValueError:
            error("failed to parse version: " + row)
            continue

        yield (pkg,) + parsed


def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]:
    """Query the conda repository for a specific package

    Runs ``conda search <pkg> --info`` against the ``defaults`` and
    ``conda-forge`` channels and parses its textual output.

    Return map of {(major version, minor version): publication date}, where the
    date is the earliest timestamp found among all the entries that share the
    same (major, minor).
    """
    raw = subprocess.check_output(
        ["conda", "search", pkg, "--info", "-c", "defaults", "-c", "conda-forge"]
    )
    published = {}  # type: Dict[Tuple[int, int], datetime]
    # Version of the entry currently being parsed; set by its "file name" line
    # and consumed by the "timestamp" line that follows it.
    major = None
    minor = None

    for line in raw.decode("utf-8").splitlines():
        label, _, value = line.partition(":")
        label = label.strip()

        if label == "file name":
            # Drop the leading package name by length, so that dashes inside the
            # name are not mistaken for separators; the version is then the
            # first dash-delimited field.
            # Presumably the file name looks like <pkg>-<version>-<build>.<ext>
            remainder = value.strip()[len(pkg) :]
            version = remainder.split("-")[1]
            major, minor = version.split(".")[:2]
            major = int(major)
            minor = int(minor)
        elif label == "timestamp":
            assert major is not None
            assert minor is not None
            # Only the leading YYYY-MM-DD token of the timestamp is used
            day = value.split()[0].strip()
            ts = datetime.strptime(day, "%Y-%m-%d")

            key = (major, minor)
            earliest = published.get(key)
            published[key] = ts if earliest is None else min(earliest, ts)

    # Hardcoded fix to work around incorrect dates in conda
    if pkg == "python":
        published.update(
            {
                (2, 7): datetime(2010, 6, 3),
                (3, 5): datetime(2015, 9, 13),
                (3, 6): datetime(2016, 12, 23),
                (3, 7): datetime(2018, 6, 27),
                (3, 8): datetime(2019, 10, 14),
            }
        )

    return published


def process_pkg(
    pkg: str, req_major: int, req_minor: int
) -> Tuple[str, int, int, str, int, int, str, str]:
    """Compare package version from requirements file to available versions in conda.

    Return a report row:

    - package name
    - major version in requirements file
    - minor version in requirements file
    - publication date of version in requirements file (YYYY-MM-DD)
    - major version suggested by policy
    - minor version suggested by policy
    - publication date of version suggested by policy (YYYY-MM-DD)
    - status ("<", "=", "> (!)")

    If the required version is not available in conda, an error is recorded and
    a placeholder row with "-" dates, policy version 0.0 and status "(!)" is
    returned. A required version newer than the policy version is also recorded
    as an error.
    """
    print("Analyzing %s..." % pkg)
    available = query_conda(pkg)
    required = (req_major, req_minor)

    if required not in available:
        error("not found in conda: " + pkg)
        return pkg, req_major, req_minor, "-", 0, 0, "-", "(!)"
    req_published = available[required]

    # Start of the policy window; a month is approximated as 30 days
    window_days = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT) * 30
    cutoff = datetime.now() - timedelta(days=window_days)

    # Walk the versions from newest to oldest and stop at the first one that was
    # published before the cutoff. The policy version is the last one visited
    # before stopping; if even the newest version predates the cutoff, the
    # required version is kept.
    # NOTE(review): this picks the oldest release inside the window rather than
    # the newest release older than the window - confirm which one the policy
    # intends.
    policy = required
    policy_published_actual = req_published
    for candidate, published in sorted(available.items(), reverse=True):
        if published < cutoff:
            break
        policy = candidate
        policy_published_actual = published

    if required == policy:
        status = "="
    elif required < policy:
        status = "<"
    else:
        status = "> (!)"
        error("Package is too new: " + pkg)

    policy_major, policy_minor = policy
    return (
        pkg,
        req_major,
        req_minor,
        req_published.strftime("%Y-%m-%d"),
        policy_major,
        policy_minor,
        policy_published_actual.strftime("%Y-%m-%d"),
        status,
    )


def main() -> None:
    """Check one requirements file against the minimum versions policy.

    The path of the requirements file is read from the first command line
    argument. All its pinned packages are analyzed in parallel, a report table
    is printed to stdout, and the process exits with status 1 if any error was
    recorded along the way.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: {} <requirements.yml>".format(sys.argv[0]))
    fname = sys.argv[1]

    # The work is dominated by waiting on external "conda search" processes, so
    # threads are enough to overlap the queries.
    with ThreadPoolExecutor(8) as ex:
        futures = [
            ex.submit(process_pkg, pkg, major, minor)
            for pkg, major, minor in parse_requirements(fname)
        ]
        rows = [f.result() for f in futures]

    # Header cells are padded to the same widths as the separator line and the
    # row format below (13, 17, 17), so that the columns line up.
    print("{:13} {:17} {:17} {}".format("Package", "Required", "Policy", "Status"))
    print("------------- ----------------- ----------------- ------")
    fmt = "{:13} {:>1d}.{:<2d} ({:10}) {:>1d}.{:<2d} ({:10}) {}"
    for row in rows:
        print(fmt.format(*row))

    # Explicit exit status instead of an assert, which "python -O" strips and
    # would make the check pass silently.
    if has_errors:
        sys.exit(1)


if __name__ == "__main__":
    main()
21 changes: 21 additions & 0 deletions ci/requirements/doc.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: xarray-docs
channels:
# Don't change to pkgs/main, as it causes random timeouts in readthedocs
- conda-forge
dependencies:
- python=3.7
- bottleneck
- cartopy
- h5netcdf
- ipython
- iris
- netcdf4
- numpy
- numpydoc
- pandas<0.25 # Hack around https://github.com/pydata/xarray/issues/3369
- rasterio
- seaborn
- sphinx
- sphinx-gallery
- sphinx_rtd_theme
- zarr
15 changes: 0 additions & 15 deletions ci/requirements/py35-bare-minimum.yml

This file was deleted.

11 changes: 11 additions & 0 deletions ci/requirements/py36-bare-minimum.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
name: xarray-tests
channels:
- conda-forge
dependencies:
- python=3.6
- coveralls
- pytest
- pytest-cov
- pytest-env
- numpy=1.14
- pandas=0.24
61 changes: 33 additions & 28 deletions ci/requirements/py36-min-all-deps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,47 @@ name: xarray-tests
channels:
- conda-forge
dependencies:
- python=3.6.7
# MINIMUM VERSIONS POLICY: see doc/installing.rst
# Run ci/min_deps_check.py to verify that this file respects the policy.
# When upgrading python, numpy, or pandas, must also change
# doc/installing.rst and setup.py.
- python=3.6
- black
- boto3=1.9.235
- bottleneck=1.2.1
- cdms2=3.1.3
- cfgrib=0.9.7.2
- cftime=1.0.3.4
- boto3=1.9
- bottleneck=1.2
- cartopy=0.17
- cdms2=3.1
- cfgrib=0.9
- cftime=1.0
- coveralls
- dask=2.4.0
- distributed=2.4.0
- dask=1.2
- distributed=1.27
- flake8
- h5netcdf=0.7.4
- h5py=2.10.0
- hdf5=1.10.5
- h5netcdf=0.7
- h5py=2.9 # Policy allows for 2.10, but it's a conflict-fest
- hdf5=1.10
- hypothesis
- iris=2.2.0
- lxml=4.4.1 # optional dep of pydap
- matplotlib=3.1.1
- mypy==0.730 # Must match .pre-commit-config.yaml
- nc-time-axis=1.2.0
- netcdf4=1.5.1.2
- numba=0.45.1
- numpy=1.17.2
- pandas=0.25.1
- iris=2.2
- lxml=4.4 # Optional dep of pydap
- matplotlib=3.1
- mypy=0.730 # Must match .pre-commit-config.yaml
- nc-time-axis=1.2
- netcdf4=1.4
- numba=0.44
- numpy=1.14
- pandas=0.24
- pip
- pseudonetcdf=3.0.2
- pydap=3.2.2
- pynio=1.5.5
- pseudonetcdf=3.0
- pydap=3.2
- pynio=1.5
- pytest
- pytest-cov
- pytest-env
- rasterio=1.0.28
- scipy=1.3.1
- seaborn=0.9.0
- rasterio=1.0
- scipy=1.0 # Policy allows for 1.2, but scipy>=1.1 breaks numpy=1.14
- seaborn=0.9
# - sparse # See py36-min-nep18.yml
- toolz=0.10.0
- zarr=2.3.2
- toolz=0.10
- zarr=2.3
- pip:
- numbagg==0.1
12 changes: 6 additions & 6 deletions ci/requirements/py36-min-nep18.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ channels:
dependencies:
# Optional dependencies that require NEP18, such as sparse,
# require drastically newer packages than everything else
- python=3.6.7
- python=3.6
- coveralls
- dask=2.4.0
- distributed=2.4.0
- dask=2.4
- distributed=2.4
- numpy=1.17
- pandas=0.25
- pandas=0.24
- pytest
- pytest-cov
- pytest-env
- scipy=1.3
- sparse=0.8.0
- scipy=1.2
- sparse=0.8
Loading

0 comments on commit 6fb272c

Please sign in to comment.