Rolling minimum dependency versions policy (#3358)

* - Downgrade numpy to 1.14, pandas to 0.20, scipy to 0.19 (24 months old)
  - Downgrade dask to 1.1 (6 months old)
  - Don't pin patch versions
* Apply rolling policy (see #3222)
* Automated tool to verify the minimum versions
* Drop Python 3.5
* lint
* Trivial cosmetic
* Cosmetic
* (temp) debug CI failure
* Parallelize versions check script
* Remove hacks for legacy dask
* Documentation
* Assorted cleanup
* Assorted cleanup
* Fix regression
* Cleanup
* type annotations upgraded to Python 3.6
* count_not_none backport
* pd.Index.equals on legacy pandas returned False when comparing vs. a ndarray
* Documentation
* pathlib cleanup
* Slide deprecations from 0.14 to 0.15
* More cleanups
* More cleanups
* Fix min_deps_check
* Fix min_deps_check
* Set policy of 12 months for pandas and scipy
* Cleanup
* Cleanup
* Sphinx fix
* Overhaul readthedocs environment
* Fix test crash
* Fix test crash
* Prune readthedocs environment
* Cleanup
* Hack around versioneer bug on readthedocs CI
* Code review
* Prevent random timeouts in the readthedocs CI
* What's New polish
* Merge from Master
* Trivial cosmetic
* Reimplement pandas.core.common.count_not_none
pydata · Oct 8, 2019 · 6fb272c · 6fb272c
1 parent 3e2a754
commit 6fb272c
Show file tree

Hide file tree

Showing 70 changed files with 633 additions and 1,281 deletions.
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -8,8 +8,8 @@ jobs:
 - job: Linux
   strategy:
     matrix:
-      py35-bare-minimum:
-        conda_env: py35-bare-minimum
+      py36-bare-minimum:
+        conda_env: py36-bare-minimum
       py36-min-all-deps:
         conda_env: py36-min-all-deps
       py36-min-nep18:
@@ -82,13 +82,29 @@ jobs:
       mypy .
     displayName: mypy type checks
 
+- job: MinimumVersionsPolicy
+  pool:
+    vmImage: 'ubuntu-16.04'
+  steps:
+  - template: ci/azure/add-conda-to-path.yml
+  - bash: |
+      conda install -y pyyaml
+      python ci/min_deps_check.py ci/requirements/py36-bare-minimum.yml
+      python ci/min_deps_check.py ci/requirements/py36-min-all-deps.yml
+    displayName: minimum versions policy
+
 - job: Docs
   pool:
     vmImage: 'ubuntu-16.04'
   steps:
   - template: ci/azure/install.yml
     parameters:
-      env_file: doc/environment.yml
+      env_file: ci/requirements/doc.yml
+  - bash: |
+      source activate xarray-tests
+      # Replicate the exact environment created by the readthedocs CI
+      conda install --yes --quiet -c pkgs/main mock pillow sphinx sphinx_rtd_theme
+    displayName: Replicate readthedocs CI environment
   - bash: |
       source activate xarray-tests
       cd doc

diff --git a/ci/min_deps_check.py b/ci/min_deps_check.py
@@ -0,0 +1,187 @@
+"""Fetch from conda database all available versions of the xarray dependencies and their
+publication dates. Compare them against the requirements file given on the command line
+to verify the policy on obsolete dependencies is being followed. Print a pretty report :)
+"""
+import subprocess
+import sys
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime, timedelta
+from typing import Dict, Iterator, Tuple
+
+import yaml
+
+IGNORE_DEPS = {
+    "black",
+    "coveralls",
+    "flake8",
+    "hypothesis",
+    "mypy",
+    "pip",
+    "pytest",
+    "pytest-cov",
+    "pytest-env",
+}
+
+POLICY_MONTHS = {"python": 42, "numpy": 24, "pandas": 12, "scipy": 12}
+POLICY_MONTHS_DEFAULT = 6
+
+has_errors = False
+
+
+def error(msg: str) -> None:
+    global has_errors
+    has_errors = True
+    print("ERROR:", msg)
+
+
+def parse_requirements(fname) -> Iterator[Tuple[str, int, int]]:
+    """Load a requirements file such as requirements/py36-min-all-deps.yml
+
+    Yield (package name, major version, minor version)
+    """
+    global has_errors
+
+    with open(fname) as fh:
+        contents = yaml.safe_load(fh)
+    for row in contents["dependencies"]:
+        if isinstance(row, dict) and list(row) == ["pip"]:
+            continue
+        pkg, eq, version = row.partition("=")
+        if pkg.rstrip("<>") in IGNORE_DEPS:
+            continue
+        if pkg.endswith("<") or pkg.endswith(">") or eq != "=":
+            error("package should be pinned with exact version: " + row)
+            continue
+        try:
+            major, minor = version.split(".")
+        except ValueError:
+            error("expected major.minor (without patch): " + row)
+            continue
+        try:
+            yield pkg, int(major), int(minor)
+        except ValueError:
+            error("failed to parse version: " + row)
+
+
+def query_conda(pkg: str) -> Dict[Tuple[int, int], datetime]:
+    """Query the conda repository for a specific package
+
+    Return map of {(major version, minor version): publication date}
+    """
+    stdout = subprocess.check_output(
+        ["conda", "search", pkg, "--info", "-c", "defaults", "-c", "conda-forge"]
+    )
+    out = {}  # type: Dict[Tuple[int, int], datetime]
+    major = None
+    minor = None
+
+    for row in stdout.decode("utf-8").splitlines():
+        label, _, value = row.partition(":")
+        label = label.strip()
+        if label == "file name":
+            value = value.strip()[len(pkg) :]
+            major, minor = value.split("-")[1].split(".")[:2]
+            major = int(major)
+            minor = int(minor)
+        if label == "timestamp":
+            assert major is not None
+            assert minor is not None
+            ts = datetime.strptime(value.split()[0].strip(), "%Y-%m-%d")
+
+            if (major, minor) in out:
+                out[major, minor] = min(out[major, minor], ts)
+            else:
+                out[major, minor] = ts
+
+    # Hardcoded fix to work around incorrect dates in conda
+    if pkg == "python":
+        out.update(
+            {
+                (2, 7): datetime(2010, 6, 3),
+                (3, 5): datetime(2015, 9, 13),
+                (3, 6): datetime(2016, 12, 23),
+                (3, 7): datetime(2018, 6, 27),
+                (3, 8): datetime(2019, 10, 14),
+            }
+        )
+
+    return out
+
+
+def process_pkg(
+    pkg: str, req_major: int, req_minor: int
+) -> Tuple[str, int, int, str, int, int, str, str]:
+    """Compare package version from requirements file to available versions in conda.
+    Return one row of the report table printed by main():
+
+    - package name
+    - major version in requirements file
+    - minor version in requirements file
+    - publication date of version in requirements file (YYYY-MM-DD)
+    - major version suggested by policy
+    - minor version suggested by policy
+    - publication date of version suggested by policy (YYYY-MM-DD)
+    - status ("<", "=", "> (!)", or "(!)" if the version is not found in conda)
+    """
+    print("Analyzing %s..." % pkg)
+    versions = query_conda(pkg)
+
+    try:
+        req_published = versions[req_major, req_minor]
+    except KeyError:
+        error("not found in conda: " + pkg)
+        return pkg, req_major, req_minor, "-", 0, 0, "-", "(!)"
+
+    policy_months = POLICY_MONTHS.get(pkg, POLICY_MONTHS_DEFAULT)
+    policy_published = datetime.now() - timedelta(days=policy_months * 30)
+
+    policy_major = req_major
+    policy_minor = req_minor
+    policy_published_actual = req_published
+    for (major, minor), published in reversed(sorted(versions.items())):
+        if published < policy_published:
+            break
+        policy_major = major
+        policy_minor = minor
+        policy_published_actual = published
+
+    if (req_major, req_minor) < (policy_major, policy_minor):
+        status = "<"
+    elif (req_major, req_minor) > (policy_major, policy_minor):
+        status = "> (!)"
+        error("Package is too new: " + pkg)
+    else:
+        status = "="
+
+    return (
+        pkg,
+        req_major,
+        req_minor,
+        req_published.strftime("%Y-%m-%d"),
+        policy_major,
+        policy_minor,
+        policy_published_actual.strftime("%Y-%m-%d"),
+        status,
+    )
+
+
+def main() -> None:
+    fname = sys.argv[1]
+    with ThreadPoolExecutor(8) as ex:
+        futures = [
+            ex.submit(process_pkg, pkg, major, minor)
+            for pkg, major, minor in parse_requirements(fname)
+        ]
+        rows = [f.result() for f in futures]
+
+    print("Package       Required          Policy            Status")
+    print("------------- ----------------- ----------------- ------")
+    fmt = "{:13} {:>1d}.{:<2d} ({:10}) {:>1d}.{:<2d} ({:10}) {}"
+    for row in rows:
+        print(fmt.format(*row))
+
+    assert not has_errors
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml
@@ -0,0 +1,21 @@
+name: xarray-docs
+channels:
+  # Don't change to pkgs/main, as it causes random timeouts in readthedocs
+  - conda-forge
+dependencies:
+  - python=3.7
+  - bottleneck
+  - cartopy
+  - h5netcdf
+  - ipython
+  - iris
+  - netcdf4
+  - numpy
+  - numpydoc
+  - pandas<0.25  # Hack around https://github.com/pydata/xarray/issues/3369
+  - rasterio
+  - seaborn
+  - sphinx
+  - sphinx-gallery
+  - sphinx_rtd_theme
+  - zarr
diff --git a/ci/requirements/py35-bare-minimum.yml b/ci/requirements/py35-bare-minimum.yml
diff --git a/ci/requirements/py36-bare-minimum.yml b/ci/requirements/py36-bare-minimum.yml
@@ -0,0 +1,11 @@
+name: xarray-tests
+channels:
+  - conda-forge
+dependencies:
+  - python=3.6
+  - coveralls
+  - pytest
+  - pytest-cov
+  - pytest-env
+  - numpy=1.14
+  - pandas=0.24
diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml
@@ -2,42 +2,47 @@ name: xarray-tests
 channels:
   - conda-forge
 dependencies:
-  - python=3.6.7
+  # MINIMUM VERSIONS POLICY: see doc/installing.rst
+  # Run ci/min_deps_check.py to verify that this file respects the policy.
+  # When upgrading python, numpy, or pandas, must also change
+  # doc/installing.rst and setup.py.
+  - python=3.6
   - black
-  - boto3=1.9.235
-  - bottleneck=1.2.1
-  - cdms2=3.1.3
-  - cfgrib=0.9.7.2
-  - cftime=1.0.3.4
+  - boto3=1.9
+  - bottleneck=1.2
+  - cartopy=0.17
+  - cdms2=3.1
+  - cfgrib=0.9
+  - cftime=1.0
   - coveralls
-  - dask=2.4.0
-  - distributed=2.4.0
+  - dask=1.2
+  - distributed=1.27
   - flake8
-  - h5netcdf=0.7.4
-  - h5py=2.10.0
-  - hdf5=1.10.5
+  - h5netcdf=0.7
+  - h5py=2.9  # Policy allows for 2.10, but it's a conflict-fest
+  - hdf5=1.10
   - hypothesis
-  - iris=2.2.0
-  - lxml=4.4.1  # optional dep of pydap
-  - matplotlib=3.1.1
-  - mypy==0.730  # Must match .pre-commit-config.yaml
-  - nc-time-axis=1.2.0
-  - netcdf4=1.5.1.2
-  - numba=0.45.1
-  - numpy=1.17.2
-  - pandas=0.25.1
+  - iris=2.2
+  - lxml=4.4  # Optional dep of pydap
+  - matplotlib=3.1
+  - mypy=0.730  # Must match .pre-commit-config.yaml
+  - nc-time-axis=1.2
+  - netcdf4=1.4
+  - numba=0.44
+  - numpy=1.14
+  - pandas=0.24
   - pip
-  - pseudonetcdf=3.0.2
-  - pydap=3.2.2
-  - pynio=1.5.5
+  - pseudonetcdf=3.0
+  - pydap=3.2
+  - pynio=1.5
   - pytest
   - pytest-cov
   - pytest-env
-  - rasterio=1.0.28
-  - scipy=1.3.1
-  - seaborn=0.9.0
+  - rasterio=1.0
+  - scipy=1.0  # Policy allows for 1.2, but scipy>=1.1 breaks numpy=1.14
+  - seaborn=0.9
   # - sparse  # See py36-min-nep18.yml
-  - toolz=0.10.0
-  - zarr=2.3.2
+  - toolz=0.10
+  - zarr=2.3
   - pip:
     - numbagg==0.1
diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
@@ -4,14 +4,14 @@ channels:
 dependencies:
   # Optional dependencies that require NEP18, such as sparse,
   # require drastically newer packages than everything else
-  - python=3.6.7
+  - python=3.6
   - coveralls
-  - dask=2.4.0
-  - distributed=2.4.0
+  - dask=2.4
+  - distributed=2.4
   - numpy=1.17
-  - pandas=0.25
+  - pandas=0.24
   - pytest
   - pytest-cov
   - pytest-env
-  - scipy=1.3
-  - sparse=0.8.0
+  - scipy=1.2
+  - sparse=0.8