Merge branch 'main' into open-groups-zarr
eni-awowale authored Sep 9, 2024
2 parents 746977a + cea354f commit 7a6fd8c
Showing 71 changed files with 1,166 additions and 602 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/benchmarks.yml
@@ -34,7 +34,7 @@ jobs:
# add "build" because of https://github.com/airspeed-velocity/asv/issues/1385
create-args: >-
asv
- build
+ python-build
mamba
32 changes: 14 additions & 18 deletions .github/workflows/ci-additional.yaml
@@ -123,11 +123,11 @@ jobs:
python xarray/util/print_versions.py
- name: Install mypy
run: |
- python -m pip install "mypy<1.9" --force-reinstall
+ python -m pip install "mypy" --force-reinstall
- name: Run mypy
run: |
- python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/
+ python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
- name: Upload mypy coverage to Codecov
uses: codecov/codecov-action@…
@@ -138,7 +138,7 @@ jobs:
name: codecov-umbrella
fail_ci_if_error: false

- mypy39:
+ mypy-min:
name: Mypy 3.10
runs-on: "ubuntu-latest"
needs: detect-ci-trigger
@@ -177,32 +177,30 @@ jobs:
python xarray/util/print_versions.py
- name: Install mypy
run: |
- python -m pip install "mypy<1.9" --force-reinstall
+ python -m pip install "mypy" --force-reinstall
- name: Run mypy
run: |
- python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report xarray/
+ python -m mypy --install-types --non-interactive --cobertura-xml-report mypy_report
- name: Upload mypy coverage to Codecov
uses: codecov/codecov-action@…
with:
file: mypy_report/cobertura.xml
- flags: mypy39
+ flags: mypy-min
env_vars: PYTHON_VERSION
name: codecov-umbrella
fail_ci_if_error: false



pyright:
name: Pyright
runs-on: "ubuntu-latest"
needs: detect-ci-trigger
if: |
- always()
- && (
- contains( github.event.pull_request.labels.*.name, 'run-pyright')
- )
+ always()
+ && (
+ contains( github.event.pull_request.labels.*.name, 'run-pyright')
+ )
defaults:
run:
shell: bash -l {0}
@@ -258,10 +256,10 @@ jobs:
runs-on: "ubuntu-latest"
needs: detect-ci-trigger
if: |
- always()
- && (
- contains( github.event.pull_request.labels.*.name, 'run-pyright')
- )
+ always()
+ && (
+ contains( github.event.pull_request.labels.*.name, 'run-pyright')
+ )
defaults:
run:
shell: bash -l {0}
@@ -312,8 +310,6 @@ jobs:
name: codecov-umbrella
fail_ci_if_error: false



min-version-policy:
name: Minimum Version Policy
runs-on: "ubuntu-latest"
4 changes: 2 additions & 2 deletions .github/workflows/pypi-release.yaml
@@ -88,7 +88,7 @@ jobs:
path: dist
- name: Publish package to TestPyPI
if: github.event_name == 'push'
- uses: pypa/gh-action-pypi-publish@v1.9.0
+ uses: pypa/gh-action-pypi-publish@v1.10.0
with:
repository_url: https://test.pypi.org/legacy/
verbose: true
@@ -111,6 +111,6 @@ jobs:
name: releases
path: dist
- name: Publish package to PyPI
- uses: pypa/gh-action-pypi-publish@v1.9.0
+ uses: pypa/gh-action-pypi-publish@v1.10.0
with:
verbose: true
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -13,24 +13,24 @@ repos:
- id: mixed-line-ending
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
- rev: 'v0.5.0'
+ rev: 'v0.6.3'
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
# https://github.com/python/black#version-control-integration
- repo: https://github.com/psf/black-pre-commit-mirror
- rev: 24.4.2
+ rev: 24.8.0
hooks:
- id: black-jupyter
- repo: https://github.com/keewis/blackdoc
rev: v0.3.9
hooks:
- id: blackdoc
exclude: "generate_aggregations.py"
- additional_dependencies: ["black==24.4.2"]
+ additional_dependencies: ["black==24.8.0"]
- id: blackdoc-autoupdate-black
- repo: https://github.com/pre-commit/mirrors-mypy
- rev: v1.10.1
+ rev: v1.11.2
hooks:
- id: mypy
# Copied from setup.cfg
@@ -41,7 +41,7 @@ repos:
additional_dependencies: [
# Type stubs
types-python-dateutil,
- types-pkg_resources,
+ types-setuptools,
types-PyYAML,
types-pytz,
typing-extensions>=4.1.0,
4 changes: 2 additions & 2 deletions .readthedocs.yaml
@@ -1,9 +1,9 @@
version: 2

build:
- os: ubuntu-22.04
+ os: ubuntu-lts-latest
tools:
- python: mambaforge-4.10
+ python: mambaforge-latest
jobs:
post_checkout:
- (git --no-pager log --pretty="tformat:%s" -1 | grep -vqF "[skip-rtd]") || exit 183
15 changes: 15 additions & 0 deletions asv_bench/benchmarks/datatree.py
@@ -0,0 +1,15 @@
import xarray as xr
from xarray.core.datatree import DataTree


class Datatree:
    def setup(self):
        run1 = DataTree.from_dict({"run1": xr.Dataset({"a": 1})})
        self.d_few = {"run1": run1}
        self.d_many = {f"run{i}": run1.copy() for i in range(100)}

    def time_from_dict_few(self):
        DataTree.from_dict(self.d_few)

    def time_from_dict_many(self):
        DataTree.from_dict(self.d_many)
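
A quick way to sanity-check the new benchmark outside the asv harness is to time the same construction directly; this is a sketch, assuming an xarray build that ships `xarray.core.datatree` (as this commit does):

```python
# Hypothetical local check of the DataTree.from_dict benchmark above;
# it mirrors the benchmark's own setup rather than any asv machinery.
import timeit

import xarray as xr
from xarray.core.datatree import DataTree

run1 = DataTree.from_dict({"run1": xr.Dataset({"a": 1})})
d_many = {f"run{i}": run1.copy() for i in range(100)}

# Roughly what asv measures in time_from_dict_many.
print(timeit.timeit(lambda: DataTree.from_dict(d_many), number=10))
```
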
19 changes: 17 additions & 2 deletions asv_bench/benchmarks/groupby.py
@@ -1,4 +1,5 @@
# import flox to avoid the cost of first import
+ import cftime
import flox.xarray # noqa
import numpy as np
import pandas as pd
@@ -96,7 +97,7 @@ def setup(self, *args, **kwargs):

requires_dask()
super().setup(**kwargs)
- self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()
+ self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dask_dataframe()
self.ds1d_mean = self.ds1d.groupby("b").mean().compute()

def time_binary_op_2d(self):
@@ -169,7 +170,21 @@ class GroupByLongTime:
    def setup(self, use_cftime, use_flox):
        arr = np.random.randn(10, 10, 365 * 30)
        time = xr.date_range("2000", periods=30 * 365, use_cftime=use_cftime)
-        self.da = xr.DataArray(arr, dims=("y", "x", "time"), coords={"time": time})
+
+        # GH9426 - deep-copying CFTime object arrays is weirdly slow
+        asda = xr.DataArray(time)
+        labeled_time = []
+        for year, month in zip(asda.dt.year, asda.dt.month):
+            labeled_time.append(cftime.datetime(year, month, 1))
+
+        self.da = xr.DataArray(
+            arr,
+            dims=("y", "x", "time"),
+            coords={"time": time, "time2": ("time", labeled_time)},
+        )
+
+    def time_setup(self, use_cftime, use_flox):
+        self.da.groupby("time.month")

    def time_mean(self, use_cftime, use_flox):
        with xr.set_options(use_flox=use_flox):
4 changes: 2 additions & 2 deletions ci/requirements/bare-minimum.yml
@@ -11,6 +11,6 @@ dependencies:
- pytest-env
- pytest-xdist
- pytest-timeout
- - numpy=1.23
+ - numpy=1.24
  - packaging=23.1
- - pandas=2.0
+ - pandas=2.1
3 changes: 2 additions & 1 deletion ci/requirements/doc.yml
@@ -4,7 +4,7 @@ channels:
- conda-forge
- nodefaults
dependencies:
- - python=3.10
+ - python=3.12
- bottleneck
- cartopy
- cfgrib
@@ -40,6 +40,7 @@ dependencies:
- sphinx-design
- sphinx-inline-tabs
- sphinx>=5.0
+ - sphinx-remove-toctrees
- sphinxext-opengraph
- sphinxext-rediraffe
- zarr>=2.10
24 changes: 12 additions & 12 deletions ci/requirements/min-all-deps.yml
@@ -9,37 +9,37 @@ dependencies:
# doc/user-guide/installing.rst, doc/user-guide/plotting.rst and setup.py.
- python=3.10
- array-api-strict=1.0 # dependency for testing the array api compat
- - boto3=1.26
+ - boto3=1.28
- bottleneck=1.3
- - cartopy=0.21
+ - cartopy=0.22
- cftime=1.6
- coveralls
- - dask-core=2023.4
- - distributed=2023.4
+ - dask-core=2023.9
+ - distributed=2023.9
# Flox > 0.8 has a bug with numbagg versions
# It will require numbagg > 0.6
# so we should just skip that series eventually
# or keep flox pinned for longer than necessary
- flox=0.7
- - h5netcdf=1.1
+ - h5netcdf=1.2
# h5py and hdf5 tend to cause conflicts
# for e.g. hdf5 1.12 conflicts with h5py=3.1
# prioritize bumping other packages instead
- h5py=3.8
- hdf5=1.12
- hypothesis
- - iris=3.4
+ - iris=3.7
- lxml=4.9 # Optional dep of pydap
- matplotlib-base=3.7
- nc-time-axis=1.4
# netcdf follows a 1.major.minor[.patch] convention
# (see https://github.com/Unidata/netcdf4-python/issues/1090)
- netcdf4=1.6.0
- - numba=0.56
+ - numba=0.57
- numbagg=0.2.1
- - numpy=1.23
+ - numpy=1.24
- packaging=23.1
- - pandas=2.0
+ - pandas=2.1
- pint=0.22
- pip
- pydap=3.4
@@ -49,9 +49,9 @@ dependencies:
- pytest-xdist
- pytest-timeout
- rasterio=1.3
- - scipy=1.10
+ - scipy=1.11
- seaborn=0.12
- sparse=0.14
- toolz=0.12
- - typing_extensions=4.5
- - zarr=2.14
+ - typing_extensions=4.7
+ - zarr=2.16
2 changes: 1 addition & 1 deletion design_notes/flexible_indexes_notes.md
@@ -71,7 +71,7 @@ An `XarrayIndex` subclass must/should/may implement the following properties/methods:
- a `data` property to access index's data and map it to coordinate data (see [Section 4](#4-indexvariable))
- a `__getitem__()` implementation to propagate the index through DataArray/Dataset indexing operations
- `equals()`, `union()` and `intersection()` methods for data alignment (see [Section 2.6](#26-using-indexes-for-data-alignment))
- - Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coodinates))
+ - Xarray coordinate getters (see [Section 2.2.4](#224-implicit-coordinates))
- a method that may return a new index and that will be called when one of the corresponding coordinates is dropped from the Dataset/DataArray (multi-coordinate indexes)
- `encode()`/`decode()` methods that would allow storage-agnostic serialization and fast-path reconstruction of the underlying index object(s) (see [Section 2.8](#28-index-encoding))
- one or more "non-standard" methods or properties that could be leveraged in Xarray 3rd-party extensions like Dataset/DataArray accessors (see [Section 2.7](#27-using-indexes-for-other-purposes))
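
For orientation, a minimal skeleton of the interface listed above could look like the following sketch; `XarrayIndex` is the design note's proposed base class, so it is stubbed out here rather than imported from xarray:

```python
# Sketch only: illustrates the proposed XarrayIndex surface, not a shipped API.
import numpy as np


class XarrayIndex:
    """Stand-in stub for the design note's hypothetical base class."""


class ArrayIndex(XarrayIndex):
    def __init__(self, data):
        self._data = np.asarray(data)

    @property
    def data(self):
        # Section 4: expose the index's data so it can back an IndexVariable.
        return self._data

    def __getitem__(self, indexer):
        # Propagate the index through DataArray/Dataset indexing operations.
        return type(self)(self._data[indexer])

    def equals(self, other):
        # Section 2.6: alignment hook; union() and intersection() would follow suit.
        return isinstance(other, type(self)) and np.array_equal(self._data, other._data)
```
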
2 changes: 1 addition & 1 deletion design_notes/grouper_objects.md
@@ -166,7 +166,7 @@ where `|` represents chunk boundaries. A simple rechunking to
```
000|111122|3333
```
- would make this resampling reduction an embarassingly parallel blockwise problem.
+ would make this resampling reduction an embarrassingly parallel blockwise problem.

Similarly consider monthly-mean climatologies for which the month numbers might be
```
2 changes: 1 addition & 1 deletion design_notes/named_array_design_doc.md
@@ -258,7 +258,7 @@ Questions:
Variable.coarsen_reshape
Variable.rolling_window

- Variable.set_dims # split this into broadcas_to and expand_dims
+ Variable.set_dims # split this into broadcast_to and expand_dims


# Reordering/Reshaping
4 changes: 4 additions & 0 deletions doc/conf.py
@@ -88,6 +88,7 @@
"sphinxext.rediraffe",
"sphinx_design",
"sphinx_inline_tabs",
"sphinx_remove_toctrees",
]


@@ -198,6 +199,8 @@
# The master toctree document.
master_doc = "index"

+ remove_from_toctrees = ["generated/*"]

# General information about the project.
project = "xarray"
copyright = f"2014-{datetime.datetime.now().year}, xarray Developers"
@@ -244,6 +247,7 @@
repository_url="https://github.com/pydata/xarray",
repository_branch="main",
navigation_with_keys=False, # pydata/pydata-sphinx-theme#1492
+ navigation_depth=4,
path_to_docs="doc",
use_edit_page_button=True,
use_repository_button=True,
4 changes: 2 additions & 2 deletions doc/user-guide/dask.rst
@@ -298,7 +298,7 @@ Automatic parallelization with ``apply_ufunc`` and ``map_blocks``

.. tip::

- Some problems can become embarassingly parallel and thus easy to parallelize
+ Some problems can become embarrassingly parallel and thus easy to parallelize
automatically by rechunking to a frequency, e.g. ``ds.chunk(time=TimeResampler("YE"))``.
See :py:meth:`Dataset.chunk` for more.

@@ -559,7 +559,7 @@ larger chunksizes.

.. tip::

- Many time domain problems become amenable to an embarassingly parallel or blockwise solution
+ Many time domain problems become amenable to an embarrassingly parallel or blockwise solution
(e.g. using :py:func:`xarray.map_blocks`, :py:func:`dask.array.map_blocks`, or
:py:func:`dask.array.blockwise`) by rechunking to a frequency along the time dimension.
Provide :py:class:`xarray.groupers.TimeResampler` objects to :py:meth:`Dataset.chunk` to do so.
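
A minimal sketch of that tip, using the ``Dataset.chunk`` / ``xarray.groupers.TimeResampler`` combination the text references (the dataset is invented for illustration, and dask must be installed):

```python
# Rechunk along time so each dask chunk holds whole resampling groups,
# turning the subsequent reduction into a blockwise problem.
import numpy as np
import pandas as pd
import xarray as xr
from xarray.groupers import TimeResampler

time = pd.date_range("2000-01-01", periods=10 * 365, freq="D")
ds = xr.Dataset({"a": ("time", np.random.randn(time.size))}, coords={"time": time})

rechunked = ds.chunk(time=TimeResampler("YE"))  # one chunk per year-end group
result = rechunked.resample(time="YE").mean()
```
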
2 changes: 1 addition & 1 deletion doc/user-guide/data-structures.rst
@@ -289,7 +289,7 @@ pressure that were made under various conditions:
* the measurements were made on four different days;
* they were made at two separate locations, which we will represent using
their latitude and longitude; and
- * they were made using instruments by three different manufacutrers, which we
+ * they were made using instruments by three different manufacturers, which we
will refer to as `'manufac1'`, `'manufac2'`, and `'manufac3'`.

.. ipython:: python
6 changes: 6 additions & 0 deletions doc/user-guide/groupby.rst
@@ -305,6 +305,12 @@ Use grouper objects to group by multiple dimensions:
    from xarray.groupers import UniqueGrouper

    da.groupby(["lat", "lon"]).sum()

The above is sugar for using ``UniqueGrouper`` objects directly:

.. ipython:: python

    da.groupby(lat=UniqueGrouper(), lon=UniqueGrouper()).sum()
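
A self-contained illustration of that equivalence, with a small made-up DataArray:

```python
# Grouping by two coordinate names is sugar for passing UniqueGrouper objects.
import numpy as np
import xarray as xr
from xarray.groupers import UniqueGrouper

da = xr.DataArray(
    np.arange(12.0).reshape(4, 3),
    dims=("lat", "lon"),
    coords={"lat": [10, 10, 20, 20], "lon": [0, 1, 1]},
)

sugar = da.groupby(["lat", "lon"]).sum()
explicit = da.groupby(lat=UniqueGrouper(), lon=UniqueGrouper()).sum()
assert sugar.identical(explicit)
```
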