From ea9b39d7653ebb35c01c2d7860600c91d07c37aa Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Thu, 6 Jun 2024 18:12:41 +0000
Subject: [PATCH 01/15] Update min python version, allow python 3.12

---
 .github/workflows/py-dependency-check.yml     |  4 +-
 .github/workflows/py-unittests.yml            |  4 +-
 api/python/cellxgene_census/pyproject.toml    | 11 +++--
 .../tests/experimental/ml/test_pytorch.py     | 40 ++++++++-----------
 api/python/notebooks/README.md                |  2 +-
 docs/cellxgene_census_docsite_installation.md |  2 +-
 6 files changed, 28 insertions(+), 35 deletions(-)

diff --git a/.github/workflows/py-dependency-check.yml b/.github/workflows/py-dependency-check.yml
index daa85c09a..d992f36e1 100644
--- a/.github/workflows/py-dependency-check.yml
+++ b/.github/workflows/py-dependency-check.yml
@@ -22,10 +22,10 @@ jobs:
       fail-fast: false  # don't fail-fast, as errors are often specific to a single cell in the matrix
       matrix:
         os: [single-cell-8c64g-runner, macos-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]
         exclude:
           - os: macos-latest
-            python-version: "3.8"
+            python-version: "3.10"
 
     runs-on: ${{matrix.os}}
 
diff --git a/.github/workflows/py-unittests.yml b/.github/workflows/py-unittests.yml
index 115083909..a11c6d24c 100644
--- a/.github/workflows/py-unittests.yml
+++ b/.github/workflows/py-unittests.yml
@@ -18,10 +18,10 @@ jobs:
       fail-fast: false  # Don't stop the workflow if one of the jobs fails
       matrix:
         os: [single-cell-8c64g-runner, macos-latest]
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.10", "3.11", "3.12"]
         exclude:
           - os: macos-latest
-            python-version: "3.8"
+            python-version: "3.10"
 
     runs-on: ${{matrix.os}}
 
diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml
index 3fa7a9c92..8c9fa9026 100644
--- a/api/python/cellxgene_census/pyproject.toml
+++ b/api/python/cellxgene_census/pyproject.toml
@@ -11,7 +11,7 @@ authors = [
 ]
 license = { text = "MIT" }
 readme = "README.md"
-requires-python = ">= 3.8, < 3.12"
+requires-python = ">= 3.10, < 3.13"
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
@@ -22,10 +22,9 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Bio-Informatics",
     "Operating System :: POSIX :: Linux",
     "Operating System :: MacOS :: MacOS X",
-    "Programming Language :: Python :: 3.8",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
 ]
 dependencies= [
     # NOTE: the tiledbsoma version must be >= to the version used in the Census builder, to
@@ -33,7 +32,7 @@ dependencies= [
     # Make sure this version does not fall behind the builder's tiledbsoma version.
     "tiledbsoma==1.11.4",
     "anndata",
-    "numpy>=1.21,<2.0",
+    "numpy>=1.23,<2.0",
     "requests",
     "typing_extensions",
     "s3fs>=2021.06.1",
@@ -43,7 +42,7 @@ dependencies= [
 experimental = [
     "torch~=2.2.0",
     "torchdata~=0.7",
-    "scikit-learn~=1.0",
+    "scikit-learn>=1.2",
     "scikit-misc>=0.2",  # scikit-misc 0.3 dropped Python 3.8 support
     "psutil~=5.0",
     "datasets~=2.0",
@@ -78,7 +77,7 @@ root = "../../.."
 [tool.ruff]
 line-length = 120
 src = ["api/python/cellxgene_census/src"]
-target-version = "py38"
+target-version = "py310"
 
 [tool.ruff.lint]
 select = [
diff --git a/api/python/cellxgene_census/tests/experimental/ml/test_pytorch.py b/api/python/cellxgene_census/tests/experimental/ml/test_pytorch.py
index b3ac43fc4..396973a9e 100644
--- a/api/python/cellxgene_census/tests/experimental/ml/test_pytorch.py
+++ b/api/python/cellxgene_census/tests/experimental/ml/test_pytorch.py
@@ -1,6 +1,5 @@
 import pathlib
-import sys
-from typing import Callable, List, Optional, Sequence, Union
+from collections.abc import Callable, Sequence
 from unittest.mock import patch
 
 import numpy as np
@@ -49,17 +48,17 @@ def pytorch_seq_x_value_gen(obs_range: range, var_range: range) -> spmatrix:
 
 
 @pytest.fixture
-def X_layer_names() -> List[str]:
+def X_layer_names() -> list[str]:
     return ["raw"]
 
 
 @pytest.fixture
-def obsp_layer_names() -> Optional[List[str]]:
+def obsp_layer_names() -> list[str] | None:
     return None
 
 
 @pytest.fixture
-def varp_layer_names() -> Optional[List[str]]:
+def varp_layer_names() -> list[str] | None:
     return None
 
 
@@ -99,8 +98,8 @@ def add_sparse_array(
 @pytest.fixture(scope="function")
 def soma_experiment(
     tmp_path: pathlib.Path,
-    obs_range: Union[int, range],
-    var_range: Union[int, range],
+    obs_range: int | range,
+    var_range: int | range,
     X_value_gen: Callable[[range, range], sparse.spmatrix],
     obsp_layer_names: Sequence[str],
     varp_layer_names: Sequence[str],
@@ -363,10 +362,6 @@ def test_encoders(soma_experiment: Experiment) -> None:
 
 
 @pytest.mark.experimental
-@pytest.mark.skipif(
-    (sys.version_info.major, sys.version_info.minor) == (3, 9),
-    reason="fails intermittently with OOM error for 3.9",
-)
 # noinspection PyTestParametrized
 @pytest.mark.parametrize("obs_range,var_range,X_value_gen", [(6, 3, pytorch_x_value_gen)])
 def test_multiprocessing__returns_full_result(soma_experiment: Experiment) -> None:
@@ -398,11 +393,11 @@ def test_distributed__returns_data_partition_for_rank(
     """Tests pytorch._partition_obs_joinids() behavior in a simulated PyTorch distributed processing mode,
     using mocks to avoid having to do real PyTorch distributed setup."""
 
-    with patch("cellxgene_census.experimental.ml.pytorch.dist.is_initialized") as mock_dist_is_initialized, patch(
-        "cellxgene_census.experimental.ml.pytorch.dist.get_rank"
-    ) as mock_dist_get_rank, patch(
-        "cellxgene_census.experimental.ml.pytorch.dist.get_world_size"
-    ) as mock_dist_get_world_size:
+    with (
+        patch("cellxgene_census.experimental.ml.pytorch.dist.is_initialized") as mock_dist_is_initialized,
+        patch("cellxgene_census.experimental.ml.pytorch.dist.get_rank") as mock_dist_get_rank,
+        patch("cellxgene_census.experimental.ml.pytorch.dist.get_world_size") as mock_dist_get_world_size,
+    ):
         mock_dist_is_initialized.return_value = True
         mock_dist_get_rank.return_value = 1
         mock_dist_get_world_size.return_value = 3
@@ -433,13 +428,12 @@ def test_distributed_and_multiprocessing__returns_data_partition_for_rank(
     DataLoader multiprocessing mode, using mocks to avoid having to do distributed pytorch
     setup or real DataLoader multiprocessing."""
 
-    with patch("torch.utils.data.get_worker_info") as mock_get_worker_info, patch(
-        "cellxgene_census.experimental.ml.pytorch.dist.is_initialized"
-    ) as mock_dist_is_initialized, patch(
-        "cellxgene_census.experimental.ml.pytorch.dist.get_rank"
-    ) as mock_dist_get_rank, patch(
-        "cellxgene_census.experimental.ml.pytorch.dist.get_world_size"
-    ) as mock_dist_get_world_size:
+    with (
+        patch("torch.utils.data.get_worker_info") as mock_get_worker_info,
+        patch("cellxgene_census.experimental.ml.pytorch.dist.is_initialized") as mock_dist_is_initialized,
+        patch("cellxgene_census.experimental.ml.pytorch.dist.get_rank") as mock_dist_get_rank,
+        patch("cellxgene_census.experimental.ml.pytorch.dist.get_world_size") as mock_dist_get_world_size,
+    ):
         mock_get_worker_info.return_value = WorkerInfo(id=1, num_workers=2, seed=1234)
         mock_dist_is_initialized.return_value = True
         mock_dist_get_rank.return_value = 1
diff --git a/api/python/notebooks/README.md b/api/python/notebooks/README.md
index 2b1c683ef..cdf89656d 100644
--- a/api/python/notebooks/README.md
+++ b/api/python/notebooks/README.md
@@ -9,7 +9,7 @@ Demonstration notebooks for the CZ CELLxGENE Discover Census. There are two kind
 
 You must be on a Linux or MacOS system, with the following installed:
 
-* Python 3.8 to 3.11
+* Python 3.10 to 3.12
 * Jupyter or some other means of running notebooks (e.g., vscode)
 
 For now, it is recommended that you do all this on a host with sufficient memory,
diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md
index 4654eb37a..0cfbd969b 100644
--- a/docs/cellxgene_census_docsite_installation.md
+++ b/docs/cellxgene_census_docsite_installation.md
@@ -4,7 +4,7 @@
 
 The Census API requires a Linux or MacOS system with:
 
-- Python 3.8 to Python 3.11. Or R, supported versions TBD.
+- Python 3.10 to Python 3.12. Or R, supported versions TBD.
 - Recommended: >16 GB of memory.
 - Recommended: >5 Mbps internet connection.
 - Recommended: for increased performance use the API through a AWS-EC2 instance from the region `us-west-2`. The Census data builds are hosted in a AWS-S3 bucket in that region.

From aa014d2e585e937e118a731cf19960df33234250 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Thu, 6 Jun 2024 20:43:26 +0000
Subject: [PATCH 02/15] try forcing pyarrow 14

---
 .github/workflows/py-unittests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/py-unittests.yml b/.github/workflows/py-unittests.yml
index a11c6d24c..139a65dac 100644
--- a/.github/workflows/py-unittests.yml
+++ b/.github/workflows/py-unittests.yml
@@ -41,6 +41,7 @@ jobs:
           pip install --use-pep517 accumulation-tree # Geneformer dependency needs --use-pep517 for Cython
           GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ./api/python/cellxgene_census/scripts/requirements-dev.txt
           pip install -e './api/python/cellxgene_census/[experimental]'
+          pip install "pyarrow~=14.0" # TODO: Remove this before merging!
       - name: Report Dependency Versions
         run: pip list
       - name: Test with pytest (API, main tests)

From b596f68a57fb304723a774d369d33a7898fddd7b Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Thu, 6 Jun 2024 21:02:15 +0000
Subject: [PATCH 03/15] try forcing pyarrow 14, but better

---
 .github/workflows/py-unittests.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/py-unittests.yml b/.github/workflows/py-unittests.yml
index 139a65dac..15aa924aa 100644
--- a/.github/workflows/py-unittests.yml
+++ b/.github/workflows/py-unittests.yml
@@ -40,8 +40,7 @@ jobs:
           python -m pip install -U pip setuptools wheel
           pip install --use-pep517 accumulation-tree # Geneformer dependency needs --use-pep517 for Cython
           GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ./api/python/cellxgene_census/scripts/requirements-dev.txt
-          pip install -e './api/python/cellxgene_census/[experimental]'
-          pip install "pyarrow~=14.0" # TODO: Remove this before merging!
+          pip install -e './api/python/cellxgene_census/[experimental]' "pyarrow~=14.0"  # TODO: remove pyarrow specification
       - name: Report Dependency Versions
         run: pip list
       - name: Test with pytest (API, main tests)

From 3e287ad8437eb49bed74f5a03ac2c8bf3f6d7af3 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Thu, 6 Jun 2024 21:18:59 +0000
Subject: [PATCH 04/15] nvm (revert forcing pyarrow 14 in unit test workflow)

---
 .github/workflows/py-unittests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/py-unittests.yml b/.github/workflows/py-unittests.yml
index 15aa924aa..a11c6d24c 100644
--- a/.github/workflows/py-unittests.yml
+++ b/.github/workflows/py-unittests.yml
@@ -40,7 +40,7 @@ jobs:
           python -m pip install -U pip setuptools wheel
           pip install --use-pep517 accumulation-tree # Geneformer dependency needs --use-pep517 for Cython
           GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ./api/python/cellxgene_census/scripts/requirements-dev.txt
-          pip install -e './api/python/cellxgene_census/[experimental]' "pyarrow~=14.0"  # TODO: remove pyarrow specification
+          pip install -e './api/python/cellxgene_census/[experimental]'
       - name: Report Dependency Versions
         run: pip list
       - name: Test with pytest (API, main tests)

From 2ea553a2632db5160c5ffa2fda1cec54aff6ebea Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Fri, 7 Jun 2024 18:55:41 +0000
Subject: [PATCH 05/15] Drop 3.12

---
 .github/workflows/py-dependency-check.yml     | 2 +-
 .github/workflows/py-unittests.yml            | 2 +-
 api/python/cellxgene_census/pyproject.toml    | 3 +--
 api/python/notebooks/README.md                | 2 +-
 docs/cellxgene_census_docsite_installation.md | 2 +-
 5 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/py-dependency-check.yml b/.github/workflows/py-dependency-check.yml
index d992f36e1..e49a3496c 100644
--- a/.github/workflows/py-dependency-check.yml
+++ b/.github/workflows/py-dependency-check.yml
@@ -22,7 +22,7 @@ jobs:
       fail-fast: false  # don't fail-fast, as errors are often specific to a single cell in the matrix
       matrix:
         os: [single-cell-8c64g-runner, macos-latest]
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11"]
         exclude:
           - os: macos-latest
             python-version: "3.10"
diff --git a/.github/workflows/py-unittests.yml b/.github/workflows/py-unittests.yml
index a11c6d24c..1a12accac 100644
--- a/.github/workflows/py-unittests.yml
+++ b/.github/workflows/py-unittests.yml
@@ -18,7 +18,7 @@ jobs:
       fail-fast: false  # Don't stop the workflow if one of the jobs fails
       matrix:
         os: [single-cell-8c64g-runner, macos-latest]
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.10", "3.11"]
         exclude:
           - os: macos-latest
             python-version: "3.10"
diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml
index 8c9fa9026..8dba82461 100644
--- a/api/python/cellxgene_census/pyproject.toml
+++ b/api/python/cellxgene_census/pyproject.toml
@@ -11,7 +11,7 @@ authors = [
 ]
 license = { text = "MIT" }
 readme = "README.md"
-requires-python = ">= 3.10, < 3.13"
+requires-python = ">= 3.10, < 3.12"
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
@@ -24,7 +24,6 @@ classifiers = [
     "Operating System :: MacOS :: MacOS X",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
-    "Programming Language :: Python :: 3.12",
 ]
 dependencies= [
     # NOTE: the tiledbsoma version must be >= to the version used in the Census builder, to
diff --git a/api/python/notebooks/README.md b/api/python/notebooks/README.md
index cdf89656d..a53e477ab 100644
--- a/api/python/notebooks/README.md
+++ b/api/python/notebooks/README.md
@@ -9,7 +9,7 @@ Demonstration notebooks for the CZ CELLxGENE Discover Census. There are two kind
 
 You must be on a Linux or MacOS system, with the following installed:
 
-* Python 3.10 to 3.12
+* Python 3.10 to 3.11
 * Jupyter or some other means of running notebooks (e.g., vscode)
 
 For now, it is recommended that you do all this on a host with sufficient memory,
diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md
index 0cfbd969b..41347e9a3 100644
--- a/docs/cellxgene_census_docsite_installation.md
+++ b/docs/cellxgene_census_docsite_installation.md
@@ -4,7 +4,7 @@
 
 The Census API requires a Linux or MacOS system with:
 
-- Python 3.10 to Python 3.12. Or R, supported versions TBD.
+- Python 3.10 to Python 3.11. Or R, supported versions TBD.
 - Recommended: >16 GB of memory.
 - Recommended: >5 Mbps internet connection.
 - Recommended: for increased performance use the API through a AWS-EC2 instance from the region `us-west-2`. The Census data builds are hosted in a AWS-S3 bucket in that region.

From b889636f8220ecda657ae95be28dcad930df1c21 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Tue, 2 Jul 2024 21:08:53 +0000
Subject: [PATCH 06/15] Ignore bugbear rule for zip

---
 api/python/cellxgene_census/pyproject.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml
index 277f1282a..653a9f850 100644
--- a/api/python/cellxgene_census/pyproject.toml
+++ b/api/python/cellxgene_census/pyproject.toml
@@ -123,6 +123,8 @@ ignore = [
     "D205",
     # Prefer absolute imports over relative imports from parent modules TODO: enable
     "TID252",
+    # It's okay to use zip without the strict kwarg. In fact, numba doesn't like it when you use it
+    "B905",
 ]
 
 [tool.ruff.lint.pydocstyle]

From d3908058f97ce499188925dbcdc6b29dc9db236d Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Tue, 2 Jul 2024 21:12:49 +0000
Subject: [PATCH 07/15] Automated formatting

---
 .../src/cellxgene_census/_get_anndata.py      | 49 +++++++--------
 .../src/cellxgene_census/_open.py             | 18 +++---
 .../cellxgene_census/_release_directory.py    | 35 ++++++-----
 .../experimental/_embedding.py                |  6 +-
 .../ml/huggingface/cell_dataset_builder.py    | 13 ++--
 .../ml/huggingface/geneformer_tokenizer.py    | 11 ++--
 .../experimental/ml/pytorch.py                | 59 ++++++++++---------
 .../experimental/pp/_highly_variable_genes.py |  4 +-
 .../experimental/pp/_online.py                | 12 ++--
 .../experimental/pp/_stats.py                 |  3 +-
 .../experimental/util/_eager_iter.py          | 11 ++--
 .../tests/experimental/pp/test_stats.py       |  8 +--
 .../cellxgene_census/tests/test_acceptance.py | 25 ++++----
 .../tests/test_get_anndata.py                 | 16 ++---
 .../cellxgene_census/tests/test_lts_compat.py |  3 +-
 15 files changed, 139 insertions(+), 134 deletions(-)

diff --git a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
index e37337184..9d7a5c41b 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
@@ -7,7 +7,8 @@
 Methods to retrieve slices of the census as AnnData objects.
 """
 
-from typing import Literal, Optional, Sequence
+from collections.abc import Sequence
+from typing import Literal
 from warnings import warn
 
 import anndata
@@ -27,20 +28,20 @@ def get_anndata(
     organism: str,
     measurement_name: str = "RNA",
     X_name: str = "raw",
-    X_layers: Optional[Sequence[str]] = (),
-    obsm_layers: Optional[Sequence[str]] = (),
-    obsp_layers: Optional[Sequence[str]] = (),
-    varm_layers: Optional[Sequence[str]] = (),
-    varp_layers: Optional[Sequence[str]] = (),
-    obs_value_filter: Optional[str] = None,
-    obs_coords: Optional[SparseDFCoord] = None,
-    var_value_filter: Optional[str] = None,
-    var_coords: Optional[SparseDFCoord] = None,
-    column_names: Optional[soma.AxisColumnNames] = None,
-    obs_embeddings: Optional[Sequence[str]] = (),
-    var_embeddings: Optional[Sequence[str]] = (),
-    obs_column_names: Optional[Sequence[str]] = None,
-    var_column_names: Optional[Sequence[str]] = None,
+    X_layers: Sequence[str] | None = (),
+    obsm_layers: Sequence[str] | None = (),
+    obsp_layers: Sequence[str] | None = (),
+    varm_layers: Sequence[str] | None = (),
+    varp_layers: Sequence[str] | None = (),
+    obs_value_filter: str | None = None,
+    obs_coords: SparseDFCoord | None = None,
+    var_value_filter: str | None = None,
+    var_coords: SparseDFCoord | None = None,
+    column_names: soma.AxisColumnNames | None = None,
+    obs_embeddings: Sequence[str] | None = (),
+    var_embeddings: Sequence[str] | None = (),
+    obs_column_names: Sequence[str] | None = None,
+    var_column_names: Sequence[str] | None = None,
 ) -> anndata.AnnData:
     """Convenience wrapper around :class:`tiledbsoma.Experiment` query, to build and execute a query,
     and return it as an :class:`anndata.AnnData` object.
@@ -176,9 +177,9 @@ def _get_axis_metadata(
     axis: Literal["obs", "var"],
     organism: str,
     *,
-    value_filter: Optional[str] = None,
-    coords: Optional[SparseDFCoord] = slice(None),
-    column_names: Optional[Sequence[str]] = None,
+    value_filter: str | None = None,
+    coords: SparseDFCoord | None = slice(None),
+    column_names: Sequence[str] | None = None,
 ) -> pd.DataFrame:
     exp = _get_experiment(census, organism)
     coords = (slice(None),) if coords is None else (coords,)
@@ -198,9 +199,9 @@ def get_obs(
     census: soma.Collection,
     organism: str,
     *,
-    value_filter: Optional[str] = None,
-    coords: Optional[SparseDFCoord] = slice(None),
-    column_names: Optional[Sequence[str]] = None,
+    value_filter: str | None = None,
+    coords: SparseDFCoord | None = slice(None),
+    column_names: Sequence[str] | None = None,
 ) -> pd.DataFrame:
     """Get the observation metadata for a query on the census.
 
@@ -230,9 +231,9 @@ def get_var(
     census: soma.Collection,
     organism: str,
     *,
-    value_filter: Optional[str] = None,
-    coords: Optional[SparseDFCoord] = slice(None),
-    column_names: Optional[Sequence[str]] = None,
+    value_filter: str | None = None,
+    coords: SparseDFCoord | None = slice(None),
+    column_names: Sequence[str] | None = None,
 ) -> pd.DataFrame:
     """Get the variable metadata for a query on the census.
 
diff --git a/api/python/cellxgene_census/src/cellxgene_census/_open.py b/api/python/cellxgene_census/src/cellxgene_census/_open.py
index 640d2d9a6..eb49d96d5 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/_open.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/_open.py
@@ -10,7 +10,7 @@
 import logging
 import os.path
 import urllib.parse
-from typing import Any, Dict, Optional, get_args
+from typing import Any, get_args
 
 import s3fs
 import tiledbsoma as soma
@@ -28,7 +28,7 @@
 
 DEFAULT_CENSUS_VERSION = "stable"
 
-DEFAULT_TILEDB_CONFIGURATION: Dict[str, Any] = {
+DEFAULT_TILEDB_CONFIGURATION: dict[str, Any] = {
     # https://docs.tiledb.com/main/how-to/configuration#configuration-parameters
     "py.init_buffer_bytes": 1 * 1024**3,
     "soma.init_buffer_bytes": 1 * 1024**3,
@@ -67,7 +67,7 @@ def _resolve_census_locator(locator: CensusLocator, mirror: CensusMirror) -> Res
 
 def _open_soma(
     locator: ResolvedCensusLocator,
-    context: Optional[soma.options.SOMATileDBContext] = None,
+    context: soma.options.SOMATileDBContext | None = None,
 ) -> soma.Collection:
     """Private. Merge config defaults and return open census as a soma Collection/context."""
     # if no user-defined context, cellxgene_census defaults take precedence over SOMA defaults
@@ -81,7 +81,7 @@ def _open_soma(
     return soma.open(locator["uri"], mode="r", soma_type=soma.Collection, context=context)
 
 
-def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) -> soma.options.SOMATileDBContext:
+def get_default_soma_context(tiledb_config: dict[str, Any] | None = None) -> soma.options.SOMATileDBContext:
     """Return a :class:`tiledbsoma.SOMATileDBContext` with sensible defaults that can be further customized by the
     user. The customized context can then be passed to :func:`cellxgene_census.open_soma` with the ``context``
     argument or to :meth:`somacore.SOMAObject.open` with the ``context`` argument, such as
@@ -126,11 +126,11 @@ def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) ->
 
 def open_soma(
     *,
-    census_version: Optional[str] = DEFAULT_CENSUS_VERSION,
-    mirror: Optional[str] = None,
-    uri: Optional[str] = None,
-    tiledb_config: Optional[Dict[str, Any]] = None,
-    context: Optional[soma.options.SOMATileDBContext] = None,
+    census_version: str | None = DEFAULT_CENSUS_VERSION,
+    mirror: str | None = None,
+    uri: str | None = None,
+    tiledb_config: dict[str, Any] | None = None,
+    context: soma.options.SOMATileDBContext | None = None,
 ) -> soma.Collection:
     """Open the Census by version or URI.
 
diff --git a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py
index a5da8f08b..c33c5fd7a 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py
@@ -7,9 +7,8 @@
 Methods to retrieve information about versions of the publicly hosted Census object.
 """
 
-import typing
 from collections import OrderedDict
-from typing import Any, Dict, Literal, Optional, Union, cast
+from typing import Any, Literal, cast
 
 import requests
 from typing_extensions import NotRequired, TypedDict
@@ -35,7 +34,7 @@ class CensusLocator(TypedDict):
 
     uri: str
     relative_uri: str
-    s3_region: Optional[str]
+    s3_region: str | None
 
 
 class CensusVersionRetraction(TypedDict):
@@ -53,13 +52,13 @@ class CensusVersionRetraction(TypedDict):
     """
 
     date: str
-    reason: Optional[str]
-    info_url: Optional[str]
-    replaced_by: Optional[str]
+    reason: str | None
+    info_url: str | None
+    replaced_by: str | None
 
 
 ReleaseFlag = Literal["lts", "retracted"]
-ReleaseFlags = Dict[ReleaseFlag, bool]
+ReleaseFlags = dict[ReleaseFlag, bool]
 
 
 class CensusVersionDescription(TypedDict):
@@ -80,7 +79,7 @@ class CensusVersionDescription(TypedDict):
             If retracted, details of the retraction.
     """
 
-    release_date: Optional[str]
+    release_date: str | None
     release_build: str
     soma: CensusLocator
     h5ads: CensusLocator
@@ -88,7 +87,7 @@ class CensusVersionDescription(TypedDict):
     retraction: NotRequired[CensusVersionRetraction]
 
 
-CensusDirectory = Dict[CensusVersionName, Union[CensusVersionName, CensusVersionDescription]]
+CensusDirectory = dict[CensusVersionName, CensusVersionName | CensusVersionDescription]
 
 """
 A provider identifies a storage medium for the Census, which can either be a cloud provider or a local file.
@@ -130,10 +129,10 @@ class CensusMirror(TypedDict):
 
     provider: Provider
     base_uri: str
-    region: Optional[str]
+    region: str | None
 
 
-CensusMirrors = Dict[CensusMirrorName, Union[CensusMirrorName, CensusMirror]]
+CensusMirrors = dict[CensusMirrorName, CensusMirrorName | CensusMirror]
 
 
 class ResolvedCensusLocator(TypedDict):
@@ -152,7 +151,7 @@ class ResolvedCensusLocator(TypedDict):
     """
 
     uri: str
-    region: Optional[str]
+    region: str | None
     provider: str
 
 
@@ -197,8 +196,8 @@ def get_census_version_description(census_version: str) -> CensusVersionDescript
 
 
 def get_census_version_directory(
-    *, lts: Optional[bool] = None, retracted: Optional[bool] = False
-) -> Dict[CensusVersionName, CensusVersionDescription]:
+    *, lts: bool | None = None, retracted: bool | None = False
+) -> dict[CensusVersionName, CensusVersionDescription]:
     """Get the directory of Census versions currently available, optionally filtering by specified
     flags. If a filtering flag is not specified, Census versions will not be filtered by that flag.
     Defaults to including both "long-term stable" (LTS) and weekly Census versions, and excluding
@@ -355,7 +354,7 @@ def get_census_version_directory(
 
     directory: dict[str, str | dict[str, Any]] = response.json()
     directory_out: CensusDirectory = {}
-    aliases: typing.Set[CensusVersionName] = set()
+    aliases: set[CensusVersionName] = set()
 
     # Resolve all aliases for easier use
     for census_version_name in list(directory.keys()):
@@ -398,7 +397,7 @@ def get_census_version_directory(
         directory_out[census_version_name] = census_version_description.copy()
 
     # Cast is safe, as we have removed all aliases
-    unordered_directory = cast(Dict[CensusVersionName, CensusVersionDescription], directory_out)
+    unordered_directory = cast(dict[CensusVersionName, CensusVersionDescription], directory_out)
 
     # Sort by aliases and release date, descending
     aliased_releases = [(k, v) for k, v in unordered_directory.items() if k in aliases]
@@ -414,7 +413,7 @@ def get_census_version_directory(
     return ordered_directory
 
 
-def get_census_mirror_directory() -> Dict[CensusMirrorName, CensusMirror]:
+def get_census_mirror_directory() -> dict[CensusMirrorName, CensusMirror]:
     """Get the directory of Census mirrors currently available.
 
     Returns:
@@ -426,7 +425,7 @@ def get_census_mirror_directory() -> Dict[CensusMirrorName, CensusMirror]:
     """
     mirrors = _get_census_mirrors()
     del mirrors["default"]
-    return cast(Dict[CensusMirrorName, CensusMirror], mirrors)
+    return cast(dict[CensusMirrorName, CensusMirror], mirrors)
 
 
 def _get_census_mirrors() -> CensusMirrors:
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
index da40da331..afd4b2552 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
@@ -8,7 +8,7 @@
 
 import json
 import warnings
-from typing import Any, Dict, cast
+from typing import Any, cast
 
 import numpy as np
 import numpy.typing as npt
@@ -53,7 +53,7 @@ def get_embedding_metadata(embedding_uri: str, context: soma.options.SOMATileDBC
         embedding_metadata = json.loads(E.metadata["CxG_embedding_info"])
         assert isinstance(embedding_metadata, dict)
 
-    return cast(Dict[str, Any], embedding_metadata)
+    return cast(dict[str, Any], embedding_metadata)
 
 
 def _get_embedding(
@@ -192,7 +192,7 @@ def get_embedding_metadata_by_name(
     response = requests.get(CELL_CENSUS_EMBEDDINGS_MANIFEST_URL)
     response.raise_for_status()
 
-    manifest = cast(Dict[str, Dict[str, Any]], response.json())
+    manifest = cast(dict[str, dict[str, Any]], response.json())
     embeddings = []
     for _, obj in manifest.items():
         if (
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/cell_dataset_builder.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/cell_dataset_builder.py
index 6b274e8fd..910abc015 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/cell_dataset_builder.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/cell_dataset_builder.py
@@ -1,6 +1,7 @@
 import uuid
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Generator, Optional
+from collections.abc import Generator
+from typing import Any
 
 import scipy.sparse
 from datasets import Dataset
@@ -37,7 +38,7 @@ def __init__(
         measurement_name: str = "RNA",
         layer_name: str = "raw",
         *,
-        block_size: Optional[int] = None,
+        block_size: int | None = None,
         **kwargs: Any,
     ):
         """Initialize the CellDatasetBuilder to process the results of a Census
@@ -55,13 +56,13 @@ def __init__(
         self.layer_name = layer_name
         self.block_size = block_size
 
-    def build(self, from_generator_kwargs: Optional[Dict[str, Any]] = None) -> Dataset:
+    def build(self, from_generator_kwargs: dict[str, Any] | None = None) -> Dataset:
         """Build the dataset from query results.
 
         - `from_generator_kwargs`: kwargs passed through to `Dataset.from_generator()`
         """
 
-        def gen() -> Generator[Dict[str, Any], None, None]:
+        def gen() -> Generator[dict[str, Any], None, None]:
             for Xblock, (block_cell_joinids, _) in (
                 self.X(self.layer_name).blockwise(axis=0, reindex_disable_on_axis=[1], size=self.block_size).scipy()
             ):
@@ -72,7 +73,7 @@ def gen() -> Generator[Dict[str, Any], None, None]:
         return Dataset.from_generator(_DatasetGeneratorPickleHack(gen), **(from_generator_kwargs or {}))
 
     @abstractmethod
-    def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> Dict[str, Any]:
+    def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> dict[str, Any]:
         """Abstract method to process the X row for one cell into a Dataset item.
 
         - `cell_joinid`: The cell `soma_joinid`.
@@ -85,7 +86,7 @@ def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> Dict[str
 class _DatasetGeneratorPickleHack:
     """SEE: https://github.com/huggingface/datasets/issues/6194."""
 
-    def __init__(self, generator: Any, generator_id: Optional[str] = None) -> None:
+    def __init__(self, generator: Any, generator_id: str | None = None) -> None:
         self.generator = generator
         self.generator_id = generator_id if generator_id is not None else str(uuid.uuid4())
 
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/geneformer_tokenizer.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/geneformer_tokenizer.py
index 3c8310fe1..224565400 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/geneformer_tokenizer.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/huggingface/geneformer_tokenizer.py
@@ -1,5 +1,6 @@
 import pickle
-from typing import Any, Dict, Optional, Sequence, Set
+from collections.abc import Sequence
+from typing import Any
 
 import numpy as np
 import numpy.typing as npt
@@ -42,7 +43,7 @@ class GeneformerTokenizer(CellDatasetBuilder):
     - and the specified `obs_column_names` (cell metadata from the experiment obs dataframe)
     """
 
-    obs_column_names: Set[str]
+    obs_column_names: set[str]
     max_input_tokens: int
 
     # set of gene soma_joinids corresponding to genes modeled by Geneformer:
@@ -54,8 +55,8 @@ def __init__(
         self,
         experiment: tiledbsoma.Experiment,
         *,
-        obs_column_names: Optional[Sequence[str]] = None,
-        obs_attributes: Optional[Sequence[str]] = None,
+        obs_column_names: Sequence[str] | None = None,
+        obs_attributes: Sequence[str] | None = None,
         max_input_tokens: int = 2048,
         token_dictionary_file: str = "",
         gene_median_file: str = "",
@@ -152,7 +153,7 @@ def __enter__(self) -> "GeneformerTokenizer":
         self.obs_df = self.obs(column_names=obs_column_names).concat().to_pandas().set_index("soma_joinid")
         return self
 
-    def cell_item(self, cell_joinid: int, cell_Xrow: scipy.sparse.csr_matrix) -> Dict[str, Any]:
+    def cell_item(self, cell_joinid: int, cell_Xrow: scipy.sparse.csr_matrix) -> dict[str, Any]:
         """Given the expression vector for one cell, compute the Dataset item providing
         the Geneformer inputs (token sequence and metadata).
         """
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py
index 6bf9aa30c..c17c29818 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py
@@ -1,11 +1,12 @@
 import gc
 import logging
 import os
+from collections.abc import Iterator, Sequence
 from contextlib import contextmanager
 from datetime import timedelta
 from math import ceil
 from time import time
-from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple
+from typing import Any
 
 import numpy as np
 import numpy.typing as npt
@@ -31,7 +32,7 @@
 pytorch_logger = logging.getLogger("cellxgene_census.experimental.pytorch")
 
 # TODO: Rename to reflect the correct order of the Tensors within the tuple: (X, obs)
-ObsAndXDatum = Tuple[Tensor, Tensor]
+ObsAndXDatum = tuple[Tensor, Tensor]
 """Return type of ``ExperimentDataPipe`` that pairs a Tensor of ``obs`` row(s) with a Tensor of ``X`` matrix row(s).
 The Tensors are rank 1 if ``batch_size`` is 1, otherwise the Tensors are rank 2."""
 
@@ -53,7 +54,7 @@ def __len__(self) -> int:
         return len(self.obs)
 
 
-Encoders = Dict[str, LabelEncoder]
+Encoders = dict[str, LabelEncoder]
 """A dictionary of ``LabelEncoder``s keyed by the ``obs`` column name."""
 
 
@@ -92,7 +93,7 @@ def __add__(self, other: "Stats") -> "Stats":
 @contextmanager
 def _open_experiment(
     uri: str,
-    aws_region: Optional[str] = None,
+    aws_region: str | None = None,
 ) -> soma.Experiment:
     """Internal method for opening a SOMA ``Experiment`` as a context manager."""
     context = get_default_soma_context().replace(tiledb_config={"vfs.s3.region": aws_region} if aws_region else {})
@@ -119,10 +120,10 @@ def __init__(
         obs: soma.DataFrame,
         X: soma.SparseNDArray,
         obs_column_names: Sequence[str],
-        obs_joinids_chunked: List[npt.NDArray[np.int64]],
+        obs_joinids_chunked: list[npt.NDArray[np.int64]],
         var_joinids: npt.NDArray[np.int64],
-        shuffle_chunk_count: Optional[int] = None,
-        shuffle_rng: Optional[Generator] = None,
+        shuffle_chunk_count: int | None = None,
+        shuffle_rng: Generator | None = None,
     ):
         self.obs = obs
         self.X = X
@@ -191,7 +192,7 @@ def __next__(self) -> _SOMAChunk:
         return _SOMAChunk(obs=obs_batch, X=X_batch, stats=stats)
 
 
-def list_split(arr_list: List[Any], sublist_len: int) -> List[List[Any]]:
+def list_split(arr_list: list[Any], sublist_len: int) -> list[list[Any]]:
     """Splits a python list into a list of sublists where each sublist is of size `sublist_len`.
     TODO: Replace with `itertools.batched` when Python 3.12 becomes the minimum supported version.
     """
@@ -208,7 +209,7 @@ def list_split(arr_list: List[Any], sublist_len: int) -> List[List[Any]]:
     return result
 
 
-def run_gc() -> Tuple[Tuple[Any, Any, Any], Tuple[Any, Any, Any]]:  # noqa: D103
+def run_gc() -> tuple[tuple[Any, Any, Any], tuple[Any, Any, Any]]:  # noqa: D103
     proc = psutil.Process(os.getpid())
 
     pre_gc = proc.memory_full_info(), psutil.virtual_memory(), psutil.swap_memory()
@@ -234,7 +235,7 @@ class _ObsAndXIterator(Iterator[ObsAndXDatum]):
     soma_chunk_iter: Iterator[_SOMAChunk]
     """The iterator for SOMA chunks of paired obs and X data"""
 
-    soma_chunk: Optional[_SOMAChunk]
+    soma_chunk: _SOMAChunk | None
     """The current SOMA chunk of obs and X data"""
 
     i: int = -1
@@ -245,15 +246,15 @@ def __init__(
         obs: soma.DataFrame,
         X: soma.SparseNDArray,
         obs_column_names: Sequence[str],
-        obs_joinids_chunked: List[npt.NDArray[np.int64]],
+        obs_joinids_chunked: list[npt.NDArray[np.int64]],
         var_joinids: npt.NDArray[np.int64],
         batch_size: int,
-        encoders: Dict[str, LabelEncoder],
+        encoders: dict[str, LabelEncoder],
         stats: Stats,
         return_sparse_X: bool,
         use_eager_fetch: bool,
-        shuffle_chunk_count: Optional[int] = None,
-        shuffle_rng: Optional[Generator] = None,
+        shuffle_chunk_count: int | None = None,
+        shuffle_rng: Generator | None = None,
     ) -> None:
         self.soma_chunk_iter = _ObsAndXSOMAIterator(
             obs, X, obs_column_names, obs_joinids_chunked, var_joinids, shuffle_chunk_count, shuffle_rng
@@ -392,15 +393,15 @@ class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsAndXDatum]]):  # type: ig
 
     _initialized: bool
 
-    _obs_joinids: Optional[npt.NDArray[np.int64]]
+    _obs_joinids: npt.NDArray[np.int64] | None
 
-    _var_joinids: Optional[npt.NDArray[np.int64]]
+    _var_joinids: npt.NDArray[np.int64] | None
 
-    _encoders: Optional[Encoders]
+    _encoders: Encoders | None
 
     _stats: Stats
 
-    _shuffle_rng: Optional[Generator]
+    _shuffle_rng: Generator | None
 
     # TODO: Consider adding another convenience method wrapper to construct this object whose signature is more closely
     #  aligned with get_anndata() params (i.e. "exploded" AxisQuery params).
@@ -409,16 +410,16 @@ def __init__(
         experiment: soma.Experiment,
         measurement_name: str = "RNA",
         X_name: str = "raw",
-        obs_query: Optional[soma.AxisQuery] = None,
-        var_query: Optional[soma.AxisQuery] = None,
+        obs_query: soma.AxisQuery | None = None,
+        var_query: soma.AxisQuery | None = None,
         obs_column_names: Sequence[str] = (),
         batch_size: int = 1,
         shuffle: bool = True,
-        seed: Optional[int] = None,
+        seed: int | None = None,
         return_sparse_X: bool = False,
-        soma_chunk_size: Optional[int] = 64,
+        soma_chunk_size: int | None = 64,
         use_eager_fetch: bool = True,
-        shuffle_chunk_count: Optional[int] = 2000,
+        shuffle_chunk_count: int | None = 2000,
     ) -> None:
         r"""Construct a new ``ExperimentDataPipe``.
 
@@ -526,10 +527,10 @@ def _init(self) -> None:
 
     @staticmethod
     def _subset_ids_to_partition(
-        ids_chunked: List[npt.NDArray[np.int64]],
+        ids_chunked: list[npt.NDArray[np.int64]],
         partition_index: int,
         num_partitions: int,
-    ) -> List[npt.NDArray[np.int64]]:
+    ) -> list[npt.NDArray[np.int64]]:
         """Returns a single partition of the obs_joinids_chunked (a 2D ndarray), based upon the current process's distributed rank and world
         size.
         """
@@ -552,7 +553,7 @@ def _compute_partitions(
         loader_partitions: int,
         dist_partition: int,
         num_dist_partitions: int,
-    ) -> Tuple[int, int]:
+    ) -> tuple[int, int]:
         # NOTE: Can alternately use a `worker_init_fn` to split among workers split workload
         total_partitions = num_dist_partitions * loader_partitions
         partition = dist_partition * loader_partitions + loader_partition
@@ -595,7 +596,7 @@ def __iter__(self) -> Iterator[ObsAndXDatum]:
             dist_partition=dist.get_rank() if dist.is_initialized() else 0,
             num_dist_partitions=dist.get_world_size() if dist.is_initialized() else 1,
         )
-        obs_joinids_chunked_partition: List[npt.NDArray[np.int64]] = self._subset_ids_to_partition(
+        obs_joinids_chunked_partition: list[npt.NDArray[np.int64]] = self._subset_ids_to_partition(
             obs_joinids_chunked, partition, partitions
         )
 
@@ -622,7 +623,7 @@ def __iter__(self) -> Iterator[ObsAndXDatum]:
             )
 
     @staticmethod
-    def _chunk_ids(ids: npt.NDArray[np.int64], chunk_size: int) -> List[npt.NDArray[np.int64]]:
+    def _chunk_ids(ids: npt.NDArray[np.int64], chunk_size: int) -> list[npt.NDArray[np.int64]]:
         num_chunks = max(1, ceil(len(ids) / chunk_size))
         pytorch_logger.debug(f"Shuffling {len(ids)} obs joinids into {num_chunks} chunks of {chunk_size}")
         return np.array_split(ids, num_chunks)
@@ -663,7 +664,7 @@ def stats(self) -> Stats:
         return self._stats
 
     @property
-    def shape(self) -> Tuple[int, int]:
+    def shape(self) -> tuple[int, int]:
         """Get the shape of the data that will be returned by this :class:`cellxgene_census.experimental.ml.pytorch.ExperimentDataPipe`.
         This is the number of obs (cell) and var (feature) counts in the returned data. If used in multiprocessing mode
         (i.e. :class:`torch.utils.data.DataLoader` instantiated with num_workers > 0), the obs (cell) count will reflect
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py
index c47ad9f1e..188513c65 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_highly_variable_genes.py
@@ -1,14 +1,14 @@
 from __future__ import annotations
 
 import os
+from collections.abc import Callable, Sequence
 from concurrent import futures
-from typing import Any, Callable, Sequence, cast
+from typing import Any, Literal, cast
 
 import numpy as np
 import pandas as pd
 import tiledbsoma as soma
 from somacore.options import SparseDFCoord
-from typing_extensions import Literal
 
 from ..._experiment import _get_experiment
 from ..util._eager_iter import _EagerIterator
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_online.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_online.py
index 2eaf71a7b..13bbe76c5 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_online.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_online.py
@@ -1,5 +1,3 @@
-from typing import Optional, Tuple
-
 import numba
 import numpy as np
 import numpy.typing as npt
@@ -43,7 +41,7 @@ def update(
         self,
         var_vec: npt.NDArray[np.int64],
         val_vec: npt.NDArray[np.float32],
-        batch_vec: Optional[npt.NDArray[np.int64]] = None,
+        batch_vec: npt.NDArray[np.int64] | None = None,
     ) -> None:
         if self.n_batches == 1:
             assert batch_vec is None
@@ -54,7 +52,7 @@ def update(
 
     def finalize(
         self,
-    ) -> Tuple[
+    ) -> tuple[
         npt.NDArray[np.float64],
         npt.NDArray[np.float64],
         npt.NDArray[np.float64],
@@ -125,7 +123,7 @@ def update(
         self,
         var_vec: npt.NDArray[np.int64],
         val_vec: npt.NDArray[np.float32],
-        batch_vec: Optional[npt.NDArray[np.int64]] = None,
+        batch_vec: npt.NDArray[np.int64] | None = None,
     ) -> None:
         if self.n_batches == 1:
             assert batch_vec is None
@@ -147,7 +145,7 @@ def update(
                 self.clip_val,
             )
 
-    def finalize(self) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+    def finalize(self) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
         return self.counts_sum, self.squared_counts_sum
 
 
@@ -282,7 +280,7 @@ def _mbomv_combine_batches(
     n_samples: npt.NDArray[np.int64],
     u: npt.NDArray[np.float64],
     M2: npt.NDArray[np.float64],
-) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+) -> tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]:
     """Combine all batches using Chan's parallel adaptation of Welford's.
 
     Returns tuple of (u, M2).
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py
index a5930525a..8cef3e24e 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_stats.py
@@ -1,7 +1,8 @@
 from __future__ import annotations
 
+from collections.abc import Generator
 from concurrent import futures
-from typing import Any, Generator
+from typing import Any
 
 import numpy as np
 import numpy.typing as npt
diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/util/_eager_iter.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/util/_eager_iter.py
index 6ee5db37a..9c229b889 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/util/_eager_iter.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/util/_eager_iter.py
@@ -1,9 +1,10 @@
 import logging
 import threading
 from collections import deque
+from collections.abc import Iterator
 from concurrent import futures
 from concurrent.futures import Future
-from typing import Deque, Iterator, Optional, TypeVar
+from typing import TypeVar
 
 util_logger = logging.getLogger("cellxgene_census.experimental.util")
 
@@ -14,13 +15,13 @@ class _EagerIterator(Iterator[_T]):
     def __init__(
         self,
         iterator: Iterator[_T],
-        pool: Optional[futures.Executor] = None,
+        pool: futures.Executor | None = None,
     ):
         super().__init__()
         self.iterator = iterator
         self._pool = pool or futures.ThreadPoolExecutor()
         self._own_pool = pool is None
-        self._future: Optional[Future[_T]] = None
+        self._future: Future[_T] | None = None
         self._begin_next()
 
     def _begin_next(self) -> None:
@@ -56,14 +57,14 @@ def __init__(
         self,
         iterator: Iterator[_T],
         max_pending: int = 1,
-        pool: Optional[futures.Executor] = None,
+        pool: futures.Executor | None = None,
     ):
         super().__init__()
         self.iterator = iterator
         self.max_pending = max_pending
         self._pool = pool or futures.ThreadPoolExecutor()
         self._own_pool = pool is None
-        self._pending_results: Deque[futures.Future[_T]] = deque()
+        self._pending_results: deque[futures.Future[_T]] = deque()
         self._lock = threading.Lock()
         self._begin_next()
 
diff --git a/api/python/cellxgene_census/tests/experimental/pp/test_stats.py b/api/python/cellxgene_census/tests/experimental/pp/test_stats.py
index 3c113ea07..ecc410d0e 100644
--- a/api/python/cellxgene_census/tests/experimental/pp/test_stats.py
+++ b/api/python/cellxgene_census/tests/experimental/pp/test_stats.py
@@ -1,4 +1,4 @@
-from typing import Any, Tuple, Union
+from typing import Any
 
 import numpy as np
 import numpy.ma as ma
@@ -11,7 +11,7 @@
 from cellxgene_census.experimental import pp
 
 
-def var(X: Union[sparse.csc_matrix, sparse.csr_matrix], axis: int = 0, ddof: int = 1) -> Any:
+def var(X: sparse.csc_matrix | sparse.csr_matrix, axis: int = 0, ddof: int = 1) -> Any:
     """
     Variance of a sparse matrix calculated as mean(X**2) - mean(X)**2
     with Bessel's correction applied for unbiased estimate
@@ -52,7 +52,7 @@ def test_mean_variance(
     calc_mean: bool,
     calc_variance: bool,
     small_mem_context: soma.SOMATileDBContext,
-    obs_coords: Tuple[None, slice],
+    obs_coords: tuple[None, slice],
 ) -> None:
     with cellxgene_census.open_soma(census_version="latest", context=small_mem_context) as census:
         with census["census_data"][experiment_name].axis_query(
@@ -119,7 +119,7 @@ def test_mean_variance_nnz_only(
     calc_mean: bool,
     calc_variance: bool,
     small_mem_context: soma.SOMATileDBContext,
-    obs_coords: Tuple[None, slice],
+    obs_coords: tuple[None, slice],
 ) -> None:
     # Note: since this test requires materializing the matrix in memory to compute the mean/variance,
     # we're going to use a coord slice based approach. This will ensure the matrix can fit in memory.
diff --git a/api/python/cellxgene_census/tests/test_acceptance.py b/api/python/cellxgene_census/tests/test_acceptance.py
index d4587e03e..bd01b840b 100644
--- a/api/python/cellxgene_census/tests/test_acceptance.py
+++ b/api/python/cellxgene_census/tests/test_acceptance.py
@@ -11,7 +11,8 @@
 See README.md for historical data.
 """
 
-from typing import Any, Dict, Iterator, Optional, Tuple
+from collections.abc import Iterator
+from typing import Any
 
 import pyarrow as pa
 import pytest
@@ -21,7 +22,7 @@
 from cellxgene_census._open import DEFAULT_TILEDB_CONFIGURATION
 
 
-def make_context(census_version: str, config: Optional[Dict[str, Any]] = None) -> soma.SOMATileDBContext:
+def make_context(census_version: str, config: dict[str, Any] | None = None) -> soma.SOMATileDBContext:
     config = config or {}
     version = cellxgene_census.get_census_version_description(census_version)
     s3_region = version["soma"].get("s3_region", "us-west-2")
@@ -51,7 +52,7 @@ def test_load_axes(organism: str) -> None:
     del var_df
 
 
-def table_iter_is_ok(tbl_iter: Iterator[pa.Table], stop_after: Optional[int] = 2) -> bool:
+def table_iter_is_ok(tbl_iter: Iterator[pa.Table], stop_after: int | None = 2) -> bool:
     """
     Utility that verifies that the value is an iterator of pa.Table.
 
@@ -78,7 +79,7 @@ def table_iter_is_ok(tbl_iter: Iterator[pa.Table], stop_after: Optional[int] = 2
         pytest.param(None, DEFAULT_TILEDB_CONFIGURATION, marks=pytest.mark.expensive),
     ],
 )
-def test_incremental_read_obs(organism: str, stop_after: Optional[int], ctx_config: Optional[Dict[str, Any]]) -> None:
+def test_incremental_read_obs(organism: str, stop_after: int | None, ctx_config: dict[str, Any] | None) -> None:
     """Verify that obs, var and X[raw] can be read incrementally, i.e., in chunks"""
 
     # ctx_config=None open census with a small (default) TileDB buffer size, which reduces
@@ -101,7 +102,7 @@ def test_incremental_read_obs(organism: str, stop_after: Optional[int], ctx_conf
         pytest.param(None, DEFAULT_TILEDB_CONFIGURATION, marks=pytest.mark.expensive),
     ],
 )
-def test_incremental_read_var(organism: str, stop_after: Optional[int], ctx_config: Optional[Dict[str, Any]]) -> None:
+def test_incremental_read_var(organism: str, stop_after: int | None, ctx_config: dict[str, Any] | None) -> None:
     """Verify that var can be read incrementally, i.e., in chunks"""
 
     # ctx_config=None open census with a small (default) TileDB buffer size, which reduces
@@ -143,9 +144,9 @@ def test_incremental_read_var(organism: str, stop_after: Optional[int], ctx_conf
 )
 def test_incremental_read_X(
     organism: str,
-    stop_after: Optional[int],
-    ctx_config: Optional[Dict[str, Any]],
-    coords: Optional[Tuple[slice, slice]],
+    stop_after: int | None,
+    ctx_config: dict[str, Any] | None,
+    coords: tuple[slice, slice] | None,
 ) -> None:
     """Verify that obs, var and X[raw] can be read incrementally, i.e., in chunks"""
 
@@ -165,7 +166,7 @@ def test_incremental_read_X(
     ["tissue=='aorta'", pytest.param("tissue=='brain'", marks=pytest.mark.expensive)],
 )
 @pytest.mark.parametrize("stop_after", [2, pytest.param(None, marks=pytest.mark.expensive)])
-def test_incremental_query(organism: str, obs_value_filter: str, stop_after: Optional[int]) -> None:
+def test_incremental_query(organism: str, obs_value_filter: str, stop_after: int | None) -> None:
     """Verify incremental read of query result."""
     # use default TileDB configuration
     with cellxgene_census.open_soma(census_version="latest") as census:
@@ -260,9 +261,9 @@ def test_incremental_query(organism: str, obs_value_filter: str, stop_after: Opt
 )
 def test_get_anndata(
     organism: str,
-    obs_value_filter: Optional[str],
-    obs_coords: Optional[slice],
-    ctx_config: Optional[Dict[str, Any]],
+    obs_value_filter: str | None,
+    obs_coords: slice | None,
+    ctx_config: dict[str, Any] | None,
 ) -> None:
     """Verify query and read into AnnData"""
     ctx_config = ctx_config or {}
diff --git a/api/python/cellxgene_census/tests/test_get_anndata.py b/api/python/cellxgene_census/tests/test_get_anndata.py
index 71ef42eb7..edc7f1e0a 100644
--- a/api/python/cellxgene_census/tests/test_get_anndata.py
+++ b/api/python/cellxgene_census/tests/test_get_anndata.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Literal
+from typing import Any, Literal
 
 import numpy as np
 import pandas as pd
@@ -93,7 +93,7 @@ def test_get_anndata_x_layer(census: soma.Collection, layer: str) -> None:
 
 @pytest.mark.live_corpus
 @pytest.mark.parametrize("layers", [["raw", "normalized"], ["normalized", "raw"]])
-def test_get_anndata_two_layers(census: soma.Collection, layers: List[str]) -> None:
+def test_get_anndata_two_layers(census: soma.Collection, layers: list[str]) -> None:
     ad_primary_layer_in_X = cellxgene_census.get_anndata(
         census,
         organism="Homo sapiens",
@@ -175,7 +175,7 @@ def test_get_anndata_obsm_one_layer(lts_census: soma.Collection, obsm_layer: str
 
 @pytest.mark.live_corpus
 @pytest.mark.parametrize("obsm_layers", [["scvi", "geneformer"]])
-def test_get_anndata_obsm_two_layers(lts_census: soma.Collection, obsm_layers: List[str]) -> None:
+def test_get_anndata_obsm_two_layers(lts_census: soma.Collection, obsm_layers: list[str]) -> None:
     # NOTE: this test will break after next LTS release (>2023-12-15), since scvi and geneformer
     # won't be distributed as part of `obsm_layers` anymore. Delete this test when it happens.
     ad = cellxgene_census.get_anndata(
@@ -195,7 +195,7 @@ def test_get_anndata_obsm_two_layers(lts_census: soma.Collection, obsm_layers: L
 
 @pytest.mark.live_corpus
 @pytest.mark.parametrize("obs_embeddings", [["scvi", "geneformer", "uce"]])
-def test_get_anndata_obs_embeddings(lts_census: soma.Collection, obs_embeddings: List[str]) -> None:
+def test_get_anndata_obs_embeddings(lts_census: soma.Collection, obs_embeddings: list[str]) -> None:
     # NOTE: when the next LTS gets released (>2023-12-15), embeddings may or may not be available,
     # so this test could require adjustments.
     ad = cellxgene_census.get_anndata(
@@ -216,7 +216,7 @@ def test_get_anndata_obs_embeddings(lts_census: soma.Collection, obs_embeddings:
 
 @pytest.mark.live_corpus
 @pytest.mark.parametrize("var_embeddings", [["nmf"]])
-def test_get_anndata_var_embeddings(lts_census: soma.Collection, var_embeddings: List[str]) -> None:
+def test_get_anndata_var_embeddings(lts_census: soma.Collection, var_embeddings: list[str]) -> None:
     # NOTE: when the next LTS gets released (>2023-12-15), embeddings may or may not be available,
     # so this test could require adjustments.
 
@@ -311,7 +311,7 @@ def test_deprecated_column_api(census: soma.Collection) -> None:
     pd.testing.assert_frame_equal(ad_curr.var, ad_prev.var)
 
 
-def _map_to_get_anndata_args(query: Dict[str, Any], axis: Literal["obs", "var"]) -> Dict[str, Any]:
+def _map_to_get_anndata_args(query: dict[str, Any], axis: Literal["obs", "var"]) -> dict[str, Any]:
     """Helper to map arguments of get_obs/ get_var to get_anndata."""
     result = {}
     if "coords" in query:
@@ -344,7 +344,7 @@ def _map_to_get_anndata_args(query: Dict[str, Any], axis: Literal["obs", "var"])
         pytest.param({"value_filter": "tissue_general == 'vasculature'"}, id="value_filter"),
     ],
 )
-def test_get_obs(lts_census: soma.Collection, query: Dict[str, Any]) -> None:
+def test_get_obs(lts_census: soma.Collection, query: dict[str, Any]) -> None:
     adata_obs = cellxgene_census.get_anndata(
         lts_census, organism="Mus musculus", **_map_to_get_anndata_args(query, "obs")
     ).obs
@@ -370,7 +370,7 @@ def test_get_obs(lts_census: soma.Collection, query: Dict[str, Any]) -> None:
         pytest.param({"value_filter": "feature_name in ['Gm53058', '0610010K14Rik']"}, id="value_filter"),
     ],
 )
-def test_get_var(lts_census: soma.Collection, query: Dict[str, Any]) -> None:
+def test_get_var(lts_census: soma.Collection, query: dict[str, Any]) -> None:
     adata_var = cellxgene_census.get_anndata(
         lts_census, organism="Mus musculus", obs_coords=slice(0), **_map_to_get_anndata_args(query, "var")
     ).var
diff --git a/api/python/cellxgene_census/tests/test_lts_compat.py b/api/python/cellxgene_census/tests/test_lts_compat.py
index dbe646cdd..2212f3076 100644
--- a/api/python/cellxgene_census/tests/test_lts_compat.py
+++ b/api/python/cellxgene_census/tests/test_lts_compat.py
@@ -9,7 +9,8 @@
 from __future__ import annotations
 
 from collections import deque
-from typing import Iterator, Literal, Sequence, Union, get_args
+from collections.abc import Iterator, Sequence
+from typing import Literal, Union, get_args
 
 import pyarrow as pa
 import pytest

From 51556b7aa70fd2c4d8cb95f303993bbad54e6368 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Tue, 2 Jul 2024 21:14:53 +0000
Subject: [PATCH 08/15] Manual fixes

---
 .../src/cellxgene_census/experimental/_embedding.py |  2 +-
 .../cellxgene_census/tests/test_lts_compat.py       | 13 ++++---------
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
index afd4b2552..20a593996 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/_embedding.py
@@ -65,7 +65,7 @@ def _get_embedding(
     context: soma.options.SOMATileDBContext | None = None,
 ) -> npt.NDArray[np.float32]:
     """Private. Like get_embedding, but accepts a Census object and a Census directory."""
-    if isinstance(obs_soma_joinids, (pa.Array, pa.ChunkedArray, pd.Series)):
+    if isinstance(obs_soma_joinids, pa.Array | pa.ChunkedArray | pd.Series):
         obs_soma_joinids = obs_soma_joinids.to_numpy()
     assert isinstance(obs_soma_joinids, np.ndarray)
     if obs_soma_joinids.dtype != np.int64:
diff --git a/api/python/cellxgene_census/tests/test_lts_compat.py b/api/python/cellxgene_census/tests/test_lts_compat.py
index 2212f3076..2c486d541 100644
--- a/api/python/cellxgene_census/tests/test_lts_compat.py
+++ b/api/python/cellxgene_census/tests/test_lts_compat.py
@@ -10,7 +10,7 @@
 
 from collections import deque
 from collections.abc import Iterator, Sequence
-from typing import Literal, Union, get_args
+from typing import Literal, TypeAlias, get_args
 
 import pyarrow as pa
 import pytest
@@ -28,14 +28,9 @@
 ]
 CollectionTypeNames = ["SOMACollection", "SOMAExperiment", "SOMAMeasurement"]
 
-SOMATypes = Union[
-    soma.Collection,
-    soma.DataFrame,
-    soma.SparseNDArray,
-    soma.DenseNDArray,
-    soma.Experiment,
-    soma.Measurement,
-]
+SOMATypes: TypeAlias = (
+    soma.Collection | soma.DataFrame | soma.SparseNDArray | soma.DenseNDArray | soma.Experiment | soma.Measurement
+)
 
 
 def walk_census(

From dcf94bd5aa83cf2b9260b70e001a31db27304487 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Wed, 3 Jul 2024 19:07:44 +0000
Subject: [PATCH 09/15] linting

---
 api/python/cellxgene_census/tests/test_user_agent.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/api/python/cellxgene_census/tests/test_user_agent.py b/api/python/cellxgene_census/tests/test_user_agent.py
index dc410df9a..41612c649 100644
--- a/api/python/cellxgene_census/tests/test_user_agent.py
+++ b/api/python/cellxgene_census/tests/test_user_agent.py
@@ -3,9 +3,10 @@
 
 import json
 import os
+from collections.abc import Callable
 from functools import partial
 from pathlib import Path
-from typing import TYPE_CHECKING, Callable
+from typing import TYPE_CHECKING
 
 import numpy as np
 import proxy

From c5c77d6e9f25a20f299dcabd4b9d4da9b2b269c3 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Mon, 8 Jul 2024 21:33:24 +0000
Subject: [PATCH 10/15] pre-commit

---
 .../src/cellxgene_census/experimental/ml/encoders.py     | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/encoders.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/encoders.py
index 3d4fc4dc5..0be576ef6 100644
--- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/encoders.py
+++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/encoders.py
@@ -1,6 +1,5 @@
 import abc
 import functools
-from typing import List
 
 import numpy.typing as npt
 import pandas as pd
@@ -47,7 +46,7 @@ def name(self) -> str:
 
     @property
     @abc.abstractmethod
-    def columns(self) -> List[str]:
+    def columns(self) -> list[str]:
         """Columns in ``obs`` that the encoder will be applied to."""
         pass
 
@@ -77,7 +76,7 @@ def name(self) -> str:
         return self.col
 
     @property
-    def columns(self) -> List[str]:
+    def columns(self) -> list[str]:
         """Columns in ``obs`` that the encoder will be applied to."""
         return [self.col]
 
@@ -90,7 +89,7 @@ def classes_(self):  # type: ignore
 class BatchEncoder(Encoder):
     """An encoder that concatenates and encodes several ``obs`` columns."""
 
-    def __init__(self, cols: List[str], name: str = "batch"):
+    def __init__(self, cols: list[str], name: str = "batch"):
         self.cols = cols
         from sklearn.preprocessing import LabelEncoder
 
@@ -115,7 +114,7 @@ def fit(self, obs: pd.DataFrame) -> None:
         self._encoder.fit(arr.unique())
 
     @property
-    def columns(self) -> List[str]:
+    def columns(self) -> list[str]:
         """Columns in ``obs`` that the encoder will be applied to."""
         return self.cols
 

From 628073a0a421d792ef28a64ca7a01a539c474c06 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Wed, 4 Sep 2024 22:46:15 +0000
Subject: [PATCH 11/15] Try allowing installs on 3.12

---
 api/python/cellxgene_census/pyproject.toml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/api/python/cellxgene_census/pyproject.toml b/api/python/cellxgene_census/pyproject.toml
index 3a7d41250..e9aa1979d 100644
--- a/api/python/cellxgene_census/pyproject.toml
+++ b/api/python/cellxgene_census/pyproject.toml
@@ -11,7 +11,7 @@ authors = [
 ]
 license = { text = "MIT" }
 readme = "README.md"
-requires-python = ">= 3.10, < 3.12"
+requires-python = ">= 3.10, < 3.13"
 classifiers = [
     "Development Status :: 4 - Beta",
     "Intended Audience :: Developers",
@@ -24,12 +24,13 @@ classifiers = [
     "Operating System :: MacOS :: MacOS X",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
 ]
 dependencies= [
     # NOTE: the tiledbsoma version must be >= to the version used in the Census builder, to
     # ensure that the assets are readable (tiledbsoma supports backward compatible reading).
     # Make sure this version does not fall behind the builder's tiledbsoma version.
-    "tiledbsoma~=1.12.3",
+    "tiledbsoma>=1.12.3",
     "anndata",
     "numpy>=1.23,<2.0",
     "requests",

From 4cac29220b4a57687d1e0e5f8f07759b630f28f7 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Wed, 4 Sep 2024 23:13:02 +0000
Subject: [PATCH 12/15] Update test for missing creds

---
 api/python/cellxgene_census/tests/test_open.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/api/python/cellxgene_census/tests/test_open.py b/api/python/cellxgene_census/tests/test_open.py
index df20b3337..5945ea9e4 100644
--- a/api/python/cellxgene_census/tests/test_open.py
+++ b/api/python/cellxgene_census/tests/test_open.py
@@ -442,8 +442,8 @@ def test_opening_census_without_anon_access_fails_with_bogus_creds() -> None:
     os.environ["AWS_SECRET_ACCESS_KEY"] = "fake_key"
     # Passing an empty context
     with pytest.raises(
-        tiledb.TileDBError,
-        match=r"The AWS Access Key Id you provided does not exist in our records",
+        (tiledb.TileDBError, soma.DoesNotExistError),
+        match=r"does not exist",
     ):
         cellxgene_census.open_soma(census_version="latest", context=soma.SOMATileDBContext())
 

From 6eec02d243fe32818f0f39cb0afda6b2d48f2002 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Wed, 4 Sep 2024 23:23:25 +0000
Subject: [PATCH 13/15] Update skipped testing env to macos+python=3.12

---
 .github/workflows/py-unittests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/py-unittests.yml b/.github/workflows/py-unittests.yml
index 044ce3b6e..e5ba6f0eb 100644
--- a/.github/workflows/py-unittests.yml
+++ b/.github/workflows/py-unittests.yml
@@ -24,7 +24,7 @@ jobs:
         python-version: ["3.10", "3.11", "3.12"]
         exclude:
           - os: macos-latest
-            python-version: "3.10"
+            python-version: "3.12"
 
     runs-on: ${{matrix.os}}
 

From a3523b6ace47750bdd892436247ab2cadd73a55b Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Wed, 4 Sep 2024 23:43:50 +0000
Subject: [PATCH 14/15] Skip correct python+os combo for dep check

---
 .github/workflows/py-dependency-check.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/py-dependency-check.yml b/.github/workflows/py-dependency-check.yml
index aefd78f70..010409042 100644
--- a/.github/workflows/py-dependency-check.yml
+++ b/.github/workflows/py-dependency-check.yml
@@ -28,7 +28,7 @@ jobs:
         python-version: ["3.10", "3.11", "3.12"]
         exclude:
           - os: macos-latest
-            python-version: "3.10"
+            python-version: "3.12"
 
     runs-on: ${{matrix.os}}
 

From 57e5d3b229e90e1911adbae41626126a9a1b2272 Mon Sep 17 00:00:00 2001
From: Isaac Virshup <ivirshup@gmail.com>
Date: Thu, 5 Sep 2024 10:55:21 -0700
Subject: [PATCH 15/15] Correct max version

---
 .github/workflows/py-dependency-check.yml     | 2 +-
 api/python/notebooks/README.md                | 2 +-
 docs/cellxgene_census_docsite_installation.md | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/py-dependency-check.yml b/.github/workflows/py-dependency-check.yml
index aefd78f70..010409042 100644
--- a/.github/workflows/py-dependency-check.yml
+++ b/.github/workflows/py-dependency-check.yml
@@ -28,7 +28,7 @@ jobs:
         python-version: ["3.10", "3.11", "3.12"]
         exclude:
           - os: macos-latest
-            python-version: "3.10"
+            python-version: "3.12"
 
     runs-on: ${{matrix.os}}
 
diff --git a/api/python/notebooks/README.md b/api/python/notebooks/README.md
index a53e477ab..cdf89656d 100644
--- a/api/python/notebooks/README.md
+++ b/api/python/notebooks/README.md
@@ -9,7 +9,7 @@ Demonstration notebooks for the CZ CELLxGENE Discover Census. There are two kind
 
 You must be on a Linux or MacOS system, with the following installed:
 
-* Python 3.10 to 3.11
+* Python 3.10 to 3.12
 * Jupyter or some other means of running notebooks (e.g., vscode)
 
 For now, it is recommended that you do all this on a host with sufficient memory,
diff --git a/docs/cellxgene_census_docsite_installation.md b/docs/cellxgene_census_docsite_installation.md
index 41347e9a3..0cfbd969b 100644
--- a/docs/cellxgene_census_docsite_installation.md
+++ b/docs/cellxgene_census_docsite_installation.md
@@ -4,7 +4,7 @@
 
 The Census API requires a Linux or MacOS system with:
 
-- Python 3.10 to Python 3.11. Or R, supported versions TBD.
+- Python 3.10 to Python 3.12. Or R, supported versions TBD.
 - Recommended: >16 GB of memory.
 - Recommended: >5 Mbps internet connection.
 - Recommended: for increased performance use the API through a AWS-EC2 instance from the region `us-west-2`. The Census data builds are hosted in a AWS-S3 bucket in that region.