From 737f2706c6dbd0668a497a7d8d68ef03cca3b991 Mon Sep 17 00:00:00 2001 From: Bruce Martin Date: Wed, 29 Mar 2023 13:03:16 -0700 Subject: [PATCH] acceptance test run (#310) * acceptance test run * improve acceptance test; first run re-done * add acceptance tests for 10K, 100K and 1M cell queries by coord * update test ids * mark more tests as expensive * comments * run acceptance test --- api/python/cell_census/tests/README.md | 168 +++++++++++++++++- .../cell_census/tests/test_acceptance.py | 58 ++++-- 2 files changed, 203 insertions(+), 23 deletions(-) diff --git a/api/python/cell_census/tests/README.md b/api/python/cell_census/tests/README.md index 5932ab8c7..03cb3be16 100644 --- a/api/python/cell_census/tests/README.md +++ b/api/python/cell_census/tests/README.md @@ -35,14 +35,164 @@ You can also combine them, e.g., These tests are periodically run, and are not part of CI due to their overhead. -When run, please record the results below and commit to git: +When run, please record the results in this file (below) and commit the change to git. Please include the following information: - date -- host / instance type -- Python & package versions and OS (tip: use tiledbsoma.show_package_versions()) -- the Cell Census version used for the test (i.e., the version aliased as `latest`) -- full output of: `pytest --durations=0 --expensive ./api/python/cell_census/tests/` - -## YYYY-MM-DD - -TBD +- config: + - EC2 instance type and any system config (i.e., swap) + - host and OS as reported by `uname -a` + - Python & package versions and OS - suggest capturing the output of `tiledbsoma.show_package_versions()` + - The Cell Census version used for the test (i.e., the version aliased as `latest`). 
This can be easily captured using `cell_census.get_census_version_description('latest')` + - the cell_census package version (i.e., `cell_census.__version__`) +- any run notes +- full output of: `pytest -v --durations=0 --expensive ./api/python/cell_census/tests/` + +## 2023-03-29 + +**Config** + +- Host: EC2 instance type: `r6id.32xlarge`, all nvme mounted as swap. +- Uname: Linux bruce.aegea 5.15.0-1033-aws #37~20.04.1-Ubuntu SMP Fri Mar 17 11:39:30 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux +- Python & census versions: + +``` +In [1]: import cell_census, tiledbsoma + +In [2]: tiledbsoma.show_package_versions() +tiledbsoma.__version__ 1.2.1 +TileDB-Py tiledb.version() (0, 21, 1) +TileDB core version 2.15.0 +libtiledbsoma version() libtiledbsoma=;libtiledb=2.15.0 +python version 3.9.16.final.0 +OS version Linux 5.15.0-1033-aws + +In [3]: cell_census.get_census_version_description('latest') +Out[3]: +{'release_date': None, + 'release_build': '2023-03-16', + 'soma': {'uri': 's3://cellxgene-data-public/cell-census/2023-03-16/soma/', + 's3_region': 'us-west-2'}, + 'h5ads': {'uri': 's3://cellxgene-data-public/cell-census/2023-03-16/h5ads/', + 's3_region': 'us-west-2'}} + +In [4]: cell_census.__version__ +Out[4]: '0.12.0' +``` + +**Run notes:** + +The test `test_acceptance.py::test_get_anndata[None-None-ctx_config7-homo_sapiens]` manifested a large amount of paging activity. 
+ +**Pytest output:** + +``` +$ pytest -v --durations=0 --expensive ./api/python/cell_census/tests/ +==================================================== test session starts ===================================================== +platform linux -- Python 3.9.16, pytest-7.2.2, pluggy-1.0.0 -- /home/bruce/cell-census/venv/bin/python +cachedir: .pytest_cache +rootdir: /home/bruce/cell-census/api/python/cell_census, configfile: pyproject.toml +plugins: requests-mock-1.10.0, anyio-3.6.2 +collected 45 items + +api/python/cell_census/tests/test_acceptance.py::test_load_axes[homo_sapiens] PASSED [ 2%] +api/python/cell_census/tests/test_acceptance.py::test_load_axes[mus_musculus] PASSED [ 4%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_read[homo_sapiens] PASSED [ 6%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_read[mus_musculus] PASSED [ 8%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_query[2-tissue=='aorta'-homo_sapiens] PASSED [ 11%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_query[2-tissue=='aorta'-mus_musculus] PASSED [ 13%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_query[2-tissue=='brain'-homo_sapiens] PASSED [ 15%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_query[2-tissue=='brain'-mus_musculus] PASSED [ 17%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_query[None-tissue=='aorta'-homo_sapiens] PASSED [ 20%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_query[None-tissue=='aorta'-mus_musculus] PASSED [ 22%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_query[None-tissue=='brain'-homo_sapiens] PASSED [ 24%] +api/python/cell_census/tests/test_acceptance.py::test_incremental_query[None-tissue=='brain'-mus_musculus] PASSED [ 26%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[tissue=='aorta'-None-ctx_config0-homo_sapiens] PASSED [ 28%] 
+api/python/cell_census/tests/test_acceptance.py::test_get_anndata[tissue=='aorta'-None-ctx_config0-mus_musculus] PASSED [ 31%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[First 10K cells-homo_sapiens] PASSED [ 33%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[First 10K cells-mus_musculus] PASSED [ 35%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[First 100K cells-homo_sapiens] PASSED [ 37%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[First 100K cells-mus_musculus] PASSED [ 40%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[First 1M cells-homo_sapiens] PASSED [ 42%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[First 1M cells-mus_musculus] PASSED [ 44%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[cell_type=='neuron'-None-ctx_config4-homo_sapiens] PASSED [ 46%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[cell_type=='neuron'-None-ctx_config4-mus_musculus] PASSED [ 48%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[tissue=='brain'-None-ctx_config5-homo_sapiens] PASSED [ 51%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[tissue=='brain'-None-ctx_config5-mus_musculus] PASSED [ 53%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[is_primary_data==True-None-ctx_config6-homo_sapiens] PASSED [ 55%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[is_primary_data==True-None-ctx_config6-mus_musculus] PASSED [ 57%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[None-None-ctx_config7-homo_sapiens] PASSED [ 60%] +api/python/cell_census/tests/test_acceptance.py::test_get_anndata[None-None-ctx_config7-mus_musculus] PASSED [ 62%] +api/python/cell_census/tests/test_directory.py::test_get_census_version_directory PASSED [ 64%] 
+api/python/cell_census/tests/test_directory.py::test_get_census_version_description_errors PASSED [ 66%] +api/python/cell_census/tests/test_directory.py::test_live_directory_contents PASSED [ 68%] +api/python/cell_census/tests/test_get_anndata.py::test_get_anndata_value_filter PASSED [ 71%] +api/python/cell_census/tests/test_get_anndata.py::test_get_anndata_coords PASSED [ 73%] +api/python/cell_census/tests/test_get_anndata.py::test_get_anndata_allows_missing_obs_or_var_filter PASSED [ 75%] +api/python/cell_census/tests/test_get_helpers.py::test_get_experiment PASSED [ 77%] +api/python/cell_census/tests/test_get_helpers.py::test_get_presence_matrix[homo_sapiens] PASSED [ 80%] +api/python/cell_census/tests/test_get_helpers.py::test_get_presence_matrix[mus_musculus] PASSED [ 82%] +api/python/cell_census/tests/test_open.py::test_open_soma_latest PASSED [ 84%] +api/python/cell_census/tests/test_open.py::test_open_soma_with_context PASSED [ 86%] +api/python/cell_census/tests/test_open.py::test_open_soma_errors PASSED [ 88%] +api/python/cell_census/tests/test_open.py::test_get_source_h5ad_uri PASSED [ 91%] +api/python/cell_census/tests/test_open.py::test_get_source_h5ad_uri_errors PASSED [ 93%] +api/python/cell_census/tests/test_open.py::test_download_source_h5ad PASSED [ 95%] +api/python/cell_census/tests/test_open.py::test_download_source_h5ad_errors PASSED [ 97%] +api/python/cell_census/tests/test_util.py::test_uri_join PASSED [100%] + +===================================================== slowest durations ====================================================== +5455.14s call tests/test_acceptance.py::test_get_anndata[None-None-ctx_config7-homo_sapiens] +1388.18s call tests/test_acceptance.py::test_get_anndata[is_primary_data==True-None-ctx_config6-homo_sapiens] +400.45s call tests/test_acceptance.py::test_get_anndata[cell_type=='neuron'-None-ctx_config4-homo_sapiens] +183.85s call tests/test_acceptance.py::test_get_anndata[None-None-ctx_config7-mus_musculus] 
+110.33s call tests/test_acceptance.py::test_get_anndata[is_primary_data==True-None-ctx_config6-mus_musculus] +63.52s call tests/test_acceptance.py::test_get_anndata[First 1M cells-mus_musculus] +44.27s call tests/test_acceptance.py::test_get_anndata[First 1M cells-homo_sapiens] +35.95s call tests/test_acceptance.py::test_get_anndata[tissue=='brain'-None-ctx_config5-homo_sapiens] +25.85s call tests/test_acceptance.py::test_incremental_query[None-tissue=='brain'-homo_sapiens] +24.19s call tests/test_acceptance.py::test_get_anndata[cell_type=='neuron'-None-ctx_config4-mus_musculus] +22.38s call tests/test_acceptance.py::test_incremental_query[2-tissue=='brain'-homo_sapiens] +13.23s call tests/test_acceptance.py::test_get_anndata[tissue=='brain'-None-ctx_config5-mus_musculus] +11.56s call tests/test_get_anndata.py::test_get_anndata_allows_missing_obs_or_var_filter +9.32s call tests/test_acceptance.py::test_incremental_query[2-tissue=='brain'-mus_musculus] +9.31s call tests/test_acceptance.py::test_get_anndata[First 100K cells-homo_sapiens] +8.39s call tests/test_acceptance.py::test_incremental_read[homo_sapiens] +8.14s call tests/test_acceptance.py::test_incremental_query[2-tissue=='aorta'-homo_sapiens] +7.60s call tests/test_acceptance.py::test_get_anndata[tissue=='aorta'-None-ctx_config0-homo_sapiens] +7.25s call tests/test_acceptance.py::test_incremental_query[None-tissue=='brain'-mus_musculus] +7.25s call tests/test_acceptance.py::test_incremental_query[None-tissue=='aorta'-homo_sapiens] +7.23s call tests/test_acceptance.py::test_load_axes[homo_sapiens] +6.91s call tests/test_acceptance.py::test_get_anndata[First 100K cells-mus_musculus] +6.25s setup tests/test_open.py::test_download_source_h5ad +5.88s call tests/test_acceptance.py::test_incremental_query[None-tissue=='aorta'-mus_musculus] +5.58s call tests/test_acceptance.py::test_incremental_query[2-tissue=='aorta'-mus_musculus] +5.14s call tests/test_directory.py::test_live_directory_contents +5.13s call 
tests/test_acceptance.py::test_get_anndata[First 10K cells-homo_sapiens] +4.89s call tests/test_open.py::test_get_source_h5ad_uri +4.59s call tests/test_open.py::test_open_soma_latest +4.35s call tests/test_acceptance.py::test_incremental_read[mus_musculus] +4.23s call tests/test_get_anndata.py::test_get_anndata_value_filter +3.96s call tests/test_acceptance.py::test_get_anndata[tissue=='aorta'-None-ctx_config0-mus_musculus] +3.66s call tests/test_get_helpers.py::test_get_presence_matrix[homo_sapiens] +3.37s call tests/test_acceptance.py::test_get_anndata[First 10K cells-mus_musculus] +2.97s call tests/test_get_helpers.py::test_get_presence_matrix[mus_musculus] +2.62s call tests/test_get_anndata.py::test_get_anndata_coords +2.35s call tests/test_open.py::test_download_source_h5ad +2.04s call tests/test_acceptance.py::test_load_axes[mus_musculus] +1.94s setup tests/test_get_anndata.py::test_get_anndata_coords +1.21s call tests/test_open.py::test_get_source_h5ad_uri_errors +0.99s setup tests/test_open.py::test_download_source_h5ad_errors +0.55s call tests/test_get_helpers.py::test_get_experiment +0.51s call tests/test_open.py::test_open_soma_with_context +0.25s setup tests/test_get_anndata.py::test_get_anndata_value_filter +0.23s setup tests/test_get_anndata.py::test_get_anndata_allows_missing_obs_or_var_filter +0.06s call tests/test_directory.py::test_get_census_version_description_errors +0.04s setup tests/test_directory.py::test_get_census_version_directory +0.02s call tests/test_directory.py::test_get_census_version_directory +0.01s teardown tests/test_acceptance.py::test_get_anndata[is_primary_data==True-None-ctx_config6-homo_sapiens] +0.01s setup tests/test_acceptance.py::test_get_anndata[is_primary_data==True-None-ctx_config6-mus_musculus] +0.01s teardown tests/test_acceptance.py::test_get_anndata[None-None-ctx_config7-homo_sapiens] + +(84 durations < 0.005s hidden. Use -vv to show these durations.) 
+============================================== 45 passed in 7924.13s (2:12:04) =============================================== +``` diff --git a/api/python/cell_census/tests/test_acceptance.py b/api/python/cell_census/tests/test_acceptance.py index a4af92ecb..0f18f2694 100644 --- a/api/python/cell_census/tests/test_acceptance.py +++ b/api/python/cell_census/tests/test_acceptance.py @@ -10,7 +10,7 @@ See README.md for historical data. """ -from typing import Iterator, Optional +from typing import Any, Dict, Iterator, Optional import pyarrow as pa import pytest @@ -18,6 +18,15 @@ import tiledbsoma as soma import cell_census +from cell_census._open import DEFAULT_TILEDB_CONFIGURATION + + +def make_context(census_version: str, config: Optional[Dict[str, Any]] = None) -> soma.SOMATileDBContext: + config = config or {} + version = cell_census.get_census_version_description(census_version) + s3_region = version["soma"].get("s3_region", "us-west-2") + config.update({"vfs.s3.region": s3_region}) + return soma.options.SOMATileDBContext(tiledb_ctx=tiledb.Ctx(config)) @pytest.mark.live_corpus @@ -66,10 +75,7 @@ def test_incremental_read(organism: str) -> None: # open census with a small (default) TileDB buffer size, which reduces # memory use, and makes it feasible to run in a GHA. 
- version = cell_census.get_census_version_description("latest") - s3_region = version["soma"].get("s3_region") - context = soma.options.SOMATileDBContext(tiledb_ctx=tiledb.Ctx({"vfs.s3.region": s3_region})) - + context = make_context("latest") with cell_census.open_soma(census_version="latest", context=context) as census: assert table_iter_is_ok(census["census_data"][organism].obs.read(column_names=["soma_joinid", "tissue"])) assert table_iter_is_ok( @@ -97,19 +103,43 @@ def test_incremental_query(organism: str, obs_value_filter: str, stop_after: Opt @pytest.mark.live_corpus -@pytest.mark.expensive @pytest.mark.parametrize("organism", ["homo_sapiens", "mus_musculus"]) @pytest.mark.parametrize( - "obs_value_filter", + ("obs_value_filter", "obs_coords", "ctx_config"), [ - "tissue == 'aorta'", - pytest.param("cell_type == 'neuron'", marks=pytest.mark.expensive), # very common cell type - pytest.param("tissue == 'brain'", marks=pytest.mark.expensive), # very common tissue - pytest.param(None, marks=pytest.mark.expensive), # whole enchilada + # small query, should be runable in CI + pytest.param("tissue=='aorta'", None, DEFAULT_TILEDB_CONFIGURATION), + # 10K cells, also small enough to run in CI + pytest.param(None, slice(0, 10_000), DEFAULT_TILEDB_CONFIGURATION, id="First 10K cells"), + # 100K cells, standard buffer size + pytest.param( + None, slice(0, 100_000), DEFAULT_TILEDB_CONFIGURATION, marks=pytest.mark.expensive, id="First 100K cells" + ), + # 1M cells, standard buffer size + pytest.param( + None, slice(0, 1_000_000), DEFAULT_TILEDB_CONFIGURATION, marks=pytest.mark.expensive, id="First 1M cells" + ), + # very common cell type, with standard buffer size + pytest.param("cell_type=='neuron'", None, DEFAULT_TILEDB_CONFIGURATION, marks=pytest.mark.expensive), + # very common tissue, with standard buffer size + pytest.param("tissue=='brain'", None, DEFAULT_TILEDB_CONFIGURATION, marks=pytest.mark.expensive), + # all primary cells, with big buffer size + pytest.param( 
+ "is_primary_data==True", None, {"soma.init_buffer_bytes": 4 * 1024**3}, marks=pytest.mark.expensive + ), + # the whole enchilada, with big buffer size + pytest.param(None, None, {"soma.init_buffer_bytes": 4 * 1024**3}, marks=pytest.mark.expensive), ], ) -def test_get_anndata(organism: str, obs_value_filter: str) -> None: +def test_get_anndata( + organism: str, + obs_value_filter: Optional[str], + obs_coords: Optional[slice], + ctx_config: Optional[Dict[str, Any]], +) -> None: """Verify query and read into AnnData""" - with cell_census.open_soma(census_version="latest") as census: - ad = cell_census.get_anndata(census, organism, obs_value_filter=obs_value_filter) + ctx_config = ctx_config or {} + context = make_context("latest", ctx_config) + with cell_census.open_soma(census_version="latest", context=context) as census: + ad = cell_census.get_anndata(census, organism, obs_value_filter=obs_value_filter, obs_coords=obs_coords) assert ad is not None