Skip to content

Commit

Permalink
open_soma() alias handling changes (#435)
Browse files Browse the repository at this point in the history
- default to stable release
- notify user when an alias is used
- notify user if stable alias is not defined
  • Loading branch information
atolopko-czi authored May 1, 2023
1 parent 7099aea commit 2f9b51b
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 16 deletions.
32 changes: 29 additions & 3 deletions api/python/cellxgene_census/src/cellxgene_census/_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
Contains methods to open publicly hosted versions of Census object and access its source datasets.
"""

import logging
import os.path
import urllib.parse
from typing import Any, Dict, Optional
Expand All @@ -26,6 +26,10 @@
"vfs.s3.ca_file": certifi.where(),
}

api_logger = logging.getLogger("cellxgene_census")
api_logger.setLevel(logging.INFO)
api_logger.addHandler(logging.StreamHandler())


def _open_soma(locator: CensusLocator, context: Optional[soma.options.SOMATileDBContext] = None) -> soma.Collection:
"""Private. Merge config defaults and return open census as a soma Collection/context."""
Expand All @@ -50,7 +54,7 @@ def _open_soma(locator: CensusLocator, context: Optional[soma.options.SOMATileDB

def open_soma(
*,
census_version: Optional[str] = "latest",
census_version: Optional[str] = "stable",
uri: Optional[str] = None,
context: Optional[soma.options.SOMATileDBContext] = None,
) -> soma.Collection:
Expand Down Expand Up @@ -111,7 +115,29 @@ def open_soma(
if census_version is None:
raise ValueError("Must specify either a census version or an explicit URI.")

description = get_census_version_description(census_version) # raises
try:
description = get_census_version_description(census_version) # raises
except KeyError:
# TODO: After the first "stable" is available, this conditional can be removed (keep the 'else' logic)
if census_version == "stable":
description = get_census_version_description("latest")
api_logger.warning(
f'The "{census_version}" Census version is not yet available. Using "latest" Census version '
f"instead."
)
else:
raise ValueError(
f'The "{census_version}" Census version is not valid. Use get_census_version_directory() to retrieve '
f"available versions."
) from None

if description["alias"]:
api_logger.info(
f"The \"{description['alias']}\" release is currently {description['release_build']}. Specify "
f"'census_version=\"{description['release_build']}\"' in future calls to open_soma() to ensure data "
"consistency."
)

return _open_soma(description["soma"], context)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
Methods to retrieve information about versions of the publicly hosted Census object.
"""

from typing import Dict, Optional, Union, cast

import requests
Expand All @@ -31,6 +30,7 @@
"release_build": str, # date of build
"soma": CensusLocator, # SOMA objects locator
"h5ads": CensusLocator, # source H5ADs locator
"alias": Optional[str], # the alias of this entry
},
)
CensusDirectory = Dict[CensusVersionName, Union[CensusVersionName, CensusVersionDescription]]
Expand Down Expand Up @@ -117,6 +117,7 @@ def get_census_version_directory() -> Dict[CensusVersionName, CensusVersionDescr
for census_version in list(directory.keys()):
# Strings are aliases for other census_version
points_at = directory[census_version]
alias = census_version if isinstance(points_at, str) else None
while isinstance(points_at, str):
# resolve aliases
if points_at not in directory:
Expand All @@ -127,7 +128,8 @@ def get_census_version_directory() -> Dict[CensusVersionName, CensusVersionDescr
points_at = directory[points_at]

if isinstance(points_at, dict):
directory[census_version] = points_at
directory[census_version] = points_at.copy()
cast(CensusVersionDescription, directory[census_version])["alias"] = alias

# Cast is safe, as we have removed all aliases
return cast(Dict[CensusVersionName, CensusVersionDescription], directory)
12 changes: 7 additions & 5 deletions api/python/cellxgene_census/tests/test_directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from cellxgene_census._release_directory import CELL_CENSUS_RELEASE_DIRECTORY_URL

DIRECTORY_JSON = {
"stable": "2022-10-01",
"latest": "2022-11-01",
"2022-11-01": {
"release_date": "2022-11-30",
Expand Down Expand Up @@ -52,12 +53,13 @@ def test_get_census_version_directory(directory_mock: Any) -> None:
assert all((type(k) == str for k in directory.keys()))
assert all((type(v) == dict for v in directory.values()))

for tag in DIRECTORY_JSON:
assert tag[0] == "_" or tag in directory
if isinstance(DIRECTORY_JSON[tag], dict):
assert directory[tag] == DIRECTORY_JSON[tag]
assert "_dangling" not in directory

assert directory["latest"] == directory["2022-11-01"]
assert directory["2022-11-01"] == {**DIRECTORY_JSON["2022-11-01"], "alias": None} # type: ignore
assert directory["2022-10-01"] == {**DIRECTORY_JSON["2022-10-01"], "alias": None} # type: ignore

assert directory["latest"] == {**DIRECTORY_JSON["2022-11-01"], "alias": "latest"} # type: ignore
assert directory["stable"] == {**DIRECTORY_JSON["2022-10-01"], "alias": "stable"} # type: ignore

for tag in directory:
assert directory[tag] == cellxgene_census.get_census_version_description(tag)
Expand Down
95 changes: 89 additions & 6 deletions api/python/cellxgene_census/tests/test_open.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,47 @@
import pathlib
import re
import time
from unittest.mock import patch

import anndata
import numpy as np
import pytest
import requests_mock as rm
import tiledbsoma as soma

import cellxgene_census
from cellxgene_census._open import DEFAULT_TILEDB_CONFIGURATION
from cellxgene_census._release_directory import CELL_CENSUS_RELEASE_DIRECTORY_URL


@pytest.mark.live_corpus
def test_open_soma_latest() -> None:
# There should _always_ be a 'latest'
with cellxgene_census.open_soma(census_version="latest") as census:
def test_open_soma_stable() -> None:
# There should _always_ be a 'stable'
with cellxgene_census.open_soma(census_version="stable") as census:
assert census is not None
assert isinstance(census, soma.Collection)

# and it should always be the default
# and it should be the latest, until the first "stable" build is available
with cellxgene_census.open_soma() as default_census:
assert default_census.uri == census.uri
for k, v in DEFAULT_TILEDB_CONFIGURATION.items():
assert census.context.tiledb_ctx.config()[k] == str(v)

# TODO: After the first "stable" build is available, this commented-out code can replace the block above
# and it should always be the default
# with cellxgene_census.open_soma() as default_census:
# assert default_census.uri == census.uri
# for k, v in DEFAULT_TILEDB_CONFIGURATION.items():
# assert census.context.tiledb_ctx.config()[k] == str(v)


@pytest.mark.live_corpus
def test_open_soma_latest() -> None:
    """Opening the "latest" census version yields a non-null SOMA collection."""
    # There should _always_ be a 'latest'
    with cellxgene_census.open_soma(census_version="latest") as latest_census:
        assert latest_census is not None
        assert isinstance(latest_census, soma.Collection)


@pytest.mark.live_corpus
def test_open_soma_with_context() -> None:
Expand Down Expand Up @@ -52,11 +71,75 @@ def test_open_soma_with_context() -> None:
assert census.context.timestamp_ms == timestamp_ms


def test_open_soma_errors() -> None:
with pytest.raises(ValueError):
def test_open_soma_invalid_args() -> None:
    """Passing ``census_version=None`` without a URI raises a descriptive ``ValueError``."""
    # re.escape() makes pytest match the message literally rather than as a regex.
    expected_message = re.escape("Must specify either a census version or an explicit URI.")

    with pytest.raises(ValueError, match=expected_message):
        cellxgene_census.open_soma(census_version=None)


def test_open_soma_errors(requests_mock: rm.Mocker) -> None:
    """Requesting a version absent from the release directory raises ``ValueError``."""
    # Serve an empty release directory so that no version name can be resolved.
    requests_mock.get(CELL_CENSUS_RELEASE_DIRECTORY_URL, json={})

    # re.escape() is required: the message contains regex metacharacters ("(", ")", ".").
    expected_message = re.escape(
        'The "does-not-exist" Census version is not valid. Use get_census_version_directory() to retrieve available versions.'
    )

    with pytest.raises(ValueError, match=expected_message):
        cellxgene_census.open_soma(census_version="does-not-exist")


def test_open_soma_defaults_to_latest_if_missing_stable(requests_mock: rm.Mocker) -> None:
    """Asking for "stable" opens the "latest" build when the directory defines no "stable" alias."""
    # Locator the private opener is expected to receive (the build "latest" points at).
    expected_soma_locator = {
        "uri": "s3://cellxgene-data-public/cell-census/2022-11-01/soma/",
        "s3_region": "us-west-2",
    }

    # Mocked release directory: only a "latest" alias, deliberately no "stable" entry.
    release_build = {
        "release_date": "2022-11-30",
        "release_build": "2022-11-01",
        "soma": {
            "uri": "s3://cellxgene-data-public/cell-census/2022-11-01/soma/",
            "s3_region": "us-west-2",
        },
        "h5ads": {
            "uri": "s3://cellxgene-data-public/cell-census/2022-11-01/h5ads/",
            "s3_region": "us-west-2",
        },
    }
    directory_without_stable = {"latest": "2022-11-01", "2022-11-01": release_build}
    requests_mock.get(CELL_CENSUS_RELEASE_DIRECTORY_URL, json=directory_without_stable)

    # Patch the private opener so no real SOMA object is opened; only the call is inspected.
    with patch("cellxgene_census._open._open_soma") as open_soma_mock:
        cellxgene_census.open_soma(census_version="stable")
        open_soma_mock.assert_called_once_with(expected_soma_locator, None)


def test_open_soma_defaults_to_stable(requests_mock: rm.Mocker) -> None:
    """Calling ``open_soma()`` with no arguments opens the build the "stable" alias points at."""
    # Locator the private opener is expected to receive (the build "stable" points at).
    expected_soma_locator = {
        "uri": "s3://cellxgene-data-public/cell-census/2022-10-01/soma/",
        "s3_region": "us-west-2",
    }

    # Mocked release directory containing a "stable" alias and the build it references.
    release_build = {
        "release_date": "2022-10-30",
        "release_build": "2022-10-01",
        "soma": {
            "uri": "s3://cellxgene-data-public/cell-census/2022-10-01/soma/",
            "s3_region": "us-west-2",
        },
        "h5ads": {
            "uri": "s3://cellxgene-data-public/cell-census/2022-10-01/h5ads/",
            "s3_region": "us-west-2",
        },
    }
    stable_directory = {"stable": "2022-10-01", "2022-10-01": release_build}
    requests_mock.get(CELL_CENSUS_RELEASE_DIRECTORY_URL, json=stable_directory)

    # Patch the private opener so no real SOMA object is opened; only the call is inspected.
    with patch("cellxgene_census._open._open_soma") as open_soma_mock:
        cellxgene_census.open_soma()
        open_soma_mock.assert_called_once_with(expected_soma_locator, None)


@pytest.mark.live_corpus
def test_get_source_h5ad_uri() -> None:
with cellxgene_census.open_soma(census_version="latest") as census:
Expand Down

0 comments on commit 2f9b51b

Please sign in to comment.