diff --git a/README.md b/README.md index 4db8353d9..a8e0a42fb 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ Reference documentation can also be accessed directly from Python or R. It is recommended to install the CELLxGENE Census and all of its dependencies in a new virtual environment via `pip`: ``` -pip install -U cell-census +pip install -U cellxgene-census ``` #### Usage examples diff --git a/api/python/cellxgene_census/src/cellxgene_census/_open.py b/api/python/cellxgene_census/src/cellxgene_census/_open.py index 9480d7945..e2c8f6a9a 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_open.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_open.py @@ -4,7 +4,7 @@ """Open census and related datasets -Contains methods to open publicly hosted versions of Cell Census object and access its source datasets. +Contains methods to open publicly hosted versions of the Census object and access its source datasets. """ import os.path @@ -51,7 +51,7 @@ def open_soma( uri: Optional[str] = None, context: Optional[soma.options.SOMATileDBContext] = None, ) -> soma.Collection: - """Open the Cell Census by version or URI. + """Open the Census by version or URI. Args: census_version: @@ -74,8 +74,8 @@ def open_soma( Experimental. Examples: - Open the default Cell Census version, using a context manager which will automatically - close the census upon exit of the context. + Open the default Census version, using a context manager which will automatically + close the Census upon exit of the context. >>> with cellxgene_census.open_soma() as census: ... @@ -86,17 +86,17 @@ def open_soma( ... census.close() - Open a specific Cell Census by version: + Open a specific Census by version: >>> with cellxgene_census.open_soma("2022-12-31") as census: ... - Open a Cell Census by S3 URI, rather than by version. + Open a Census by S3 URI, rather than by version. >>> with cellxgene_census.open_soma(uri="s3://bucket/path") as census: ... 
- Open a Cell Census by path (file:// URI), rather than by version. + Open a Census by path (file:// URI), rather than by version. >>> with cellxgene_census.open_soma(uri="/tmp/census") as census: ... @@ -106,7 +106,7 @@ def open_soma( return _open_soma({"uri": uri, "s3_region": None}, context) if census_version is None: - raise ValueError("Must specify either a cell census version or an explicit URI.") + raise ValueError("Must specify either a census version or an explicit URI.") description = get_census_version_description(census_version) # raises return _open_soma(description["soma"], context) diff --git a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py index d9110fc1b..39c981544 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py @@ -2,9 +2,9 @@ # # Licensed under the MIT License. -"""Versioning of Cell Census builds +"""Versioning of Census builds -Methods to retrieve information about versions of the publicly hosted Cell Census object. +Methods to retrieve information about versions of the publicly hosted Census object. """ from typing import Dict, Optional, Union, cast @@ -13,7 +13,7 @@ from typing_extensions import TypedDict """ -The following types describe the expected directory of Cell Census builds, used +The following types describe the expected directory of Census builds, used to bootstrap all data location requests. """ CensusVersionName = str # census version name, e.g., "release-99", "2022-10-01-test", etc. @@ -41,8 +41,7 @@ def get_census_version_description(census_version: str) -> CensusVersionDescription: - """Get release description for given census version, from the Cell - Census release directory. + """Get release description for given Census version, from the Census release directory. 
Args: census_version: @@ -72,13 +71,13 @@ def get_census_version_description(census_version: str) -> CensusVersionDescript census_directory = get_census_version_directory() description = census_directory.get(census_version, None) if description is None: - raise KeyError(f"Unable to locate cell census version: {census_version}.") + raise KeyError(f"Unable to locate Census version: {census_version}.") return description def get_census_version_directory() -> Dict[CensusVersionName, CensusVersionDescription]: """ - Get the directory of cell census releases currently available. + Get the directory of Census releases currently available. Returns: A dictionary that contains release names and their corresponding release description. diff --git a/api/python/cellxgene_census/tests/README.md b/api/python/cellxgene_census/tests/README.md index 03cb3be16..00d143264 100644 --- a/api/python/cellxgene_census/tests/README.md +++ b/api/python/cellxgene_census/tests/README.md @@ -1,22 +1,22 @@ # Test README -This directory contains tests of the cell-census package API, _and_ the use of the API on the -live "corpus", i.e., data in the public cell census S3 bucket. The tests use Pytest, and have +This directory contains tests of the cellxgene-census package API, _and_ the use of the API on the +live "corpus", i.e., data in the public Census S3 bucket. The tests use Pytest, and have Pytest marks to control which tests are run. -Tests can be run in the usual manner. First, ensure you have cell-census installed, e.g., from the top-level repo directory: +Tests can be run in the usual manner. 
First, ensure you have cellxgene-census installed, e.g., from the top-level repo directory: -> pip install -e ./api/python/cell_census/ +> pip install -e ./api/python/cellxgene_census/ Then run the tests: -> pytest ./api/python/cell_census/ +> pytest ./api/python/cellxgene_census/ ## Pytest Marks There are two Pytest marks you can use from the command line: -- live_corpus: tests that directly access the `latest` version of the Cell Census. Enabled by default. +- live_corpus: tests that directly access the `latest` version of the Census. Enabled by default. - expensive: tests that are expensive (ie., cpu, memory, time). Disabled by default - enable with `--expensive`. Some of these tests are _very_ expensive, ie., require a very large memory host to succeed. By default, only relatively cheap & fast tests are run. To enable `expensive` tests: @@ -42,10 +42,10 @@ When run, please record the results in this file (below) and commit the change t - EC2 instance type and any system config (i.e., swap) - host and OS as reported by `uname -a` - Python & package versions and OS - suggest capturing the output of `tiledbsoma.show_package_versions()` - - The Cell Census version used for the test (i.e., the version aliased as `latest`). This can be easily captured using `cell_census.get_census_version_description('latest')` - - the cell_census package version (ie., `cell_census.__version__`) + - The Census version used for the test (i.e., the version aliased as `latest`). 
This can be easily captured using `cellxgene_census.get_census_version_description('latest')` + - the cellxgene_census package version (i.e., `cellxgene_census.__version__`) - any run notes -- full output of: `pytest -v --durations=0 --expensive ./api/python/cell_census/tests/` +- full output of: `pytest -v --durations=0 --expensive ./api/python/cellxgene_census/tests/` ## 2023-03-29 diff --git a/api/python/cellxgene_census/tests/test_acceptance.py b/api/python/cellxgene_census/tests/test_acceptance.py index c02de7aed..812b820b1 100644 --- a/api/python/cellxgene_census/tests/test_acceptance.py +++ b/api/python/cellxgene_census/tests/test_acceptance.py @@ -1,5 +1,5 @@ """ -Acceptance tests for the CELLxGENE Census. +Acceptance tests for the Census. NOTE: those marked `expensive` are not run in the CI as they are, well, expensive... diff --git a/tools/cellxgene_census_builder/README.md b/tools/cellxgene_census_builder/README.md index 81c23131b..b641edd21 100644 --- a/tools/cellxgene_census_builder/README.md +++ b/tools/cellxgene_census_builder/README.md @@ -1,6 +1,6 @@ # README -This package contains code to build and release the CELLxGENE Census in the SOMA format, as specified in the +This package contains code to build and release the Census in the SOMA format, as specified in the [data schema](https://github.com/chanzuckerberg/cellxgene-census/blob/main/docs/cell_census_schema.md). This tool is not intended for end-users - it is used by the CELLxGENE team to periodically create and release all @@ -136,13 +136,13 @@ The build process: - (Optional) Validate the entire Census, re-reading from storage. Modes of operation: -a) (default) creating the entire "census" using all files currently in the CELLxGENE repository. -b) creating a smaller "census" from a user-provided list of files (a "manifest") +a) (default) creating the entire "Census" using all files currently in the CELLxGENE repository. 
+b) creating a smaller "Census" from a user-provided list of files (a "manifest") -#### Mode (a) - creating the full cell census from the entire CELLxGENE (public) corpus: +#### Mode (a) - creating the full Census from the entire CELLxGENE (public) corpus: - On a large-memory machine with _ample_ free (local) disk (eg, 3/4 TB or more) and swap (1 TB or more) -- To create a cell census at ``, execute: +- To create a Census at ``, execute: > $ python -m cellxgene_census_builder -mp --max-workers 12 build - Tips: - `-v` to view info-level logging during run, or `-v -v` for debug-level logging @@ -151,7 +151,7 @@ b) creating a smaller "census" from a user-provided list of files (a "manifest") If you run out of memory, reduce `--max-workers`. You can also try a higher number if you have lots of CPU & memory. -#### Mode (b) - creating a cell census from a user-provided list of H5AD files: +#### Mode (b) - creating a Census from a user-provided list of H5AD files: - Create a manifest file, in CSV format, containing two columns: dataset_id, h5ad_uri. Example: ```csv @@ -160,5 +160,5 @@ If you run out of memory, reduce `--max-workers`. 
You can also try a higher numb 5b93b8fc-7c9a-45bd-ad3f-dc883137de30, /files/5b93b8fc-7c9a-45bd-ad3f-dc883137de30.h5ad ``` You can specify a file system path or a URI in the second field -- To create a cell census at ``, execute: +- To create a Census at ``, execute: > $ python -m cellxgene_census_builder build --manifest diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/__main__.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/__main__.py index 3ba0ca2c6..2ec2a4ef7 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/__main__.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/__main__.py @@ -145,7 +145,7 @@ def do_create_reports(args: CensusBuildArgs) -> bool: def create_args_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(prog="cellxgene_census_builder", description="Build the official cell census.") + parser = argparse.ArgumentParser(prog="cellxgene_census_builder", description="Build the official Census.") parser.add_argument("working_dir", type=str, help="Working directory for the build") parser.add_argument( "--test-resume", diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/__main__.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/__main__.py index b98124f9f..a5c8a187e 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/__main__.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/__main__.py @@ -50,7 +50,7 @@ def create_args_parser() -> argparse.ArgumentParser: subparsers = parser.add_subparsers(required=True, dest="subcommand") # BUILD - build_parser = subparsers.add_parser("build", help="Build Cell Census") + build_parser = subparsers.add_parser("build", help="Build the Census") build_parser.add_argument( "--manifest", type=argparse.FileType("r"), @@ -71,7 +71,7 @@ def create_args_parser() -> argparse.ArgumentParser: 
build_parser.add_argument("--test-disable-dirty-git-check", action=argparse.BooleanOptionalAction) # VALIDATE - subparsers.add_parser("validate", help="Validate an existing cell census build") + subparsers.add_parser("validate", help="Validate an existing Census build") return parser diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/datasets.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/datasets.py index b5578602d..e02bdaa7a 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/datasets.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/build_soma/datasets.py @@ -64,7 +64,7 @@ def assign_dataset_soma_joinids(datasets: List[Dataset]) -> None: def create_dataset_manifest(info_collection: soma.Collection, datasets: List[Dataset]) -> None: """ - Write the Cell Census `census_datasets` dataframe + Write the Census `census_datasets` dataframe """ logging.info("Creating dataset_manifest") manifest_df = Dataset.to_dataframe(datasets) diff --git a/tools/cellxgene_census_builder/src/cellxgene_census_builder/census_summary.py b/tools/cellxgene_census_builder/src/cellxgene_census_builder/census_summary.py index 25cf59217..48a9bfb8a 100644 --- a/tools/cellxgene_census_builder/src/cellxgene_census_builder/census_summary.py +++ b/tools/cellxgene_census_builder/src/cellxgene_census_builder/census_summary.py @@ -158,15 +158,15 @@ def display_diff( def create_args_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(prog="cellxgene_census_summary") - parser.add_argument("-c", "--census-version", default="latest", help="Version of the census. Defaults to latest") + parser.add_argument("-c", "--census-version", default="latest", help="Version of the Census. 
Defaults to latest") subparsers = parser.add_subparsers(required=True, dest="subcommand") # BUILD - subparsers.add_parser("summarize", help="Summarize the cell census") + subparsers.add_parser("summarize", help="Summarize the Census") # VALIDATE - diff_parser = subparsers.add_parser("diff", help="Shows the diff with a previous census version") - diff_parser.add_argument("-p", "--previous-version", help="Version of the census to diff") + diff_parser = subparsers.add_parser("diff", help="Shows the diff with a previous Census version") + diff_parser.add_argument("-p", "--previous-version", help="Version of the Census to diff") return parser diff --git a/tools/cellxgene_census_builder/tests/test_builder.py b/tools/cellxgene_census_builder/tests/test_builder.py index 8349ddb1a..d99e6deca 100644 --- a/tools/cellxgene_census_builder/tests/test_builder.py +++ b/tools/cellxgene_census_builder/tests/test_builder.py @@ -111,7 +111,7 @@ def test_base_builder_creation( def test_unicode_support(tmp_path: pathlib.Path) -> None: """ Regression test that unicode is supported correctly in tiledbsoma. - This test is not strictly necessary, but it validates the requirements that Cell Census + This test is not strictly necessary, but it validates the requirements that Census support unicode in DataFrame columns. """ pd_df = pd.DataFrame(data={"value": ["Ünicode", "S̈upport"]}, columns=["value"])