Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update cell census name in docstrings/md files #347

Merged
merged 11 commits into from
Apr 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Reference documentation can also be accessed directly from Python or R.
It is recommended to install the CELLxGENE Census and all of its dependencies in a new virtual environment via `pip`:

```
pip install -U cell-census
pip install -U cellxgene-census
```

#### Usage examples
Expand Down
16 changes: 8 additions & 8 deletions api/python/cellxgene_census/src/cellxgene_census/_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

"""Open census and related datasets

Contains methods to open publicly hosted versions of Cell Census object and access its source datasets.
Contains methods to open publicly hosted versions of Census object and access its source datasets.
"""

import os.path
Expand Down Expand Up @@ -51,7 +51,7 @@ def open_soma(
uri: Optional[str] = None,
context: Optional[soma.options.SOMATileDBContext] = None,
) -> soma.Collection:
"""Open the Cell Census by version or URI.
"""Open the Census by version or URI.

Args:
census_version:
Expand All @@ -74,8 +74,8 @@ def open_soma(
Experimental.

Examples:
Open the default Cell Census version, using a context manager which will automatically
close the census upon exit of the context.
Open the default Census version, using a context manager which will automatically
close the Census upon exit of the context.

>>> with cellxgene_census.open_soma() as census:
...
Expand All @@ -86,17 +86,17 @@ def open_soma(
...
census.close()

Open a specific Cell Census by version:
Open a specific Census by version:

>>> with cellxgene_census.open_soma("2022-12-31") as census:
...

Open a Cell Census by S3 URI, rather than by version.
Open a Census by S3 URI, rather than by version.

>>> with cellxgene_census.open_soma(uri="s3://bucket/path") as census:
...

Open a Cell Census by path (file:// URI), rather than by version.
Open a Census by path (file:// URI), rather than by version.

>>> with cellxgene_census.open_soma(uri="/tmp/census") as census:
...
Expand All @@ -106,7 +106,7 @@ def open_soma(
return _open_soma({"uri": uri, "s3_region": None}, context)

if census_version is None:
raise ValueError("Must specify either a cell census version or an explicit URI.")
raise ValueError("Must specify either a census version or an explicit URI.")

description = get_census_version_description(census_version) # raises
return _open_soma(description["soma"], context)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
#
# Licensed under the MIT License.

"""Versioning of Cell Census builds
"""Versioning of Census builds

Methods to retrieve information about versions of the publicly hosted Cell Census object.
Methods to retrieve information about versions of the publicly hosted Census object.
"""

from typing import Dict, Optional, Union, cast
Expand All @@ -13,7 +13,7 @@
from typing_extensions import TypedDict

"""
The following types describe the expected directory of Cell Census builds, used
The following types describe the expected directory of Census builds, used
to bootstrap all data location requests.
"""
CensusVersionName = str # census version name, e.g., "release-99", "2022-10-01-test", etc.
Expand Down Expand Up @@ -41,8 +41,7 @@


def get_census_version_description(census_version: str) -> CensusVersionDescription:
"""Get release description for given census version, from the Cell
Census release directory.
"""Get release description for given Census version, from the Census release directory.

Args:
census_version:
Expand Down Expand Up @@ -72,13 +71,13 @@ def get_census_version_description(census_version: str) -> CensusVersionDescript
census_directory = get_census_version_directory()
description = census_directory.get(census_version, None)
if description is None:
raise KeyError(f"Unable to locate cell census version: {census_version}.")
raise KeyError(f"Unable to locate Census version: {census_version}.")
return description


def get_census_version_directory() -> Dict[CensusVersionName, CensusVersionDescription]:
"""
Get the directory of cell census releases currently available.
Get the directory of Census releases currently available.

Returns:
A dictionary that contains release names and their corresponding release description.
Expand Down
18 changes: 9 additions & 9 deletions api/python/cellxgene_census/tests/README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
# Test README

This directory contains tests of the cell-census package API, _and_ the use of the API on the
live "corpus", i.e., data in the public cell census S3 bucket. The tests use Pytest, and have
This directory contains tests of the cellxgene-census package API, _and_ the use of the API on the
live "corpus", i.e., data in the public Census S3 bucket. The tests use Pytest, and have
Pytest marks to control which tests are run.

Tests can be run in the usual manner. First, ensure you have cell-census installed, e.g., from the top-level repo directory:
Tests can be run in the usual manner. First, ensure you have cellxgene-census installed, e.g., from the top-level repo directory:

> pip install -e ./api/python/cell_census/
> pip install -e ./api/python/cellxgene_census/

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why, but I can't leave a suggestion on line 13, which needs to be changed as well.

From

> pytest ./api/python/cell_census/

To

> pytest ./api/python/cellxgene_census/

Then run the tests:

> pytest ./api/python/cell_census/
> pytest ./api/python/cellxgene_census/

## Pytest Marks

There are two Pytest marks you can use from the command line:

- live_corpus: tests that directly access the `latest` version of the Cell Census. Enabled by default.
- live_corpus: tests that directly access the `latest` version of the Census. Enabled by default.
- expensive: tests that are expensive (i.e., CPU, memory, time). Disabled by default - enable with `--expensive`. Some of these tests are _very_ expensive, i.e., require a very large memory host to succeed.

By default, only relatively cheap & fast tests are run. To enable `expensive` tests:
Expand All @@ -42,10 +42,10 @@ When run, please record the results in this file (below) and commit the change t
- EC2 instance type and any system config (i.e., swap)
- host and OS as reported by `uname -a`
- Python & package versions and OS - suggest capturing the output of `tiledbsoma.show_package_versions()`
- The Cell Census version used for the test (i.e., the version aliased as `latest`). This can be easily captured using `cell_census.get_census_version_description('latest')`
- the cell_census package version (ie., `cell_census.__version__`)
- The Census version used for the test (i.e., the version aliased as `latest`). This can be easily captured using `cellxgene_census.get_census_version_description('latest')`
- the cellxgene_census package version (i.e., `cellxgene_census.__version__`)
- any run notes
- full output of: `pytest -v --durations=0 --expensive ./api/python/cell_census/tests/`
- full output of: `pytest -v --durations=0 --expensive ./api/python/cellxgene_census/tests/`

## 2023-03-29

Expand Down
2 changes: 1 addition & 1 deletion api/python/cellxgene_census/tests/test_acceptance.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Acceptance tests for the CELLxGENE Census.
Acceptance tests for the Census.

NOTE: those marked `expensive` are not run in the CI as they are, well, expensive...

Expand Down
14 changes: 7 additions & 7 deletions tools/cellxgene_census_builder/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# README

This package contains code to build and release the CELLxGENE Census in the SOMA format, as specified in the
This package contains code to build and release the Census in the SOMA format, as specified in the
[data schema](https://github.com/chanzuckerberg/cellxgene-census/blob/main/docs/cell_census_schema.md).

This tool is not intended for end-users - it is used by the CELLxGENE team to periodically create and release all
Expand Down Expand Up @@ -136,13 +136,13 @@ The build process:
- (Optional) Validate the entire Census, re-reading from storage.

Modes of operation:
a) (default) creating the entire "census" using all files currently in the CELLxGENE repository.
b) creating a smaller "census" from a user-provided list of files (a "manifest")
a) (default) creating the entire "Census" using all files currently in the CELLxGENE repository.
b) creating a smaller "Census" from a user-provided list of files (a "manifest")

#### Mode (a) - creating the full cell census from the entire CELLxGENE (public) corpus:
#### Mode (a) - creating the full Census from the entire CELLxGENE (public) corpus:

- On a large-memory machine with _ample_ free (local) disk (e.g., 3/4 TB or more) and swap (1 TB or more)
- To create a cell census at `<census_path>`, execute:
- To create a Census at `<census_path>`, execute:
> $ python -m cellxgene_census_builder -mp --max-workers 12 <census_path> build
- Tips:
- `-v` to view info-level logging during run, or `-v -v` for debug-level logging
Expand All @@ -151,7 +151,7 @@ b) creating a smaller "census" from a user-provided list of files (a "manifest")

If you run out of memory, reduce `--max-workers`. You can also try a higher number if you have lots of CPU & memory.

#### Mode (b) - creating a cell census from a user-provided list of H5AD files:
#### Mode (b) - creating a Census from a user-provided list of H5AD files:

- Create a manifest file, in CSV format, containing two columns: dataset_id, h5ad_uri. Example:
```csv
Expand All @@ -160,5 +160,5 @@ If you run out of memory, reduce `--max-workers`. You can also try a higher numb
5b93b8fc-7c9a-45bd-ad3f-dc883137de30, /files/5b93b8fc-7c9a-45bd-ad3f-dc883137de30.h5ad
```
You can specify a file system path or a URI in the second field
- To create a cell census at `<census_path>`, execute:
- To create a Census at `<census_path>`, execute:
> $ python -m cellxgene_census_builder <census_path> build --manifest <the_manifest_file.csv>
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def do_create_reports(args: CensusBuildArgs) -> bool:


def create_args_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(prog="cellxgene_census_builder", description="Build the official cell census.")
parser = argparse.ArgumentParser(prog="cellxgene_census_builder", description="Build the official Census.")
parser.add_argument("working_dir", type=str, help="Working directory for the build")
parser.add_argument(
"--test-resume",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def create_args_parser() -> argparse.ArgumentParser:
subparsers = parser.add_subparsers(required=True, dest="subcommand")

# BUILD
build_parser = subparsers.add_parser("build", help="Build Cell Census")
build_parser = subparsers.add_parser("build", help="Build the Census")
build_parser.add_argument(
"--manifest",
type=argparse.FileType("r"),
Expand All @@ -71,7 +71,7 @@ def create_args_parser() -> argparse.ArgumentParser:
build_parser.add_argument("--test-disable-dirty-git-check", action=argparse.BooleanOptionalAction)

# VALIDATE
subparsers.add_parser("validate", help="Validate an existing cell census build")
subparsers.add_parser("validate", help="Validate an existing Census build")

return parser

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def assign_dataset_soma_joinids(datasets: List[Dataset]) -> None:

def create_dataset_manifest(info_collection: soma.Collection, datasets: List[Dataset]) -> None:
"""
Write the Cell Census `census_datasets` dataframe
Write the Census `census_datasets` dataframe
"""
logging.info("Creating dataset_manifest")
manifest_df = Dataset.to_dataframe(datasets)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,15 +158,15 @@ def display_diff(

def create_args_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(prog="cellxgene_census_summary")
parser.add_argument("-c", "--census-version", default="latest", help="Version of the census. Defaults to latest")
parser.add_argument("-c", "--census-version", default="latest", help="Version of the Census. Defaults to latest")
subparsers = parser.add_subparsers(required=True, dest="subcommand")

# SUMMARIZE
subparsers.add_parser("summarize", help="Summarize the cell census")
subparsers.add_parser("summarize", help="Summarize the Census")

# DIFF
diff_parser = subparsers.add_parser("diff", help="Shows the diff with a previous census version")
diff_parser.add_argument("-p", "--previous-version", help="Version of the census to diff")
diff_parser = subparsers.add_parser("diff", help="Shows the diff with a previous Census version")
diff_parser.add_argument("-p", "--previous-version", help="Version of the Census to diff")

return parser

Expand Down
2 changes: 1 addition & 1 deletion tools/cellxgene_census_builder/tests/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_base_builder_creation(
def test_unicode_support(tmp_path: pathlib.Path) -> None:
"""
Regression test that unicode is supported correctly in tiledbsoma.
This test is not strictly necessary, but it validates the requirements that Cell Census
This test is not strictly necessary, but it validates the requirement that Census
supports unicode in DataFrame columns.
"""
pd_df = pd.DataFrame(data={"value": ["Ünicode", "S̈upport"]}, columns=["value"])
Expand Down