diff --git a/api/python/cell_census/src/cell_census/_experiment.py b/api/python/cell_census/src/cell_census/_experiment.py index 6ed258165..c0f6a4c80 100644 --- a/api/python/cell_census/src/cell_census/_experiment.py +++ b/api/python/cell_census/src/cell_census/_experiment.py @@ -5,21 +5,21 @@ def _get_experiment(census: soma.Collection, organism: str) -> soma.Experiment: """ - Given a census soma.Collection, return the experiment for the named organism. + Given a census ``soma.Collection``, return the experiment for the named organism. Organism matching is somewhat flexible, attempting to map from human-friendly - names to the underlying collection element name. Will raise a ValueError if + names to the underlying collection element name. Will raise a ``ValueError`` if unable to find the specified organism [lifecycle: experimental]. Parameters ---------- - census - soma.Collection + census - ``soma.Collection`` The census - organism - str + organism - ``str`` The organism name, eg., ``Homo sapiens`` Returns ------- - soma.Experiment - the requested experiment. + ``soma.Experiment`` - the requested experiment. Examples -------- diff --git a/api/python/cell_census/src/cell_census/_get_anndata.py b/api/python/cell_census/src/cell_census/_get_anndata.py index 03a1b61a1..e79b132a7 100644 --- a/api/python/cell_census/src/cell_census/_get_anndata.py +++ b/api/python/cell_census/src/cell_census/_get_anndata.py @@ -24,37 +24,37 @@ def get_anndata( column_names: Optional[AxisColumnNames] = None, ) -> anndata.AnnData: """ - Convience wrapper around soma.Experiment query, to build and execute a query, - and return it as an AnnData object [lifecycle: experimental]. + Convenience wrapper around ``soma.Experiment`` query, to build and execute a query, + and return it as an ``AnnData`` object [lifecycle: experimental]. 
Parameters ---------- - census : soma.Collection + census : ``soma.Collection`` The census object, usually returned by `cell_census.open_soma()` - organism : str + organism : ``str`` The organism to query, usually one of "Homo sapiens" or "Mus musculus" - measurement_name : str, default 'RNA' + measurement_name : ``str``, default ``"RNA"`` The measurement object to query - X_name : str, default "raw" - The X layer to query - obs_value_filter: str, default None + X_name : ``str``, default ``"raw"`` + The ``X`` layer to query + obs_value_filter: ``str``, default ``None`` Value filter for the ``obs`` metadata. Value is a filter query written in the SOMA ``value_filter`` syntax. - obs_coords: tuple[int, slice or NumPy ArrayLike of int], default None + obs_coords: ``tuple``[``int``, slice or NumPy ArrayLike of ``int``], default ``None`` Coordinates for the ``obs`` axis, which is indexed by the ``soma_joinid`` value. - May be an int, a list of int, or a slice. The default, None, selects all. - var_value_filter: str, default None + May be an ``int``, a list of ``int``, or a slice. The default, ``None``, selects all. + var_value_filter: ``str``, default ``None`` Value filter for the ``var`` metadata. Value is a filter query written in the SOMA ``value_filter`` syntax. - var_coords: tuple[int, slice or NumPy ArrayLike of int], default None + var_coords: ``tuple``[``int``, slice or NumPy ArrayLike of ``int``], default ``None`` Coordinates for the ``var`` axis, which is indexed by the ``soma_joinid`` value. - May be an int, a list of int, or a slice. The default, None, selects all. - column_names: dict[Literal['obs', 'var'], List[str]] - Colums to fetch for obs and var dataframes. + May be an ``int``, a list of ``int``, or a slice. The default, ``None``, selects all. + column_names: ``dict[Literal['obs', 'var'], List[str]]`` + Columns to fetch for ``obs`` and ``var`` dataframes. 
Returns ------- - anndata.AnnData - containing the census slice + ``anndata.AnnData`` - containing the census slice Examples -------- diff --git a/api/python/cell_census/src/cell_census/_open.py b/api/python/cell_census/src/cell_census/_open.py index d43b6d7ed..f220fd61f 100644 --- a/api/python/cell_census/src/cell_census/_open.py +++ b/api/python/cell_census/src/cell_census/_open.py @@ -43,14 +43,14 @@ def open_soma( context: Optional[soma.options.SOMATileDBContext] = None, ) -> soma.Collection: """ - Open the Cell Census by version or URI, returning a soma.Collection containing the + Open the Cell Census by version or URI, returning a ``soma.Collection`` containing the top-level census. Raises error if ``census_version`` is specified and unknown, or if neither ``uri`` or ``census_version`` are specified, or if the ``uri`` can not be opened [lifecycle: experimental]. Parameters ---------- - census_version : Optional[str] + census_version : ``Optional[str]`` The version of the Census, e.g., "latest" uri : Optional[str] The URI containing the Census SOMA objects. If specified, will take precedence @@ -59,7 +59,7 @@ def open_soma( Returns ------- - soma.Collection : returns a SOMA Collection object. Can be used as a context manager, which + ``soma.Collection`` : returns a SOMA Collection object. Can be used as a context manager, which will automatically close upon exit. Examples @@ -71,19 +71,23 @@ def open_soma( ... Open and close: + >>> census = cell_census.open_soma() ... census.close() Open a specific Cell Census by version: + >>> with cell_census.open_soma("2022-12-31") as census: ... Open a Cell Census by S3 URI, rather than by version. + >>> with cell_census.open_soma(uri="s3://bucket/path") as census: ... Open a Cell Census by path (file:// URI), rather than by version. + >>> with cell_census.open_soma(uri="/tmp/census") as census: ... 
""" @@ -100,20 +104,20 @@ def open_soma( def get_source_h5ad_uri(dataset_id: str, *, census_version: str = "latest") -> CensusLocator: """ - Open the named version of the census, and return the URI for the dataset_id. This + Open the named version of the census, and return the URI for the ``dataset_id``. This does not guarantee that the H5AD exists or is accessible to the user. Raises an - error if dataset_id or census_version are unknown [lifecycle: experimental]. + error if ``dataset_id`` or ``census_version`` are unknown [lifecycle: experimental]. Parameters ---------- - dataset_id : str - The dataset_id of interest - census_version : Optional[str] + dataset_id : ``str`` + The ``dataset_id`` of interest + census_version : ``Optional[str]`` The census version Returns ------- - CensusLocator : the URI and optional S3 region for the source H5AD + ``CensusLocator`` : the URI and optional S3 region for the source H5AD Examples -------- @@ -136,17 +140,17 @@ def get_source_h5ad_uri(dataset_id: str, *, census_version: str = "latest") -> C def download_source_h5ad(dataset_id: str, to_path: str, *, census_version: str = "latest") -> None: """ - Download the source H5AD dataset, for the given dataset_id, to the user-specified + Download the source H5AD dataset, for the given ``dataset_id``, to the user-specified file name. Will raise an error if the path already exists (i.e., will not overwrite an existing file), or is not a file [lifecycle: experimental]. Parameters ---------- - dataset_id : str - Fetch the source (original) H5AD associated with this dataset_id. - to_path : str + dataset_id : ``str`` + Fetch the source (original) H5AD associated with this ``dataset_id``. + to_path : ``str`` The file name where the downloaded H5AD will be written. Must not already exist. - census_version : str + census_version : ``str`` The census version name. Defaults to ``latest``. 
Returns @@ -155,7 +159,7 @@ def download_source_h5ad(dataset_id: str, to_path: str, *, census_version: str = See Also -------- - get_source_h5ad_uri : Look up the location of the source H5AD. + ``get_source_h5ad_uri`` : Look up the location of the source H5AD. Examples -------- diff --git a/api/python/cell_census/src/cell_census/_presence_matrix.py b/api/python/cell_census/src/cell_census/_presence_matrix.py index 1a4e789e2..c133e96ea 100644 --- a/api/python/cell_census/src/cell_census/_presence_matrix.py +++ b/api/python/cell_census/src/cell_census/_presence_matrix.py @@ -11,23 +11,23 @@ def get_presence_matrix( ) -> sparse.csr_matrix: """ Read the gene presence matrix and return as a SciPy sparse CSR array - (scipy.sparse.csr_array). The returned sparse matrix is indexed on the + (``scipy.sparse.csr_array``). The returned sparse matrix is indexed on the first dimension by the dataset ``soma_joinid`` values, and on the second dimension by the ``var`` DataFrame ``soma_joinid`` values [lifecycle: experimental]. Parameters ---------- - census : soma.Collection + census : ``soma.Collection`` The census from which to read the presence matrix. - organism : str + organism : ``str`` The organism to query, usually one of "Homo sapiens" or "Mus musculus" - measurement_name : str, default 'RNA' + measurement_name : ``str``, default ``"RNA"`` The measurement object to query Returns ------- - scipy.sparse.csr_array - containing the presence matrix. + ``scipy.sparse.csr_array`` - containing the presence matrix. 
Examples -------- diff --git a/api/python/cell_census/src/cell_census/_release_directory.py b/api/python/cell_census/src/cell_census/_release_directory.py index 37a9112dd..de09d03a5 100644 --- a/api/python/cell_census/src/cell_census/_release_directory.py +++ b/api/python/cell_census/src/cell_census/_release_directory.py @@ -39,17 +39,17 @@ def get_census_version_description(census_version: str) -> CensusVersionDescript Parameters ---------- - census_version : str + census_version : ``str`` The census version name. Returns ------- - CensusReleaseDescription + ``CensusVersionDescription`` Dictionary containing a description of the release. See Also -------- - get_census_version_directory : returns the entire directory as a dict. + ``get_census_version_directory`` : returns the entire directory as a dict. Examples -------- @@ -78,13 +78,13 @@ def get_census_version_directory() -> Dict[CensusVersionName, CensusVersionDescr Returns ------- - Dict[CensusReleaseName, CensusReleaseDescription] + ``Dict[CensusVersionName, CensusVersionDescription]`` Dictionary of release names and their corresponding release description. See Also -------- - get_census_version_description : get description by census_version. + ``get_census_version_description`` : get description by census_version. Examples -------- diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..d4bb2cbb9 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/custom.css b/docs/_static/custom.css new file mode 100644 index 000000000..9ad37f429 --- /dev/null +++ b/docs/_static/custom.css @@ -0,0 +1,39 @@ +.wy-side-nav-search { + background-color: #fafafa; + color: #404040; +} + +.wy-side-nav-search > a { + color: #2980B9; +} + +.wy-side-nav-search > div.version { + color: rgba(64, 64, 64, 0.3); +} + +.red { + color: red; +} + +.green { + color: green; +}.wy-side-nav-search { + background-color: #fafafa; + color: #404040; +} + +.wy-side-nav-search > a { + color: #2980B9; +} + +.wy-side-nav-search > div.version { + color: rgba(64, 64, 64, 0.3); +} + +.red { + color: red; +} + +.green { + color: green; +} \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 000000000..7f207bfd4 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,37 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'cell-census' +copyright = '2022-2023 Chan Zuckerberg Initiative' +author = 'Chan Zuckerberg Initiative' + +version = "0.5" +release = "0.5.0" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ['sphinx.ext.autodoc'] + +templates_path = ['_templates'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +# html_theme = 'alabaster' +# html_static_path = ['_static'] + +import sphinx_rtd_theme +html_theme = "sphinx_rtd_theme" +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] + +def setup(app): + app.add_css_file("custom.css") \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 000000000..375058178 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,28 @@ +.. cell-census documentation master file, created by + sphinx-quickstart on Thu Feb 16 11:22:43 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +.. meta:: + :name=robots: noindex + +Welcome to cell-census documentation! +======================================= + +Note: this site is currently under construction. The location will change in the future, so don't bookmark it. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + +Contents +-------- + +.. 
toctree:: + :maxdepth: 1 + + setup + python-api + schema \ No newline at end of file diff --git a/docs/python-api.rst b/docs/python-api.rst new file mode 100644 index 000000000..86203917a --- /dev/null +++ b/docs/python-api.rst @@ -0,0 +1,24 @@ +.. meta:: + :name=robots: noindex + +cell-census Python API Reference +================================ + +Open/retrieve Cell Census data +------------------------------ +.. autofunction:: cell_census.open_soma +.. autofunction:: cell_census.get_source_h5ad_uri +.. autofunction:: cell_census.download_source_h5ad + +Get slice as AnnData +-------------------- +.. autofunction:: cell_census.get_anndata + +Feature presence matrix +----------------------- +.. autofunction:: cell_census.get_presence_matrix + +Versioning of Cell Census builds +-------------------------------- +.. autofunction:: cell_census.get_census_version_description +.. autofunction:: cell_census.get_census_version_directory \ No newline at end of file diff --git a/docs/schema.rst b/docs/schema.rst new file mode 100644 index 000000000..87cb93e92 --- /dev/null +++ b/docs/schema.rst @@ -0,0 +1,7 @@ +.. meta:: + :name=robots: noindex + +Schema +====== + +TODO \ No newline at end of file diff --git a/docs/setup.rst b/docs/setup.rst new file mode 100644 index 000000000..ebdf588a7 --- /dev/null +++ b/docs/setup.rst @@ -0,0 +1,62 @@ +.. meta:: + :name=robots: noindex + +Installation +============ + +Dependencies +------------ + +You must be on a Linux or macOS system, with the following installed: + +- Python 3.7 to 3.10 (3.11 is not yet supported) +- Jupyter or some other means of running notebooks (e.g., vscode) + +For now, it is recommended that you do all this on a host with sufficient memory, +and a high bandwidth connection to AWS S3 in the us-west-2 region, e.g., an m6i.8xlarge. +If you utilize AWS, Ubuntu 20 or 22 AMI are recommended (AWS AMI should work fine, but has +not been tested). + +It is also recommended that you use a ``d`` instance type, and mount all of the NVMe drives as swap, +as it will keep you from running out of RAM. 
+ + +Set up Python environment +------------------------- + +1. (optional, but highly recommended) In your working directory, make and activate a virtual environment. For example: +:: + + $ python -m venv ./venv + $ source ./venv/bin/activate + +2. Install the ``cell_census`` package using ``pip``: +:: + + $ pip install -U cell-census + +3. Install other third-party packages needed to run the notebooks: +:: + + $ pip install scikit-misc scvi-tools + + +Verify your installation +------------------------ + +Check that your installation works - this may take a few seconds, as it loads metadata from S3: +:: + + $ python -c 'import cell_census; print(cell_census.open_soma().soma_type)' + SOMACollection + +Latest development version +-------------------------- + +If you want to work with the latest development version of cell-census, you can simply clone the repository +and, from the root directory, install locally via pip: +:: + + $ git clone https://github.com/chanzuckerberg/cell-census.git + $ cd cell-census + $ pip install -e api/python/cell_census/ \ No newline at end of file