From 72cc867726935e59fa5b53a94524d3313ebd06b9 Mon Sep 17 00:00:00 2001 From: Andrew Tolopko Date: Tue, 30 May 2023 12:51:55 -0400 Subject: [PATCH 1/7] Update lifecycle tags for non-experimental Python API to "maturing" --- .../cellxgene_census/src/cellxgene_census/_experiment.py | 2 +- .../cellxgene_census/src/cellxgene_census/_get_anndata.py | 2 +- api/python/cellxgene_census/src/cellxgene_census/_open.py | 6 +++--- .../src/cellxgene_census/_presence_matrix.py | 2 +- .../src/cellxgene_census/_release_directory.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/api/python/cellxgene_census/src/cellxgene_census/_experiment.py b/api/python/cellxgene_census/src/cellxgene_census/_experiment.py index 8f5b3f5fd..c1a372547 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_experiment.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_experiment.py @@ -31,7 +31,7 @@ def _get_experiment(census: soma.Collection, organism: str) -> soma.Experiment: ValueError: if unable to find the specified organism. Lifecycle: - Experimental. + Maturing. Examples: diff --git a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py index 271a9498c..faff16bdf 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py @@ -63,7 +63,7 @@ def get_anndata( An :class:`anndata.AnnData` object containing the census slice. Lifecycle: - Experimental. + Maturing. Examples: >>> get_anndata(census, "Mus musculus", obs_value_filter="tissue_general in ['brain', 'lung']") diff --git a/api/python/cellxgene_census/src/cellxgene_census/_open.py b/api/python/cellxgene_census/src/cellxgene_census/_open.py index 2b126c3ea..1253aa12d 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_open.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_open.py @@ -89,7 +89,7 @@ def open_soma( or a version are specified. Lifecycle: - Experimental. + Maturing. Examples: Open the default Census version, using a context manager which will automatically @@ -169,7 +169,7 @@ def get_source_h5ad_uri(dataset_id: str, *, census_version: str = "latest") -> C KeyError: if either `dataset_id` or `census_version` do not exist. Lifecycle: - Experimental. + Maturing. Examples: >>> cellxgene_census.get_source_h5ad_uri("cb5efdb0-f91c-4cbd-9ad4-9d4fa41c572d") @@ -206,7 +206,7 @@ def download_source_h5ad(dataset_id: str, to_path: str, *, census_version: str = an existing file), or is not a file. Lifecycle: - Experimental. + Maturing. See Also: :func:`get_source_h5ad_uri`: Look up the location of the source H5AD. diff --git a/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py b/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py index 2ab5c0d5c..e20321381 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py @@ -38,7 +38,7 @@ def get_presence_matrix( ValueError: if the organism cannot be found. Lifecycle: - Experimental. + Maturing. Examples: >>> get_presence_matrix(census, "Homo sapiens", "RNA") diff --git a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py index 3d61ac18c..6ed86a327 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py @@ -54,7 +54,7 @@ def get_census_version_description(census_version: str) -> CensusVersionDescript KeyError: if unknown census_version value. Lifecycle: - Experimental. + Maturing. See Also: :func:`get_census_version_directory`: returns the entire directory as a dict. @@ -83,7 +83,7 @@ def get_census_version_directory() -> Dict[CensusVersionName, CensusVersionDescr A dictionary that contains release names and their corresponding release description. Lifecycle: - Experimental. + Maturing. See Also: :func:`get_census_version_description`: get description by census_version. From e20d7cdf6d59b82f8ed6213249af5fa63e159b3f Mon Sep 17 00:00:00 2001 From: Andrew Tolopko Date: Tue, 30 May 2023 12:53:02 -0400 Subject: [PATCH 2/7] Update lifecycle tags for experimental Python API to "experimental" --- .../experimental/ml/pytorch.py | 75 ++++++++++++++++++- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py index 8c89536ae..71d8f9adf 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py +++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py @@ -34,6 +34,13 @@ @attrs class Stats: + """ + Statistics about the data retrieved by ExperimentDataPipe from TileDB-SOMA. + + Lifecycle: + Maturing. + """ + n_obs: int = 0 """The total number of obs rows retrieved""" @@ -223,6 +230,9 @@ class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsDatum]]): # type: ignore attribute: exp_data_pipe.obs_encoders()[""].inverse_transform(encoded_values) + + Lifecycle: + Maturing. """ _query: Optional[soma.ExperimentAxisQuery] @@ -252,6 +262,19 @@ def __init__( num_workers: int = 0, soma_buffer_bytes: Optional[int] = None, ) -> None: + """ + Construct a new ExperimentDataPipe. + + Args: + + Returns: ExperimentDataPipe + + Examples: + + Lifecycle: + Experimental. + """ + self.exp_uri = experiment.uri self.aws_region = experiment.context.tiledb_ctx.config().get("vfs.s3.region") self.measurement_name = measurement_name @@ -356,6 +379,18 @@ def __setstate__(self, state: Dict[str, Any]) -> None: self._query = None def obs_encoders(self) -> Encoders: + """ + Returns the encoders were used to encode obs column values and that are needed to decode them. + + Args: + + Returns: ```Dict[str, LabelEncoder]``` mapping column names to ```LabelEncoder```s. + + Examples: + + Lifecycle: + Experimental. + """ if self._encoders is not None: return self._encoders @@ -373,10 +408,37 @@ def obs_encoders(self) -> Encoders: return self._encoders def stats(self) -> Stats: + """ + Get data loading stats for this ExperimentDataPipe. + + Args: None. + + Returns: ```Stats``` object + + Examples: + + Lifecycle: + Experimental. + """ return self._stats @property def shape(self) -> Tuple[int, int]: + """ + Get the shape of the data that will be returned by this ExperimentDataPipe. This is the number of + observations (cells) and variables (features) in the returned data. If used in multiprocessing mode + (i.e. DataLoader instantiated with num_workers > 0), the observations (cell) count will reflect the size of the + partition of the data that is handled by ths active process. + + Args: + + Returns: 2-Tuple of ints, for obs and var counts, respectively + + Examples: + + Lifecycle: + Experimental. + """ self._init() assert self._query is not None @@ -384,7 +446,7 @@ def shape(self) -> Tuple[int, int]: # Note: must be a top-level function (and not a lambda), to play nice with multiprocessing pickling -def collate_noop(x: Any) -> Any: +def _collate_noop(x: Any) -> Any: return x @@ -398,6 +460,15 @@ def experiment_dataloader( Factory method for PyTorch DataLoader. Provides a safer, more convenient interface for instantiating a DataLoader that works with the ExperimentDataPipe, since not all of DataLoader's params can be used (batch_size, sampler, batch_sampler, collate_fn). + + Args: TODO + + Returns: PyTorch ```DataLoader``` + + Examples: + + Lifecycle: + Experimental. """ unsupported_dataloader_args = ["batch_size", "sampler", "batch_sampler", "collate_fn"] @@ -409,7 +480,7 @@ def experiment_dataloader( batch_size=None, # batching is handled by our ExperimentDataPipe num_workers=num_workers, # avoid use of default collator, which adds an extra (3rd) dimension to the tensor batches - collate_fn=collate_noop, + collate_fn=_collate_noop, **dataloader_kwargs, ) From 65e70a20063dd91b5c6c023c44fac98b8b1e7518 Mon Sep 17 00:00:00 2001 From: Andrew Tolopko Date: Tue, 30 May 2023 12:53:29 -0400 Subject: [PATCH 3/7] export public names for experimental ml package --- .../src/cellxgene_census/experimental/ml/__init__.py | 7 +++++++ api/python/notebooks/ml_demo/pytorch_lr_classifier.py | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/__init__.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/__init__.py index e69de29bb..ab62b4e63 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/__init__.py +++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/__init__.py @@ -0,0 +1,7 @@ +""" +An API to facilitate use of PyTorch ML training with data from the CZI Science CELLxGENE Census. +""" + +from .pytorch import ExperimentDataPipe, Stats, experiment_dataloader + +__all__ = ["Stats", "ExperimentDataPipe", "experiment_dataloader"] diff --git a/api/python/notebooks/ml_demo/pytorch_lr_classifier.py b/api/python/notebooks/ml_demo/pytorch_lr_classifier.py index 711108117..a8ef9c68e 100644 --- a/api/python/notebooks/ml_demo/pytorch_lr_classifier.py +++ b/api/python/notebooks/ml_demo/pytorch_lr_classifier.py @@ -3,7 +3,7 @@ import torch import cellxgene_census -from cellxgene_census.experimental.ml.pytorch import experiment_dataloader, ExperimentDataPipe +import cellxgene_census.experimental.ml as census_ml # TODO: Convert this to a notebook @@ -82,7 +82,7 @@ def run(): obs_value_filter = "tissue_general == 'tongue' and is_primary_data == True" var_value_filter = "" - exp_dp = ExperimentDataPipe( + exp_dp = census_ml.ExperimentDataPipe( census["census_data"]["homo_sapiens"], measurement_name="RNA", X_name="raw", @@ -95,7 +95,7 @@ def run(): dp = exp_dp.shuffle(buffer_size=len(exp_dp)) dp_train, dp_test = dp.random_split(weights={"train": 0.7, "test": 0.3}, seed=RANDOM_SEED) - dl_train = experiment_dataloader( + dl_train = census_ml.experiment_dataloader( dp_train, # >= 1 uses multiprocessing to load data num_workers=0, From 08ac5d43a7d3b66843c0812661e14cedcd723acd Mon Sep 17 00:00:00 2001 From: Andrew Tolopko Date: Tue, 30 May 2023 12:56:27 -0400 Subject: [PATCH 4/7] fix copy/paste error --- .../src/cellxgene_census/experimental/ml/pytorch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py index 71d8f9adf..a6b4774cc 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py +++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py @@ -38,7 +38,7 @@ class Stats: Statistics about the data retrieved by ExperimentDataPipe from TileDB-SOMA. Lifecycle: - Maturing. + Experimental. """ n_obs: int = 0 @@ -232,7 +232,7 @@ class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsDatum]]): # type: ignore exp_data_pipe.obs_encoders()[""].inverse_transform(encoded_values) Lifecycle: - Maturing. + Experimental. """ _query: Optional[soma.ExperimentAxisQuery] From 7b2865a3a1e2c0970ad052624e90e1b2845f7e23 Mon Sep 17 00:00:00 2001 From: Andrew Tolopko Date: Tue, 30 May 2023 13:05:34 -0400 Subject: [PATCH 5/7] docstring formatting --- .../experimental/ml/pytorch.py | 75 ++++++++++--------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py index a6b4774cc..f7fda8c08 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py +++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py @@ -35,7 +35,7 @@ @attrs class Stats: """ - Statistics about the data retrieved by ExperimentDataPipe from TileDB-SOMA. + Statistics about the data retrieved by ``ExperimentDataPipe`` via SOMA API. Lifecycle: Experimental. @@ -78,21 +78,21 @@ def _open_experiment( class _ObsAndXIterator(Iterator[ObsDatum]): """ - Iterates through a set of obs and related X rows, specified as `soma_joinid`s. Encapsulates the batch-based data - fetching from TileDB-SOMA objects, providing row-based iteration. + Iterates through a set of obs and related X rows, specified as ``soma_joinid``s. Encapsulates the batch-based data + fetching from SOMA objects, providing row-based iteration. """ obs_tables_iter: somacore.ReadIter[pa.Table] - """Iterates the TileDB-SOMA batches (tables) of obs data""" + """Iterates the SOMA batches (tables) of obs data""" obs_batch_: pd.DataFrame = pd.DataFrame() - """The current TileDB-SOMA batch of obs data""" + """The current SOMA batch of obs data""" X_batch: scipy.matrix = None - """All X data for the soma_joinids of the current obs - batch""" + """All X data for the ``soma_joinid``s of the current obs - batch""" i: int = -1 - """Index into current obs TileDB-SOMA batch""" + """Index into current obs ``SOMA`` batch""" def __init__( self, @@ -184,7 +184,7 @@ def obs_batch(self) -> pd.DataFrame: Returns the current SOMA batch of obs rows. If the current SOMA batch has been fully iterated, loads the next SOMA batch of both obs and X data and returns the new obs batch (only). - Raises StopIteration if there are no more SOMA batches to retrieve. + Raises ``StopIteration`` if there are no more SOMA batches to retrieve. """ if 0 <= self.i < len(self.obs_batch_): return self.obs_batch_ @@ -210,33 +210,33 @@ def obs_batch(self) -> pd.DataFrame: class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsDatum]]): # type: ignore """ - An iterable-style PyTorch data pipe that reads obs and X data from a SOMA Experiment, and returns an iterator of - tuples of torch tensors: + An iterable-style PyTorch ``DataPipe`` that reads obs and X data from a SOMA Experiment, and returns an iterator of + tuples of PyTorch ``Tensor``s: - (tensor([0., 0., 0., 0., 0., 1., 0., 0., 0.]), # X data - tensor([2415, 0, 0], dtype=torch.int32)) # obs data, encoded + >>> (tensor([0., 0., 0., 0., 0., 1., 0., 0., 0.]), # X data + tensor([2415, 0, 0], dtype=torch.int32)) # obs data, encoded Supports batching via `batch_size` param: - DataLoader(..., batch_size=3, ...): - (tensor([[0., 0., 0., 0., 0., 1., 0., 0., 0.], # X batch - [0., 0., 0., 0., 0., 0., 0., 0., 0.], - [0., 0., 0., 0., 0., 0., 0., 0., 0.]]), - tensor([[2415, 0, 0], # obs batch - [2416, 0, 4], - [2417, 0, 3]], dtype=torch.int32)) + >>> DataLoader(..., batch_size=3, ...): + (tensor([[0., 0., 0., 0., 0., 1., 0., 0., 0.], # X batch + [0., 0., 0., 0., 0., 0., 0., 0., 0.], + [0., 0., 0., 0., 0., 0., 0., 0., 0.]]), + tensor([[2415, 0, 0], # obs batch + [2416, 0, 4], + [2417, 0, 3]], dtype=torch.int32)) Obs attribute values are encoded as categoricals. Values can be decoded by obtaining the encoder for a given attribute: - exp_data_pipe.obs_encoders()[""].inverse_transform(encoded_values) + >>> exp_data_pipe.obs_encoders()[""].inverse_transform(encoded_values) Lifecycle: Experimental. """ _query: Optional[soma.ExperimentAxisQuery] - """In multi-processing mode (i.e. num_workers > 0), this ExperimentAxisQuery object will *not* be pickled; + """In multi-processing mode (i.e. num_workers > 0), this ``ExperimentAxisQuery`` object will *not* be pickled; each worker will instantiate a new query""" _obs_joinids_partitioned: Optional[pa.Array] @@ -263,11 +263,12 @@ def __init__( soma_buffer_bytes: Optional[int] = None, ) -> None: """ - Construct a new ExperimentDataPipe. + Construct a new ``ExperimentDataPipe``. Args: - Returns: ExperimentDataPipe + Returns: + ``ExperimentDataPipe``. Examples: @@ -380,11 +381,12 @@ def __setstate__(self, state: Dict[str, Any]) -> None: def obs_encoders(self) -> Encoders: """ - Returns the encoders were used to encode obs column values and that are needed to decode them. + Returns the encoders that were used to encode obs column values and that are needed to decode them. Args: - Returns: ```Dict[str, LabelEncoder]``` mapping column names to ```LabelEncoder```s. + Returns: + ``Dict[str, LabelEncoder]`` mapping column names to ``LabelEncoder``s. Examples: @@ -409,11 +411,12 @@ def obs_encoders(self) -> Encoders: def stats(self) -> Stats: """ - Get data loading stats for this ExperimentDataPipe. + Get data loading stats for this ``ExperimentDataPipe``. Args: None. - Returns: ```Stats``` object + Returns: + ``Stats`` object. Examples: @@ -426,13 +429,14 @@ def stats(self) -> Stats: def shape(self) -> Tuple[int, int]: """ Get the shape of the data that will be returned by this ExperimentDataPipe. This is the number of - observations (cells) and variables (features) in the returned data. If used in multiprocessing mode - (i.e. DataLoader instantiated with num_workers > 0), the observations (cell) count will reflect the size of the - partition of the data that is handled by ths active process. + obs (cell) and var (feature) counts in the returned data. If used in multiprocessing mode + (i.e. DataLoader instantiated with num_workers > 0), the obs (cell) count will reflect the size of the + partition of the data assigned to the active process. Args: - Returns: 2-Tuple of ints, for obs and var counts, respectively + Returns: + 2-tuple of ``int``s, for obs and var counts, respectively. Examples: @@ -457,13 +461,14 @@ def experiment_dataloader( **dataloader_kwargs: Any, ) -> DataLoader: """ - Factory method for PyTorch DataLoader. Provides a safer, more convenient interface for instantiating a DataLoader - that works with the ExperimentDataPipe, since not all of DataLoader's params can be used (batch_size, sampler, - batch_sampler, collate_fn). + Factory method for PyTorch ``DataLoader``. Provides a safer, more convenient interface for instantiating a + ``DataLoader`` that works with the ``ExperimentDataPipe``, since not all of ``DataLoader``'s params can be + used (``batch_size``, ``sampler``, ``batch_sampler``, ``collate_fn``). Args: TODO - Returns: PyTorch ```DataLoader``` + Returns: + PyTorch ``DataLoader``. Examples: From 72731683b137dd611f3c0b3a88e439c121a73c68 Mon Sep 17 00:00:00 2001 From: Andrew Tolopko Date: Tue, 30 May 2023 13:58:41 -0400 Subject: [PATCH 6/7] fix case/punctuation for lifecycle tags --- .../src/cellxgene_census/_experiment.py | 2 +- .../src/cellxgene_census/_get_anndata.py | 2 +- .../cellxgene_census/src/cellxgene_census/_open.py | 6 +++--- .../src/cellxgene_census/_presence_matrix.py | 2 +- .../src/cellxgene_census/_release_directory.py | 4 ++-- .../cellxgene_census/experimental/ml/pytorch.py | 14 +++++++------- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/api/python/cellxgene_census/src/cellxgene_census/_experiment.py b/api/python/cellxgene_census/src/cellxgene_census/_experiment.py index c1a372547..f8edb0ce2 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_experiment.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_experiment.py @@ -31,7 +31,7 @@ def _get_experiment(census: soma.Collection, organism: str) -> soma.Experiment: ValueError: if unable to find the specified organism. Lifecycle: - Maturing. + maturing Examples: diff --git a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py index faff16bdf..9d5c6f068 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py @@ -63,7 +63,7 @@ def get_anndata( An :class:`anndata.AnnData` object containing the census slice. Lifecycle: - Maturing. + maturing Examples: >>> get_anndata(census, "Mus musculus", obs_value_filter="tissue_general in ['brain', 'lung']") diff --git a/api/python/cellxgene_census/src/cellxgene_census/_open.py b/api/python/cellxgene_census/src/cellxgene_census/_open.py index 1253aa12d..11d977c4a 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_open.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_open.py @@ -89,7 +89,7 @@ def open_soma( or a version are specified. Lifecycle: - Maturing. + maturing Examples: Open the default Census version, using a context manager which will automatically @@ -169,7 +169,7 @@ def get_source_h5ad_uri(dataset_id: str, *, census_version: str = "latest") -> C KeyError: if either `dataset_id` or `census_version` do not exist. Lifecycle: - Maturing. + maturing Examples: >>> cellxgene_census.get_source_h5ad_uri("cb5efdb0-f91c-4cbd-9ad4-9d4fa41c572d") @@ -206,7 +206,7 @@ def download_source_h5ad(dataset_id: str, to_path: str, *, census_version: str = an existing file), or is not a file. Lifecycle: - Maturing. + maturing See Also: :func:`get_source_h5ad_uri`: Look up the location of the source H5AD. diff --git a/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py b/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py index e20321381..bd6f57e9f 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_presence_matrix.py @@ -38,7 +38,7 @@ def get_presence_matrix( ValueError: if the organism cannot be found. Lifecycle: - Maturing. + maturing Examples: >>> get_presence_matrix(census, "Homo sapiens", "RNA") diff --git a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py index 6ed86a327..3f8f43b32 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py +++ b/api/python/cellxgene_census/src/cellxgene_census/_release_directory.py @@ -54,7 +54,7 @@ def get_census_version_description(census_version: str) -> CensusVersionDescript KeyError: if unknown census_version value. Lifecycle: - Maturing. + maturing See Also: :func:`get_census_version_directory`: returns the entire directory as a dict. @@ -83,7 +83,7 @@ def get_census_version_directory() -> Dict[CensusVersionName, CensusVersionDescr A dictionary that contains release names and their corresponding release description. Lifecycle: - Maturing. + maturing See Also: :func:`get_census_version_description`: get description by census_version. diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py index f7fda8c08..0a22f423d 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py +++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py @@ -38,7 +38,7 @@ class Stats: Statistics about the data retrieved by ``ExperimentDataPipe`` via SOMA API. Lifecycle: - Experimental. + experimental """ n_obs: int = 0 @@ -232,7 +232,7 @@ class ExperimentDataPipe(pipes.IterDataPipe[Dataset[ObsDatum]]): # type: ignore >>> exp_data_pipe.obs_encoders()[""].inverse_transform(encoded_values) Lifecycle: - Experimental. + experimental """ _query: Optional[soma.ExperimentAxisQuery] @@ -273,7 +273,7 @@ def __init__( Examples: Lifecycle: - Experimental. + experimental """ self.exp_uri = experiment.uri @@ -391,7 +391,7 @@ def obs_encoders(self) -> Encoders: Examples: Lifecycle: - Experimental. + experimental """ if self._encoders is not None: return self._encoders @@ -421,7 +421,7 @@ def stats(self) -> Stats: Examples: Lifecycle: - Experimental. + experimental """ return self._stats @@ -441,7 +441,7 @@ def shape(self) -> Tuple[int, int]: Examples: Lifecycle: - Experimental. + experimental """ self._init() assert self._query is not None @@ -473,7 +473,7 @@ def experiment_dataloader( Examples: Lifecycle: - Experimental. + experimental """ unsupported_dataloader_args = ["batch_size", "sampler", "batch_sampler", "collate_fn"] From 9f95b53f1ca4fce1875cd78b12a6222c9beffa95 Mon Sep 17 00:00:00 2001 From: Andrew Tolopko Date: Tue, 30 May 2023 14:01:21 -0400 Subject: [PATCH 7/7] rm empty docstring section headers --- .../experimental/ml/pytorch.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py index 0a22f423d..153584bd9 100644 --- a/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py +++ b/api/python/cellxgene_census/src/cellxgene_census/experimental/ml/pytorch.py @@ -265,13 +265,9 @@ def __init__( """ Construct a new ``ExperimentDataPipe``. - Args: - Returns: ``ExperimentDataPipe``. - Examples: - Lifecycle: experimental """ @@ -383,13 +379,9 @@ def obs_encoders(self) -> Encoders: """ Returns the encoders that were used to encode obs column values and that are needed to decode them. - Args: - Returns: ``Dict[str, LabelEncoder]`` mapping column names to ``LabelEncoder``s. - Examples: - Lifecycle: experimental """ @@ -418,8 +410,6 @@ def stats(self) -> Stats: Returns: ``Stats`` object. - Examples: - Lifecycle: experimental """ @@ -433,13 +423,9 @@ def shape(self) -> Tuple[int, int]: (i.e. DataLoader instantiated with num_workers > 0), the obs (cell) count will reflect the size of the partition of the data assigned to the active process. - Args: - Returns: 2-tuple of ``int``s, for obs and var counts, respectively. - Examples: - Lifecycle: experimental """ @@ -465,13 +451,9 @@ def experiment_dataloader( ``DataLoader`` that works with the ``ExperimentDataPipe``, since not all of ``DataLoader``'s params can be used (``batch_size``, ``sampler``, ``batch_sampler``, ``collate_fn``). - Args: TODO - Returns: PyTorch ``DataLoader``. - Examples: - Lifecycle: experimental """