
Commit

Run linter
ivirshup committed Jul 2, 2024
1 parent 37a579f commit 415c34e
Showing 16 changed files with 151 additions and 151 deletions.
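The changes in this commit mechanically modernize type annotations: `Optional[X]` becomes `X | None` (PEP 604), `typing.Dict`/`typing.Set` become the builtin generics `dict`/`set` (PEP 585), and abstract container types such as `Sequence` and `Generator` are imported from `collections.abc`. The commit message only says "Run linter", so the exact tool is not stated; the rewrites match what pyupgrade-style lint rules produce. A minimal before/after sketch of the pattern, using hypothetical names rather than code from this repository:

```python
# Before: typing-module generics and Optional (pre-PEP 585 / PEP 604 style).
from typing import Dict, Optional, Sequence


def fetch(ids: Optional[Sequence[int]] = None) -> Dict[str, int]:
    return {str(i): i for i in ids or []}


# After: the style this commit adopts (builtin generics need Python 3.9+,
# `X | None` in evaluated annotations needs 3.10+).
from collections.abc import Sequence


def fetch(ids: Sequence[int] | None = None) -> dict[str, int]:
    return {str(i): i for i in ids or []}
```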
49 changes: 25 additions & 24 deletions api/python/cellxgene_census/src/cellxgene_census/_get_anndata.py
@@ -7,7 +7,8 @@
Methods to retrieve slices of the census as AnnData objects.
"""

from typing import Literal, Optional, Sequence
from collections.abc import Sequence
from typing import Literal
from warnings import warn

import anndata
@@ -27,20 +28,20 @@ def get_anndata(
organism: str,
measurement_name: str = "RNA",
X_name: str = "raw",
X_layers: Optional[Sequence[str]] = (),
obsm_layers: Optional[Sequence[str]] = (),
obsp_layers: Optional[Sequence[str]] = (),
varm_layers: Optional[Sequence[str]] = (),
varp_layers: Optional[Sequence[str]] = (),
obs_value_filter: Optional[str] = None,
obs_coords: Optional[SparseDFCoord] = None,
var_value_filter: Optional[str] = None,
var_coords: Optional[SparseDFCoord] = None,
column_names: Optional[soma.AxisColumnNames] = None,
obs_embeddings: Optional[Sequence[str]] = (),
var_embeddings: Optional[Sequence[str]] = (),
obs_column_names: Optional[Sequence[str]] = None,
var_column_names: Optional[Sequence[str]] = None,
X_layers: Sequence[str] | None = (),
obsm_layers: Sequence[str] | None = (),
obsp_layers: Sequence[str] | None = (),
varm_layers: Sequence[str] | None = (),
varp_layers: Sequence[str] | None = (),
obs_value_filter: str | None = None,
obs_coords: SparseDFCoord | None = None,
var_value_filter: str | None = None,
var_coords: SparseDFCoord | None = None,
column_names: soma.AxisColumnNames | None = None,
obs_embeddings: Sequence[str] | None = (),
var_embeddings: Sequence[str] | None = (),
obs_column_names: Sequence[str] | None = None,
var_column_names: Sequence[str] | None = None,
) -> anndata.AnnData:
"""Convenience wrapper around :class:`tiledbsoma.Experiment` query, to build and execute a query,
and return it as an :class:`anndata.AnnData` object.
@@ -176,9 +177,9 @@ def _get_axis_metadata(
axis: Literal["obs", "var"],
organism: str,
*,
value_filter: Optional[str] = None,
coords: Optional[SparseDFCoord] = slice(None),
column_names: Optional[Sequence[str]] = None,
value_filter: str | None = None,
coords: SparseDFCoord | None = slice(None),
column_names: Sequence[str] | None = None,
) -> pd.DataFrame:
exp = _get_experiment(census, organism)
coords = (slice(None),) if coords is None else (coords,)
@@ -198,9 +199,9 @@ def get_obs(
census: soma.Collection,
organism: str,
*,
value_filter: Optional[str] = None,
coords: Optional[SparseDFCoord] = slice(None),
column_names: Optional[Sequence[str]] = None,
value_filter: str | None = None,
coords: SparseDFCoord | None = slice(None),
column_names: Sequence[str] | None = None,
) -> pd.DataFrame:
"""Get the observation metadata for a query on the census.
Expand Down Expand Up @@ -230,9 +231,9 @@ def get_var(
census: soma.Collection,
organism: str,
*,
value_filter: Optional[str] = None,
coords: Optional[SparseDFCoord] = slice(None),
column_names: Optional[Sequence[str]] = None,
value_filter: str | None = None,
coords: SparseDFCoord | None = slice(None),
column_names: Sequence[str] | None = None,
) -> pd.DataFrame:
"""Get the variable metadata for a query on the census.
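For reference, a hedged sketch of calling the updated `get_anndata` and `get_obs` signatures; the organism, filter expression, and column names below are illustrative values, not taken from this commit:

```python
import cellxgene_census

# Open the default ("stable") Census release and slice it into an AnnData object.
with cellxgene_census.open_soma() as census:
    adata = cellxgene_census.get_anndata(
        census,
        organism="Homo sapiens",                      # illustrative organism
        obs_value_filter="tissue_general == 'lung'",  # illustrative value filter
        obs_column_names=["assay", "cell_type"],      # per-axis replacement for column_names
    )

    # get_obs returns only the observation metadata, as a pandas DataFrame.
    obs_df = cellxgene_census.get_obs(
        census,
        "Homo sapiens",
        value_filter="is_primary_data == True",
        column_names=["cell_type", "sex"],
    )
```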
18 changes: 9 additions & 9 deletions api/python/cellxgene_census/src/cellxgene_census/_open.py
@@ -10,7 +10,7 @@
import logging
import os.path
import urllib.parse
from typing import Any, Dict, Optional, get_args
from typing import Any, get_args

import s3fs
import tiledbsoma as soma
@@ -28,7 +28,7 @@

DEFAULT_CENSUS_VERSION = "stable"

DEFAULT_TILEDB_CONFIGURATION: Dict[str, Any] = {
DEFAULT_TILEDB_CONFIGURATION: dict[str, Any] = {
# https://docs.tiledb.com/main/how-to/configuration#configuration-parameters
"py.init_buffer_bytes": 1 * 1024**3,
"soma.init_buffer_bytes": 1 * 1024**3,
@@ -67,7 +67,7 @@ def _resolve_census_locator(locator: CensusLocator, mirror: CensusMirror) -> Res

def _open_soma(
locator: ResolvedCensusLocator,
context: Optional[soma.options.SOMATileDBContext] = None,
context: soma.options.SOMATileDBContext | None = None,
) -> soma.Collection:
"""Private. Merge config defaults and return open census as a soma Collection/context."""
# if no user-defined context, cellxgene_census defaults take precedence over SOMA defaults
@@ -81,7 +81,7 @@ def _open_soma(
return soma.open(locator["uri"], mode="r", soma_type=soma.Collection, context=context)


def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) -> soma.options.SOMATileDBContext:
def get_default_soma_context(tiledb_config: dict[str, Any] | None = None) -> soma.options.SOMATileDBContext:
"""Return a :class:`tiledbsoma.SOMATileDBContext` with sensible defaults that can be further customized by the
user. The customized context can then be passed to :func:`cellxgene_census.open_soma` with the ``context``
argument or to :meth:`somacore.SOMAObject.open` with the ``context`` argument, such as
@@ -126,11 +126,11 @@ def get_default_soma_context(tiledb_config: Optional[Dict[str, Any]] = None) ->

def open_soma(
*,
census_version: Optional[str] = DEFAULT_CENSUS_VERSION,
mirror: Optional[str] = None,
uri: Optional[str] = None,
tiledb_config: Optional[Dict[str, Any]] = None,
context: Optional[soma.options.SOMATileDBContext] = None,
census_version: str | None = DEFAULT_CENSUS_VERSION,
mirror: str | None = None,
uri: str | None = None,
tiledb_config: dict[str, Any] | None = None,
context: soma.options.SOMATileDBContext | None = None,
) -> soma.Collection:
"""Open the Census by version or URI.
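A sketch of how the updated `open_soma` and `get_default_soma_context` signatures compose; the buffer-size override is an illustrative value, not a recommendation:

```python
import cellxgene_census

# Start from the library defaults and override a single TileDB parameter.
ctx = cellxgene_census.get_default_soma_context(
    tiledb_config={"soma.init_buffer_bytes": 512 * 1024**2}  # illustrative override
)

# census_version, mirror, uri, tiledb_config, and context are all keyword-only
# and typed `X | None` after this change.
with cellxgene_census.open_soma(census_version="stable", context=ctx) as census:
    print(list(census.keys()))  # top-level collections of the opened Census
```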
@@ -7,9 +7,8 @@
Methods to retrieve information about versions of the publicly hosted Census object.
"""

import typing
from collections import OrderedDict
from typing import Any, Dict, Literal, Optional, Union, cast
from typing import Any, Literal, cast

import requests
from typing_extensions import NotRequired, TypedDict
@@ -35,7 +34,7 @@ class CensusLocator(TypedDict):

uri: str
relative_uri: str
s3_region: Optional[str]
s3_region: str | None


class CensusVersionRetraction(TypedDict):
@@ -53,13 +52,13 @@ class CensusVersionRetraction(TypedDict):
"""

date: str
reason: Optional[str]
info_url: Optional[str]
replaced_by: Optional[str]
reason: str | None
info_url: str | None
replaced_by: str | None


ReleaseFlag = Literal["lts", "retracted"]
ReleaseFlags = Dict[ReleaseFlag, bool]
ReleaseFlags = dict[ReleaseFlag, bool]


class CensusVersionDescription(TypedDict):
@@ -80,15 +79,15 @@ class CensusVersionDescription(TypedDict):
If retracted, details of the retraction.
"""

release_date: Optional[str]
release_date: str | None
release_build: str
soma: CensusLocator
h5ads: CensusLocator
flags: NotRequired[ReleaseFlags]
retraction: NotRequired[CensusVersionRetraction]


CensusDirectory = Dict[CensusVersionName, Union[CensusVersionName, CensusVersionDescription]]
CensusDirectory = dict[CensusVersionName, CensusVersionName | CensusVersionDescription]

"""
A provider identifies a storage medium for the Census, which can either be a cloud provider or a local file.
@@ -130,10 +129,10 @@ class CensusMirror(TypedDict):

provider: Provider
base_uri: str
region: Optional[str]
region: str | None


CensusMirrors = Dict[CensusMirrorName, Union[CensusMirrorName, CensusMirror]]
CensusMirrors = dict[CensusMirrorName, CensusMirrorName | CensusMirror]


class ResolvedCensusLocator(TypedDict):
@@ -152,7 +151,7 @@ class ResolvedCensusLocator(TypedDict):
"""

uri: str
region: Optional[str]
region: str | None
provider: str


@@ -197,8 +196,8 @@ def get_census_version_description(census_version: str) -> CensusVersionDescript


def get_census_version_directory(
*, lts: Optional[bool] = None, retracted: Optional[bool] = False
) -> Dict[CensusVersionName, CensusVersionDescription]:
*, lts: bool | None = None, retracted: bool | None = False
) -> dict[CensusVersionName, CensusVersionDescription]:
"""Get the directory of Census versions currently available, optionally filtering by specified
flags. If a filtering flag is not specified, Census versions will not be filtered by that flag.
Defaults to including both "long-term stable" (LTS) and weekly Census versions, and excluding
@@ -355,7 +354,7 @@ def get_census_version_directory(

directory: dict[str, str | dict[str, Any]] = response.json()
directory_out: CensusDirectory = {}
aliases: typing.Set[CensusVersionName] = set()
aliases: set[CensusVersionName] = set()

# Resolve all aliases for easier use
for census_version_name in list(directory.keys()):
@@ -398,7 +397,7 @@ def get_census_version_directory(
directory_out[census_version_name] = census_version_description.copy()

# Cast is safe, as we have removed all aliases
unordered_directory = cast(Dict[CensusVersionName, CensusVersionDescription], directory_out)
unordered_directory = cast(dict[CensusVersionName, CensusVersionDescription], directory_out)

# Sort by aliases and release date, descending
aliased_releases = [(k, v) for k, v in unordered_directory.items() if k in aliases]
@@ -414,7 +413,7 @@ def get_census_version_directory(
return ordered_directory


def get_census_mirror_directory() -> Dict[CensusMirrorName, CensusMirror]:
def get_census_mirror_directory() -> dict[CensusMirrorName, CensusMirror]:
"""Get the directory of Census mirrors currently available.
Returns:
@@ -426,7 +425,7 @@ def get_census_mirror_directory() -> Dict[CensusMirrorName, CensusMirror]:
"""
mirrors = _get_census_mirrors()
del mirrors["default"]
return cast(Dict[CensusMirrorName, CensusMirror], mirrors)
return cast(dict[CensusMirrorName, CensusMirror], mirrors)


def _get_census_mirrors() -> CensusMirrors:
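A sketch of the release-directory helpers with their updated return annotations; which versions and mirrors get printed depends on what is currently published:

```python
import cellxgene_census

# dict[CensusVersionName, CensusVersionDescription]; lts=True keeps only
# long-term stable releases, and retracted releases are excluded by default.
directory = cellxgene_census.get_census_version_directory(lts=True)
for name, description in directory.items():
    print(name, description["release_build"], description["release_date"])

# dict[CensusMirrorName, CensusMirror]; the "default" alias is stripped
# before the mapping is returned.
mirrors = cellxgene_census.get_census_mirror_directory()
for mirror_name, mirror in mirrors.items():
    print(mirror_name, mirror["provider"], mirror["base_uri"])
```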
@@ -8,7 +8,7 @@

import json
import warnings
from typing import Any, Dict, cast
from typing import Any, cast

import numpy as np
import numpy.typing as npt
@@ -53,7 +53,7 @@ def get_embedding_metadata(embedding_uri: str, context: soma.options.SOMATileDBC
embedding_metadata = json.loads(E.metadata["CxG_embedding_info"])
assert isinstance(embedding_metadata, dict)

return cast(Dict[str, Any], embedding_metadata)
return cast(dict[str, Any], embedding_metadata)


def _get_embedding(
@@ -65,7 +65,7 @@ def _get_embedding(
context: soma.options.SOMATileDBContext | None = None,
) -> npt.NDArray[np.float32]:
"""Private. Like get_embedding, but accepts a Census object and a Census directory."""
if isinstance(obs_soma_joinids, (pa.Array, pa.ChunkedArray, pd.Series)):
if isinstance(obs_soma_joinids, pa.Array | pa.ChunkedArray | pd.Series):
obs_soma_joinids = obs_soma_joinids.to_numpy()
assert isinstance(obs_soma_joinids, np.ndarray)
if obs_soma_joinids.dtype != np.int64:
@@ -192,7 +192,7 @@ def get_embedding_metadata_by_name(
response = requests.get(CELL_CENSUS_EMBEDDINGS_MANIFEST_URL)
response.raise_for_status()

manifest = cast(Dict[str, Dict[str, Any]], response.json())
manifest = cast(dict[str, dict[str, Any]], response.json())
embeddings = []
for _, obj in manifest.items():
if (
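A hedged sketch of the embedding-metadata helper whose annotations change above, assuming it is re-exported from `cellxgene_census.experimental` like the other helpers in this module; the URI is a placeholder, not a real embedding location:

```python
from cellxgene_census.experimental import get_embedding_metadata

# Placeholder URI for a hosted embedding SOMA array (hypothetical).
EMBEDDING_URI = "s3://example-bucket/path/to/embedding"

# Returns the parsed "CxG_embedding_info" metadata as a plain dict[str, Any].
metadata = get_embedding_metadata(EMBEDDING_URI)
print(sorted(metadata.keys()))
```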
@@ -1,6 +1,7 @@
import uuid
from abc import ABC, abstractmethod
from typing import Any, Dict, Generator, Optional
from collections.abc import Generator
from typing import Any

import scipy.sparse
from datasets import Dataset
@@ -37,7 +38,7 @@ def __init__(
measurement_name: str = "RNA",
layer_name: str = "raw",
*,
block_size: Optional[int] = None,
block_size: int | None = None,
**kwargs: Any,
):
"""Initialize the CellDatasetBuilder to process the results of a Census
@@ -55,13 +56,13 @@ def __init__(
self.layer_name = layer_name
self.block_size = block_size

def build(self, from_generator_kwargs: Optional[Dict[str, Any]] = None) -> Dataset:
def build(self, from_generator_kwargs: dict[str, Any] | None = None) -> Dataset:
"""Build the dataset from query results.
- `from_generator_kwargs`: kwargs passed through to `Dataset.from_generator()`
"""

def gen() -> Generator[Dict[str, Any], None, None]:
def gen() -> Generator[dict[str, Any], None, None]:
for Xblock, (block_cell_joinids, _) in (
self.X(self.layer_name).blockwise(axis=0, reindex_disable_on_axis=[1], size=self.block_size).scipy()
):
@@ -72,7 +73,7 @@ def gen() -> Generator[Dict[str, Any], None, None]:
return Dataset.from_generator(_DatasetGeneratorPickleHack(gen), **(from_generator_kwargs or {}))

@abstractmethod
def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> Dict[str, Any]:
def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> dict[str, Any]:
"""Abstract method to process the X row for one cell into a Dataset item.
- `cell_joinid`: The cell `soma_joinid`.
@@ -85,7 +86,7 @@ def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> Dict[str
class _DatasetGeneratorPickleHack:
"""SEE: https://github.com/huggingface/datasets/issues/6194."""

def __init__(self, generator: Any, generator_id: Optional[str] = None) -> None:
def __init__(self, generator: Any, generator_id: str | None = None) -> None:
self.generator = generator
self.generator_id = generator_id if generator_id is not None else str(uuid.uuid4())

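`CellDatasetBuilder` is abstract; a concrete subclass supplies `cell_item` to turn each cell's expression row into one Dataset record. A minimal sketch, with a hypothetical subclass and the import path assumed from this repository's module layout:

```python
from typing import Any

import scipy.sparse

import cellxgene_census
from cellxgene_census.experimental.ml.huggingface import CellDatasetBuilder


class RawStatsBuilder(CellDatasetBuilder):
    """Hypothetical builder recording each cell's joinid and nonzero gene count."""

    def cell_item(self, cell_joinid: int, Xrow: scipy.sparse.csr_matrix) -> dict[str, Any]:
        return {"soma_joinid": cell_joinid, "nnz": int(Xrow.nnz)}


with cellxgene_census.open_soma() as census:
    experiment = census["census_data"]["homo_sapiens"]
    with RawStatsBuilder(experiment, measurement_name="RNA", layer_name="raw") as builder:
        dataset = builder.build()  # datasets.Dataset assembled block by block
```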
@@ -1,5 +1,6 @@
import pickle
from typing import Any, Dict, Optional, Sequence, Set
from collections.abc import Sequence
from typing import Any

import numpy as np
import numpy.typing as npt
@@ -42,7 +43,7 @@ class GeneformerTokenizer(CellDatasetBuilder):
- and the specified `obs_column_names` (cell metadata from the experiment obs dataframe)
"""

obs_column_names: Set[str]
obs_column_names: set[str]
max_input_tokens: int

# set of gene soma_joinids corresponding to genes modeled by Geneformer:
@@ -54,8 +55,8 @@ def __init__(
self,
experiment: tiledbsoma.Experiment,
*,
obs_column_names: Optional[Sequence[str]] = None,
obs_attributes: Optional[Sequence[str]] = None,
obs_column_names: Sequence[str] | None = None,
obs_attributes: Sequence[str] | None = None,
max_input_tokens: int = 2048,
token_dictionary_file: str = "",
gene_median_file: str = "",
@@ -152,7 +153,7 @@ def __enter__(self) -> "GeneformerTokenizer":
self.obs_df = self.obs(column_names=obs_column_names).concat().to_pandas().set_index("soma_joinid")
return self

def cell_item(self, cell_joinid: int, cell_Xrow: scipy.sparse.csr_matrix) -> Dict[str, Any]:
def cell_item(self, cell_joinid: int, cell_Xrow: scipy.sparse.csr_matrix) -> dict[str, Any]:
"""Given the expression vector for one cell, compute the Dataset item providing
the Geneformer inputs (token sequence and metadata).
"""
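A sketch of driving `GeneformerTokenizer` end to end; the obs columns are illustrative, the dictionary file paths must point at files from a Geneformer installation, and the import path is assumed from this repository's module layout:

```python
import cellxgene_census
from cellxgene_census.experimental.ml.huggingface import GeneformerTokenizer

with cellxgene_census.open_soma() as census:
    experiment = census["census_data"]["homo_sapiens"]
    with GeneformerTokenizer(
        experiment,
        obs_column_names=["cell_type", "tissue_general"],  # illustrative columns
        max_input_tokens=2048,
        token_dictionary_file="token_dictionary.pkl",      # placeholder paths into a
        gene_median_file="gene_median_dictionary.pkl",     # Geneformer installation
    ) as tokenizer:
        # In practice the query is usually restricted to a subset of cells;
        # unfiltered, this tokenizes the full experiment.
        dataset = tokenizer.build()  # Hugging Face Dataset of token sequences + metadata
```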

0 comments on commit 415c34e
