-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
highly variable gene annotation (#511)
* initial implementation of highly_variable_genes
* add test marks
* add prebuffered iterator
* lint
* lint
* docstrings
* reduce expensive tests
* fix typo
* actually fix typo
* add test for get_highly_variable_genes
* lint
* reduce memory use in tests
* add example to docstring
* fix anon access in small memory context
* PR feedback
* loess jitter
* increase max loess noise max to 1e-6
* add tests
- Loading branch information
Bruce Martin
authored
Jun 7, 2023
1 parent
58cb475
commit 379a8ca
Showing
8 changed files
with
1,005 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 10 additions & 0 deletions
10
api/python/cellxgene_census/src/cellxgene_census/experimental/pp/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
""" | ||
API to facilitate preprocessing of SOMA datasets. | ||
""" | ||
|
||
from ._highly_variable_genes import get_highly_variable_genes, highly_variable_genes | ||
|
||
__all__ = [ | ||
"get_highly_variable_genes", | ||
"highly_variable_genes", | ||
] |
92 changes: 92 additions & 0 deletions
92
api/python/cellxgene_census/src/cellxgene_census/experimental/pp/_eager_iter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import threading | ||
from collections import deque | ||
from concurrent import futures | ||
from typing import Deque, Iterator, Optional, TypeVar | ||
|
||
_T = TypeVar("_T") | ||
|
||
|
||
class EagerIterator(Iterator[_T]):
    """Iterator wrapper that fetches the next item ahead of time on an executor.

    On construction, and again after each item is handed out, a call to the
    wrapped iterator's ``__next__`` is submitted to the executor, so one item
    is always being fetched (or already fetched) in the background.

    Args:
        iterator: The iterator to wrap.
        pool: Executor used to run the prefetch. When omitted, a private
            ``ThreadPoolExecutor`` is created; it is shut down once the
            wrapped iterator is exhausted or this object is finalized.
    """

    def __init__(
        self,
        iterator: Iterator[_T],
        pool: Optional[futures.Executor] = None,
    ):
        super().__init__()
        self.iterator = iterator
        self._pool = pool or futures.ThreadPoolExecutor()
        # Only an executor created here is ours to shut down.
        self._own_pool = pool is None
        # Kick off the first fetch immediately.
        self._future = self._pool.submit(self.iterator.__next__)

    def __next__(self) -> _T:
        """Return the prefetched item and start fetching the following one.

        Raises:
            StopIteration: When the wrapped iterator is exhausted.
        """
        try:
            item = self._future.result()
        except StopIteration:
            self._cleanup()
            raise
        # Re-arm the prefetch before handing the current item to the caller.
        self._future = self._pool.submit(self.iterator.__next__)
        return item

    def _cleanup(self) -> None:
        """Shut down the executor, but only if this object created it."""
        if not self._own_pool:
            return
        self._pool.shutdown()

    def __del__(self) -> None:
        # Covers the case where the iterator is dropped before exhaustion,
        # so a privately created thread pool is still released. See
        # https://docs.python.org/3/reference/datamodel.html#object.__del__
        self._cleanup()
        parent_del = getattr(super(), "__del__", None)
        if parent_del is not None:
            parent_del()
|
||
|
||
class EagerBufferedIterator(Iterator[_T]):
    """Iterator wrapper that reads ahead up to ``max_pending`` items.

    Calls to the wrapped iterator's ``__next__`` are submitted to an executor
    one at a time (never concurrently) and their futures are queued in
    submission order. Each completed fetch triggers the next one until
    ``max_pending`` results are queued; consuming an item frees a slot.

    Args:
        iterator: The iterator to wrap.
        max_pending: Maximum number of queued (pending or completed but not
            yet consumed) fetches. Must be at least 1.
        pool: Executor used to run the fetches. When omitted, a private
            ``ThreadPoolExecutor`` is created; it is shut down once the
            wrapped iterator is exhausted or this object is finalized.

    Raises:
        ValueError: If ``max_pending`` is less than 1.
    """

    def __init__(
        self,
        iterator: Iterator[_T],
        max_pending: int = 1,
        pool: Optional[futures.Executor] = None,
    ):
        super().__init__()
        if max_pending < 1:
            # With no slots nothing would ever be fetched and the first
            # next() would fail with an opaque IndexError.
            raise ValueError("max_pending must be >= 1")
        self.iterator = iterator
        self.max_pending = max_pending
        self._pool = pool or futures.ThreadPoolExecutor()
        # Only an executor created here is ours to shut down.
        self._own_pool = pool is None
        self._pending_results: Deque[futures.Future[_T]] = deque()
        self._lock = threading.Lock()
        self._begin_next()

    def __next__(self) -> _T:
        """Return the oldest queued item and top the queue back up.

        Raises:
            StopIteration: When the wrapped iterator is exhausted.
        """
        try:
            res = self._pending_results[0].result()
        except StopIteration:
            self._cleanup()
            raise
        # Pop under the lock so a worker-thread callback inspecting the
        # queue tail never observes the deque changing underneath it.
        with self._lock:
            self._pending_results.popleft()
        self._begin_next()
        return res

    def _begin_next(self) -> None:
        """Submit one more fetch if a slot is free and no fetch is in flight."""

        def _fut_done(fut: futures.Future[_T]) -> None:
            # Chain the next fetch only after a successful one; an exception
            # (including StopIteration) ends the read-ahead chain.
            if fut.exception() is None:
                self._begin_next()

        with self._lock:
            pending = self._pending_results
            if len(pending) >= self.max_pending:
                return
            # The wrapped iterator must not be advanced concurrently.
            if pending and not pending[-1].done():
                return
            future = self._pool.submit(self.iterator.__next__)
            # Queue before registering the callback so results stay in
            # submission order even if the callback fires right away.
            pending.append(future)
            assert len(pending) <= self.max_pending

        # Register outside the lock: if the future has already completed,
        # add_done_callback invokes the callback synchronously in this
        # thread, and the callback re-enters _begin_next. Doing this while
        # holding the (non-reentrant) lock would deadlock.
        future.add_done_callback(_fut_done)

    def _cleanup(self) -> None:
        """Shut down the executor, but only if this object created it."""
        # getattr: __del__ may run on an instance whose __init__ raised
        # before these attributes were assigned.
        if getattr(self, "_own_pool", False):
            self._pool.shutdown()

    def __del__(self) -> None:
        # Ensure the threadpool is cleaned up in the case where the
        # iterator is not exhausted. For more information on __del__:
        # https://docs.python.org/3/reference/datamodel.html#object.__del__
        self._cleanup()
        super_del = getattr(super(), "__del__", lambda: None)
        super_del()
Oops, something went wrong.