-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Util unit tests #261
Util unit tests #261
Changes from 2 commits
412a03b
79754b1
b79a7de
93e7569
6fb6bc4
2f02aba
32817c8
41630bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import numpy as np | ||
import pytest | ||
from scipy.sparse import coo_matrix, csr_matrix, triu | ||
|
||
from tools.cell_census_builder.util import array_chunker, is_positive_integral, uricat | ||
|
||
|
||
def test_is_positive_integral() -> None: | ||
X = np.array([1, 2, 3, 4]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these result in an array of dtype np.int64. The function only accepts floats (both float32 and float64), although it does not raise an error if the input violates the type signature. These tests should explicitly create arrays of dtype float32 or float64. Prefer float32 for most tests, as that is what we typically receive. |
||
assert is_positive_integral(X) | ||
|
||
X = np.array([-1, 2, 3, 4]) | ||
assert not is_positive_integral(X) | ||
|
||
X = np.array([1.2, 0, 3, 4]) | ||
assert not is_positive_integral(X) | ||
|
||
X = np.zeros((3, 4)) | ||
assert is_positive_integral(X) | ||
|
||
X = csr_matrix([[1, 2, 3], [4, 5, 6]]) | ||
assert is_positive_integral(X) | ||
|
||
X = csr_matrix([[-1, 2, 3], [4, 5, 6]]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these are also int64 - same issue as with ndarray, they need a float dtype to be specified. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Apologies, for some reason I thought scipy.sparse created float matrices by default. Fixing it. |
||
assert not is_positive_integral(X) | ||
|
||
X = csr_matrix([[1.2, 0, 3], [4, 5, 6]]) | ||
assert not is_positive_integral(X) | ||
|
||
X = csr_matrix([0, 0, 0]) | ||
assert is_positive_integral(X) | ||
|
||
|
||
def test_array_chunker() -> None: | ||
# Case 1: dense matrix (np.ndarray) | ||
X = np.ones(1200).reshape(30, 40) | ||
# If nnz_chunk_size is less than the number of cols, the number of cols is used (40 in this example) | ||
chunked = list(array_chunker(X, nnz_chunk_size=10)) | ||
assert len(chunked) == 30 | ||
for i, s in enumerate(chunked): | ||
assert isinstance(s, coo_matrix) | ||
assert s.nnz == 40 | ||
assert s.shape == (30, 40) | ||
# The i-th row of the matrix should have 40 nonzeros (which implies the rest are zeros) | ||
csr = s.tocsr() | ||
assert csr.getrow(i).nnz == 40 | ||
|
||
# If nnz_chunk_size is less than | ||
chunked = list(array_chunker(X, nnz_chunk_size=600)) | ||
assert len(chunked) == 2 | ||
for s in chunked: | ||
assert isinstance(s, coo_matrix) | ||
assert s.nnz == 600 | ||
assert s.shape == (30, 40) | ||
|
||
chunked = list(array_chunker(X, nnz_chunk_size=2400)) | ||
assert len(chunked) == 1 | ||
|
||
# Case 2: compressed row sparse matrix (csr_matrix) | ||
# we'll use an upper triangular matrix with all ones (avg 5 nnz per row) | ||
# [ | ||
# [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], | ||
# [0., 1., 1., 1., 1., 1., 1., 1., 1., 1.], | ||
# [0., 0., 1., 1., 1., 1., 1., 1., 1., 1.], | ||
# [0., 0., 0., 1., 1., 1., 1., 1., 1., 1.], | ||
# [0., 0., 0., 0., 1., 1., 1., 1., 1., 1.], | ||
# [0., 0., 0., 0., 0., 1., 1., 1., 1., 1.], | ||
# [0., 0., 0., 0., 0., 0., 1., 1., 1., 1.], | ||
# [0., 0., 0., 0., 0., 0., 0., 1., 1., 1.], | ||
# [0., 0., 0., 0., 0., 0., 0., 0., 1., 1.], | ||
# [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], | ||
# ] | ||
|
||
X = triu(np.ones(100).reshape(10, 10)).tocsr() | ||
# In this case, chunks will be 2 rows x 10 column (since on average each row contains 5 nonzeros) | ||
chunked = list(array_chunker(X, nnz_chunk_size=10)) | ||
assert len(chunked) == 5 | ||
assert chunked[0].nnz == 19 | ||
assert chunked[1].nnz == 15 | ||
assert chunked[2].nnz == 11 | ||
assert chunked[3].nnz == 7 | ||
assert chunked[4].nnz == 3 | ||
|
||
# Case 3: compressed column sparse matrix (csc_matrix) | ||
# We'll use the same example as for csr, but note that chunking is done by column and not by row. | ||
|
||
X = triu(np.ones(100).reshape(10, 10)).tocsc() | ||
# In this case, chunks will be 2 rows x 10 column (since on average each row contains 5 nonzeros) | ||
chunked = list(array_chunker(X, nnz_chunk_size=10)) | ||
assert len(chunked) == 5 | ||
assert chunked[0].nnz == 3 | ||
assert chunked[1].nnz == 7 | ||
assert chunked[2].nnz == 11 | ||
assert chunked[3].nnz == 15 | ||
assert chunked[4].nnz == 19 | ||
|
||
# Other formats are rejected by the method | ||
X = triu(np.ones(100).reshape(10, 10)).tocoo() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. recommend testing this by passing something other than COO (eg., LIL or BSR), as it is fairly likely we will add COO support the the array_chunker at some point. Just change the final call from |
||
with pytest.raises(NotImplementedError): | ||
list(array_chunker(X)) | ||
|
||
|
||
def test_uricat() -> None: | ||
bkmartinjr marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assert uricat("path", "to", "somewhere") == "path/to/somewhere" | ||
assert uricat("path/", "to/", "somewhere") == "path/to/somewhere" | ||
assert uricat("path/", "to/", "somewhere/") == "path/to/somewhere/" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To be pedantic, this method should be called
is_nonnegative_integral
since 0 is an accepted value. If that looks better I can make the change.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
feel free to improve it!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you do this, please also add a test of empty array, e.g.,
and equivalent for sparse.