diff --git a/scvelo/preprocessing/utils.py b/scvelo/preprocessing/utils.py index ea89ff60..5c9608e7 100644 --- a/scvelo/preprocessing/utils.py +++ b/scvelo/preprocessing/utils.py @@ -492,16 +492,22 @@ def csr_vcorrcoef(X, y): mu_x = np.ravel(np.mean(X, axis=-1)) mu_y = np.ravel(np.mean(y, axis=-1)) nom = X.dot(y) - X.dot(np.repeat(mu_y, len(y))) - mu_x * np.sum(y - mu_y) + + if X.ndim == 1: + n_features = len(X) + else: + n_features = X.shape[1] + denom_x = ( np.ravel(np.sum(X.multiply(X), axis=-1)) if issparse(X) else np.sum(X * X, axis=-1) ) - denom_x = denom_x - np.ravel(np.sum(X, axis=-1)) * mu_x + mu_x ** 2 + denom_x = denom_x - 2 * np.ravel(np.sum(X, axis=-1)) * mu_x + n_features * mu_x ** 2 denom_y = ( np.ravel(np.sum(y * y, axis=-1)) - - (np.ravel(np.sum(y, axis=-1)) * mu_y) - + mu_y ** 2 + - 2 * (np.ravel(np.sum(y, axis=-1)) * mu_y) + + n_features * mu_y ** 2 ) return nom / np.sqrt(denom_x * denom_y) diff --git a/tests/preprocessing/__init__.py b/tests/preprocessing/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/preprocessing/test_utils.py b/tests/preprocessing/test_utils.py new file mode 100644 index 00000000..dfd70e87 --- /dev/null +++ b/tests/preprocessing/test_utils.py @@ -0,0 +1,76 @@ +import pytest + +import numpy as np +from scipy.sparse import csr_matrix, spmatrix + +from scvelo.preprocessing.utils import csr_vcorrcoef + + +class TestCsrVcorrcoef: + @pytest.mark.parametrize( + "X", + ( + np.zeros(3), + np.array([1, 0, -4]), + np.array([-0.3, 0.5, 0.93]), + np.zeros(shape=(3, 3)), + np.eye(3), + np.array([[1, 2, 3], [1, -1, 1]]), + np.array([[0.1, -0.3, 7.5], [8.3, 0.4, -0.9]]), + ), + ) + @pytest.mark.parametrize( + "y", + ( + np.zeros(3), + np.ones(3), + np.array([1, 0, 0]), + np.array([1, 2, 3]), + np.array([-0.24, 0.7, 0.4]), + ), + ) + def test_dense_arrays(self, X: np.ndarray, y: np.ndarray): + pearsonr = csr_vcorrcoef(X=X, y=y) + + if X.ndim == 1: + np.testing.assert_almost_equal(np.corrcoef(X, y)[0, 1], pearsonr) + else: + assert all( + np.allclose(np.corrcoef(sample, y)[0, 1], corr, equal_nan=True) + for corr, sample in zip(pearsonr, X) + ) + + @pytest.mark.parametrize( + "X", + ( + csr_matrix(np.zeros(3)), + csr_matrix(np.array([1, 0, -4])), + csr_matrix(np.array([-0.3, 0.5, 0.93])), + csr_matrix(np.zeros(shape=(3, 3))), + csr_matrix(np.eye(3)), + csr_matrix(np.array([[1, 2, 3], [1, -1, 1]])), + csr_matrix(np.array([[0.1, -0.3, 7.5], [8.3, 0.4, -0.9]])), + ), + ) + @pytest.mark.parametrize( + "y", + ( + np.zeros(3), + np.ones(3), + np.array([1, 0, 0]), + np.array([1, 2, 3]), + np.array([-0.24, 0.7, 0.4]), + ), + ) + def test_sparse_arrays(self, X: spmatrix, y: np.ndarray): + pearsonr = csr_vcorrcoef(X=X, y=y) + + X_dense = X.A.squeeze() + + if X_dense.ndim == 1: + np.testing.assert_almost_equal(np.corrcoef(X_dense, y)[0, 1], pearsonr) + else: + assert all( + np.allclose(np.corrcoef(sample, y)[0, 1], corr, equal_nan=True) + for corr, sample in zip(pearsonr, X_dense) + )