From 4d1c1c94c54af41d2f3a4c6c4ed83e3254c5b109 Mon Sep 17 00:00:00 2001 From: Mickael Ide Date: Thu, 1 Apr 2021 08:49:42 -0700 Subject: [PATCH] Update raft, use cuml normalization, add hellinger to neighbors --- cpp/cmake/Dependencies.cmake | 2 +- python/cuml/neighbors/__init__.py | 2 +- python/cuml/test/test_metrics.py | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/cmake/Dependencies.cmake b/cpp/cmake/Dependencies.cmake index 94f5637df2..57a96d682d 100644 --- a/cpp/cmake/Dependencies.cmake +++ b/cpp/cmake/Dependencies.cmake @@ -39,7 +39,7 @@ else(DEFINED ENV{RAFT_PATH}) ExternalProject_Add(raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG a57cf7df757b24230454e442c83f8491f97a4843 + GIT_TAG d1fd927bc4ec67bfd765620b5fa93f17c54cfa70 PREFIX ${RAFT_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" diff --git a/python/cuml/neighbors/__init__.py b/python/cuml/neighbors/__init__.py index 4d0f25525f..68a301bfc7 100644 --- a/python/cuml/neighbors/__init__.py +++ b/python/cuml/neighbors/__init__.py @@ -51,5 +51,5 @@ "brute": set(["euclidean", "l2", "inner_product", "l1", "cityblock", "manhattan", "taxicab", "canberra", "linf", "chebyshev", "jaccard", - "minkowski", "lp", "cosine"]) + "minkowski", "lp", "cosine", "hellinger"]) } diff --git a/python/cuml/test/test_metrics.py b/python/cuml/test/test_metrics.py index add8c41168..e92ccb85c3 100644 --- a/python/cuml/test/test_metrics.py +++ b/python/cuml/test/test_metrics.py @@ -63,6 +63,7 @@ from sklearn.metrics import mean_squared_log_error as sklearn_msle from cuml.common import has_scipy +from cuml.common.sparsefuncs import csr_row_normalize_l1 from cuml.metrics import roc_auc_score from cuml.metrics import precision_recall_curve @@ -1119,10 +1120,9 @@ def prepare_sparse_data(size0, size1, dtype, density, metric): # create sparse array, then normalize every row to one data = cupyx.scipy.sparse.random(size0, size1, dtype=dtype, - random_state=123, density=density) + random_state=123, 
density=density).tocsr() if metric == 'hellinger': - data = (cupyx.scipy.sparse.diags(cp.array(1 / data.sum(1).T)[0], - 0).tocoo()) * data + data = csr_row_normalize_l1(data) return data @@ -1130,7 +1130,8 @@ def prepare_sparse_data(size0, size1, dtype, density, metric): @pytest.mark.parametrize("matrix_size, density", [ ((3, 3), 0.7), ((5, 40), 0.2)]) -def test_sparse_pairwise_distances(metric: str, matrix_size, density: float): +def test_sparse_pairwise_distances_corner_cases(metric: str, matrix_size, + density: float): # Test the sparse_pairwise_distance helper function. # Use sparse input for sklearn calls when possible sk_sparse = metric in ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',