Skip to content

Commit

Permalink
Update raft, use cuml normalization, add hellinger to neighbors
Browse files Browse the repository at this point in the history
  • Loading branch information
lowener committed Apr 1, 2021
1 parent 5cbf694 commit 4d1c1c9
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 6 deletions.
2 changes: 1 addition & 1 deletion cpp/cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ else(DEFINED ENV{RAFT_PATH})

ExternalProject_Add(raft
GIT_REPOSITORY https://github.com/rapidsai/raft.git
GIT_TAG a57cf7df757b24230454e442c83f8491f97a4843
GIT_TAG d1fd927bc4ec67bfd765620b5fa93f17c54cfa70
PREFIX ${RAFT_DIR}
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/neighbors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,5 @@
"brute": set(["euclidean", "l2", "inner_product",
"l1", "cityblock", "manhattan", "taxicab",
"canberra", "linf", "chebyshev", "jaccard",
"minkowski", "lp", "cosine"])
"minkowski", "lp", "cosine", "hellinger"])
}
9 changes: 5 additions & 4 deletions python/cuml/test/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
from sklearn.metrics import mean_squared_log_error as sklearn_msle

from cuml.common import has_scipy
from cuml.common.sparsefuncs import csr_row_normalize_l1

from cuml.metrics import roc_auc_score
from cuml.metrics import precision_recall_curve
Expand Down Expand Up @@ -1119,18 +1120,18 @@ def prepare_sparse_data(size0, size1, dtype, density, metric):
# create a sparse array; for the hellinger metric, L1-normalize each row so it sums to one
data = cupyx.scipy.sparse.random(size0, size1,
dtype=dtype,
random_state=123, density=density)
random_state=123, density=density).tocsr()
if metric == 'hellinger':
data = (cupyx.scipy.sparse.diags(cp.array(1 / data.sum(1).T)[0],
0).tocoo()) * data
data = csr_row_normalize_l1(data)
return data


@pytest.mark.parametrize("metric", PAIRWISE_DISTANCE_SPARSE_METRICS.keys())
@pytest.mark.parametrize("matrix_size, density", [
((3, 3), 0.7),
((5, 40), 0.2)])
def test_sparse_pairwise_distances(metric: str, matrix_size, density: float):
def test_sparse_pairwise_distances_corner_cases(metric: str, matrix_size,
density: float):
# Test the sparse_pairwise_distance helper function.
# Use sparse input for sklearn calls when possible
sk_sparse = metric in ['cityblock', 'cosine', 'euclidean', 'l1', 'l2',
Expand Down

0 comments on commit 4d1c1c9

Please sign in to comment.