Skip to content

Commit

Permalink
[REVIEW] Speeding up MNMG KNN Cl&Re testing (#3052)
Browse files Browse the repository at this point in the history
* Speeding up MNMG KNN Cl&Re testing

* Update changelog

* Testing with extreme values
  • Loading branch information
viclafargue authored Nov 2, 2020
1 parent 983c6f8 commit 5b7757a
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
- PR #3044: Move leftover `linalg` and `stats` to RAFT namespaces
- PR #3067: Deleting prims moved to RAFT and updating header paths
- PR #3074: Reducing dask coordinate descent test runtime
- PR #3052: Speeding up MNMG KNN Cl&Re testing

## Bug Fixes
- PR #3033: Splitting ml metrics to individual files
Expand Down
3 changes: 2 additions & 1 deletion cpp/src_prims/selection/knn.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,8 @@ void class_probs(std::vector<float *> &out, const int64_t *knn_indices,
* Build array of class probability arrays from
* knn_indices and labels
*/
device_buffer<int> y_normalized(allocator, stream, n_index_rows);
device_buffer<int> y_normalized(allocator, stream,
n_index_rows + n_unique_labels);

/*
* Appending the array of unique labels to the original labels array
Expand Down
24 changes: 12 additions & 12 deletions python/cuml/test/dask/test_kneighbors_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ def dataset(request):
if len(new_x) >= request.param['n_samples']:
break
X = X[new_x]
noise = np.random.normal(0, 1.2, X.shape)
noise = np.random.normal(0, 5., X.shape)
X += noise
y = np.array(new_y)

return train_test_split(X, y, test_size=0.1)
return train_test_split(X, y, test_size=0.3)


def exact_match(output1, output2):
Expand Down Expand Up @@ -108,11 +108,11 @@ def check_probabilities(l_probas, d_probas):


@pytest.mark.parametrize("datatype", ['dask_array', 'dask_cudf'])
@pytest.mark.parametrize("n_neighbors", [1, 3, 8])
@pytest.mark.parametrize("n_parts", [2, 4, 12])
@pytest.mark.parametrize("batch_size", [128, 1024])
def test_predict_and_score(dataset, datatype, n_neighbors,
n_parts, batch_size, client):
@pytest.mark.parametrize("parameters", [(1, 3, 256),
(8, 8, 256),
(9, 3, 128)])
def test_predict_and_score(dataset, datatype, parameters, client):
n_neighbors, n_parts, batch_size = parameters
X_train, X_test, y_train, y_test = dataset
np_y_test = y_test

Expand Down Expand Up @@ -165,11 +165,11 @@ def test_predict_and_score(dataset, datatype, n_neighbors,


@pytest.mark.parametrize("datatype", ['dask_array', 'dask_cudf'])
@pytest.mark.parametrize("n_neighbors", [1, 3, 8])
@pytest.mark.parametrize("n_parts", [2, 4, 12])
@pytest.mark.parametrize("batch_size", [128, 1024])
def test_predict_proba(dataset, datatype, n_neighbors,
n_parts, batch_size, client):
@pytest.mark.parametrize("parameters", [(1, 3, 256),
(8, 8, 256),
(9, 3, 128)])
def test_predict_proba(dataset, datatype, parameters, client):
n_neighbors, n_parts, batch_size = parameters
X_train, X_test, y_train, y_test = dataset

l_model = lKNNClf(n_neighbors=n_neighbors)
Expand Down
14 changes: 7 additions & 7 deletions python/cuml/test/dask/test_kneighbors_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,11 @@ def dataset(request):
if len(new_x) >= request.param['n_samples']:
break
X = X[new_x]
noise = np.random.normal(0, 1.2, X.shape)
noise = np.random.normal(0, 5., X.shape)
X += noise
y = np.array(new_y, dtype=np.float32)

return train_test_split(X, y, test_size=0.1)
return train_test_split(X, y, test_size=0.3)


def exact_match(output1, output2):
Expand Down Expand Up @@ -102,11 +102,11 @@ def exact_match(output1, output2):


@pytest.mark.parametrize("datatype", ['dask_array', 'dask_cudf'])
@pytest.mark.parametrize("n_neighbors", [1, 3, 8])
@pytest.mark.parametrize("n_parts", [2, 4, 12])
@pytest.mark.parametrize("batch_size", [128, 1024])
def test_predict_and_score(dataset, datatype, n_neighbors,
n_parts, batch_size, client):
@pytest.mark.parametrize("parameters", [(1, 3, 256),
(8, 8, 256),
(9, 3, 128)])
def test_predict_and_score(dataset, datatype, parameters, client):
n_neighbors, n_parts, batch_size = parameters
X_train, X_test, y_train, y_test = dataset
np_y_test = y_test

Expand Down

0 comments on commit 5b7757a

Please sign in to comment.