Skip to content

Commit

Permalink
CI test speed improvement (#3851)
Browse files Browse the repository at this point in the history
Closes #3786.
Some redundant parameter combinations were removed, and the number of samples was reduced in some unit tests that are run very often.
`test_umap.py::test_umap_fit_transform_trust` is the test that takes the most time in CI.

Local speed-up:
- `test_umap.py`: From `273s` to `79s`
- `test_dbscan.py`: From `14s` to `2s`
- `dask/test_nearest_neighbors.py::test_compare_skl`: From `113s` to `31s`

In total, the expected time saved is `288s` (~5 min) when running the pipeline locally.

Authors:
  - Micka (https://github.com/lowener)

Approvers:
  - John Zedlewski (https://github.com/JohnZed)
  - Dante Gama Dessavre (https://github.com/dantegd)

URL: #3851
  • Loading branch information
lowener authored May 27, 2021
1 parent 85445f6 commit 474e2e7
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 11 deletions.
7 changes: 3 additions & 4 deletions python/cuml/test/dask/test_nearest_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,7 @@ def _scale_rows(client, nrows):
return n_workers * nrows


@pytest.mark.parametrize("nrows", [unit_param(100),
unit_param(1e3),
@pytest.mark.parametrize("nrows", [unit_param(300),
quality_param(1e6),
stress_param(5e8)])
@pytest.mark.parametrize("ncols", [10, 30])
Expand All @@ -81,8 +80,8 @@ def _scale_rows(client, nrows):
stress_param(100)])
@pytest.mark.parametrize("n_parts", [unit_param(1), unit_param(5),
quality_param(7), stress_param(50)])
@pytest.mark.parametrize("streams_per_handle", [5, 10])
@pytest.mark.parametrize("reverse_worker_order", [True, False])
@pytest.mark.parametrize("streams_per_handle,reverse_worker_order",
[(5, True), (10, False)])
def test_compare_skl(nrows, ncols, nclusters, n_parts, n_neighbors,
streams_per_handle, reverse_worker_order, client):

Expand Down
5 changes: 3 additions & 2 deletions python/cuml/test/test_dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,9 @@ def test_core_point_prop3():
@pytest.mark.parametrize('datatype', [np.float32, np.float64])
@pytest.mark.parametrize('use_handle', [True, False])
@pytest.mark.parametrize('out_dtype', ["int32", np.int32, "int64", np.int64])
def test_dbscan_propagation(datatype, use_handle, out_dtype):
X, y = make_blobs(5000, centers=1, cluster_std=8.0,
@pytest.mark.parametrize('n_samples', [unit_param(500), stress_param(5000)])
def test_dbscan_propagation(datatype, use_handle, out_dtype, n_samples):
X, y = make_blobs(n_samples, centers=1, cluster_std=8.0,
center_box=(-100.0, 100.0), random_state=8)
X = X.astype(datatype)

Expand Down
24 changes: 19 additions & 5 deletions python/cuml/test/test_umap.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def test_umap_fit_transform_trust(name, target_metric):
data = wine.data
labels = wine.target
else:
data, labels = make_blobs(n_samples=5000, n_features=10,
data, labels = make_blobs(n_samples=500, n_features=10,
centers=10, random_state=42)

model = umap.UMAP(n_neighbors=10, min_dist=0.01,
Expand Down Expand Up @@ -336,8 +336,15 @@ def test_umap_fit_transform_against_fit_and_transform():
assert joblib.hash(ft_embedding) != joblib.hash(fit_embedding_diff_input)


@pytest.mark.parametrize('n_components', [2, 21, 25, 50])
@pytest.mark.parametrize('random_state', [None, 8, np.random.RandomState(42)])
@pytest.mark.parametrize('n_components,random_state',
[unit_param(2, None),
unit_param(2, 8),
unit_param(2, np.random.RandomState(42)),
unit_param(21, None),
unit_param(21, np.random.RandomState(42)),
unit_param(25, 8),
unit_param(50, None),
stress_param(50, 8)])
def test_umap_fit_transform_reproducibility(n_components, random_state):

n_samples = 8000
Expand Down Expand Up @@ -372,8 +379,15 @@ def get_embedding(n_components, random_state):
assert mean_diff > 0.5


@pytest.mark.parametrize('n_components', [2, 21, 25, 50])
@pytest.mark.parametrize('random_state', [None, 8, np.random.RandomState(42)])
@pytest.mark.parametrize('n_components,random_state',
[unit_param(2, None),
unit_param(2, 8),
unit_param(2, np.random.RandomState(42)),
unit_param(21, None),
unit_param(25, 8),
unit_param(25, np.random.RandomState(42)),
unit_param(50, None),
stress_param(50, 8)])
def test_umap_transform_reproducibility(n_components, random_state):

n_samples = 5000
Expand Down

0 comments on commit 474e2e7

Please sign in to comment.