Merge pull request #215 from tomwhite/enforce-black
Make Travis check formatting with Black
lmcinnes committed Jun 23, 2019
2 parents 8c9984e + 8fb6181 commit c61cdfe
Showing 9 changed files with 96 additions and 98 deletions.
15 changes: 15 additions & 0 deletions CONTRIBUTING.md
@@ -35,3 +35,18 @@ are fixing a new issue feel free to file an issue and then reference it in the P
You can [browse open issues](https://github.com/lmcinnes/umap/issues),
or consult the [project roadmap](https://github.com/lmcinnes/umap/issues/15), for potential code
contributions. Fixes for issues tagged with 'help wanted' are especially appreciated.

### Code formatting

If possible, install the [black code formatter](https://github.com/python/black) (e.g.
`pip install black`) and run it before submitting a pull request. This helps maintain consistency
across the codebase, and the Travis-CI continuous integration system also runs a `black` check
that will show up as a failure on the pull request if the code has not been formatted.

Formatting is as simple as running:

```bash
black .
```

in the root of the project.
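
To see what `black` would change without modifying any files (essentially what the CI check verifies), you can run it in check mode; `--check` and `--diff` are standard `black` options:

```bash
# Report which files would be reformatted and show the changes as a diff,
# without touching anything; exits non-zero if reformatting is needed.
black --check --diff .
```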
1 change: 1 addition & 0 deletions ci_scripts/install.sh
@@ -43,6 +43,7 @@ if [[ "$DISTRIB" == "conda" ]]; then

source activate testenv

pip install black

if [[ "$COVERAGE" == "true" ]]; then
pip install coverage coveralls
4 changes: 4 additions & 0 deletions ci_scripts/test.sh
@@ -1,5 +1,9 @@
set -e

if [[ "$COVERAGE" == "true" ]]; then
black --check $MODULE
fi

# Get into a temp directory to run test from the installed scikit learn and
# check if we do not leave artifacts
mkdir -p $TEST_DIR
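
The lines added to `ci_scripts/test.sh` above only enforce formatting in the coverage build. A minimal local stand-in for that gate might look like the following sketch (assuming `$MODULE` resolves to the `umap` package directory, which the Travis configuration presumably sets it to):

```bash
#!/usr/bin/env bash
set -e

# Hypothetical stand-ins for the variables exported by the CI configuration.
MODULE=umap
COVERAGE=true

# Same gate as in ci_scripts/test.sh: only run the black check in the coverage build.
if [[ "$COVERAGE" == "true" ]]; then
    black --check $MODULE
fi
```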
3 changes: 2 additions & 1 deletion umap/__init__.py
@@ -2,7 +2,8 @@

# Workaround: https://github.com/numba/numba/issues/3341
import numba
numba.config.THREADING_LAYER = 'workqueue'

numba.config.THREADING_LAYER = "workqueue"

import pkg_resources

10 changes: 5 additions & 5 deletions umap/rp_tree.py
Expand Up @@ -79,7 +79,7 @@ def angular_random_projection_split(data, indices, rng_state):

for d in range(dim):
hyperplane_vector[d] = (data[left, d] / left_norm) - (
data[right, d] / right_norm
data[right, d] / right_norm
)

hyperplane_norm = norm(hyperplane_vector)
@@ -175,7 +175,7 @@ def euclidean_random_projection_split(data, indices, rng_state):
for d in range(dim):
hyperplane_vector[d] = data[left, d] - data[right, d]
hyperplane_offset -= (
hyperplane_vector[d] * (data[left, d] + data[right, d]) / 2.0
hyperplane_vector[d] * (data[left, d] + data[right, d]) / 2.0
)

# For each point compute the margin (project into normal vector, add offset)
@@ -606,7 +606,7 @@ def max_sparse_hyperplane_size(tree):


def recursive_flatten(
tree, hyperplanes, offsets, children, indices, node_num, leaf_num
tree, hyperplanes, offsets, children, indices, node_num, leaf_num
):
if tree.is_leaf:
children[node_num, 0] = -leaf_num
@@ -630,7 +630,7 @@ def recursive_flatten(
indices,
node_num + 1,
leaf_num,
)
)
children[old_node_num, 1] = node_num + 1
node_num, leaf_num = recursive_flatten(
tree.right_child,
Expand All @@ -640,7 +640,7 @@ def recursive_flatten(
indices,
node_num + 1,
leaf_num,
)
)
return node_num, leaf_num


5 changes: 1 addition & 4 deletions umap/spectral.py
@@ -266,10 +266,7 @@ def spectral_layout(data, graph, dim, random_state, metric="euclidean", metric_k
)
else:
eigenvalues, eigenvectors = scipy.sparse.linalg.lobpcg(
L,
random_state.normal(size=(L.shape[0], k)),
largest=False,
tol=1e-8
L, random_state.normal(size=(L.shape[0], k)), largest=False, tol=1e-8
)
order = np.argsort(eigenvalues)[1:k]
return eigenvectors[:, order]
86 changes: 29 additions & 57 deletions umap/tests/test_umap.py
Expand Up @@ -46,22 +46,23 @@
import os.path
from nose.tools import assert_greater_equal
from nose.tools import assert_less

"""
Tests for UMAP to ensure things are working as expected.
"""
import warnings
warnings.filterwarnings('ignore', category=UserWarning)

warnings.filterwarnings("ignore", category=UserWarning)


np.random.seed(42)
spatial_data = np.random.randn(10, 20)
spatial_data = np.vstack(
[spatial_data, np.zeros((2, 20))]
) # Add some all zero data for corner case test
binary_data = np.random.choice(
a=[False, True], size=(10, 20), p=[0.66, 1 - 0.66])
binary_data = np.random.choice(a=[False, True], size=(10, 20), p=[0.66, 1 - 0.66])
binary_data = np.vstack(
[binary_data, np.zeros((2, 20), dtype='bool')]
[binary_data, np.zeros((2, 20), dtype="bool")]
) # Add some all zero data for corner case test
sparse_spatial_data = sparse.csr_matrix(spatial_data * binary_data)
sparse_binary_data = sparse.csr_matrix(binary_data)
@@ -70,16 +71,14 @@
nn_data = np.vstack(
[nn_data, np.zeros((2, 5))]
) # Add some all zero data for corner case test
binary_nn_data = np.random.choice(
a=[False, True], size=(1000, 5), p=[0.66, 1 - 0.66])
binary_nn_data = np.random.choice(a=[False, True], size=(1000, 5), p=[0.66, 1 - 0.66])
binary_nn_data = np.vstack(
[binary_nn_data, np.zeros((2, 5), dtype='bool')]
[binary_nn_data, np.zeros((2, 5), dtype="bool")]
) # Add some all zero data for corner case test
sparse_nn_data = sparse.csr_matrix(nn_data * binary_nn_data)

iris = datasets.load_iris()
iris_selection = np.random.choice(
[True, False], 150, replace=True, p=[0.75, 0.25])
iris_selection = np.random.choice([True, False], 150, replace=True, p=[0.75, 0.25])


def spatial_check(metric):
@@ -137,9 +136,7 @@ def binary_check(metric):

def sparse_spatial_check(metric):
if metric in spdist.sparse_named_distances:
dist_matrix = pairwise_distances(
sparse_spatial_data.todense(), metric=metric
)
dist_matrix = pairwise_distances(sparse_spatial_data.todense(), metric=metric)
if metric in ("braycurtis", "dice", "sokalsneath", "yule"):
dist_matrix[np.where(~np.isfinite(dist_matrix))] = 0.0
if metric in ("cosine", "correlation", "kulsinski", "russellrao"):
@@ -190,9 +187,7 @@ def sparse_spatial_check(metric):

def sparse_binary_check(metric):
if metric in spdist.sparse_named_distances:
dist_matrix = pairwise_distances(
sparse_binary_data.todense(), metric=metric
)
dist_matrix = pairwise_distances(sparse_binary_data.todense(), metric=metric)
if metric in ("jaccard", "dice", "sokalsneath", "yule"):
dist_matrix[np.where(~np.isfinite(dist_matrix))] = 0.0
if metric in ("kulsinski", "russellrao"):
@@ -294,8 +289,7 @@ def test_sparse_nn_descent_neighbor_accuracy():
)

tree = KDTree(sparse_nn_data.todense())
true_indices = tree.query(sparse_nn_data.todense(),
10, return_distance=False)
true_indices = tree.query(sparse_nn_data.todense(), 10, return_distance=False)

num_correct = 0.0
for i in range(nn_data.shape[0]):
@@ -420,8 +414,7 @@ def test_nn_search():
False,
)

search_graph = sparse.lil_matrix(
(train.shape[0], train.shape[0]), dtype=np.int8)
search_graph = sparse.lil_matrix((train.shape[0], train.shape[0]), dtype=np.int8)
search_graph.rows = knn_indices
search_graph.data = (knn_dists != 0).astype(np.int8)
search_graph = search_graph.maximum(search_graph.transpose()).tocsr()
@@ -433,8 +426,7 @@
init = initialise_search(
rp_forest, train, test, int(10 * 3), random_init, tree_init, rng_state
)
result = search(train, search_graph.indptr,
search_graph.indices, init, test)
result = search(train, search_graph.indptr, search_graph.indices, init, test)

indices, dists = deheap_sort(result)
indices = indices[:, :10]
@@ -596,8 +588,7 @@ def test_seuclidean():
test_matrix = np.array(
[
[
dist.standardised_euclidean(
spatial_data[i], spatial_data[j], v)
dist.standardised_euclidean(spatial_data[i], spatial_data[j], v)
for j in range(spatial_data.shape[0])
]
for i in range(spatial_data.shape[0])
@@ -612,13 +603,11 @@

def test_weighted_minkowski():
v = np.abs(np.random.randn(spatial_data.shape[1]))
dist_matrix = pairwise_distances(
spatial_data, metric="wminkowski", w=v, p=3)
dist_matrix = pairwise_distances(spatial_data, metric="wminkowski", w=v, p=3)
test_matrix = np.array(
[
[
dist.weighted_minkowski(
spatial_data[i], spatial_data[j], v, p=3)
dist.weighted_minkowski(spatial_data[i], spatial_data[j], v, p=3)
for j in range(spatial_data.shape[0])
]
for i in range(spatial_data.shape[0])
@@ -683,14 +672,12 @@ def test_umap_sparse_trustworthiness():

def test_umap_trustworthiness_on_iris():
data = iris.data
embedding = UMAP(n_neighbors=10, min_dist=0.01,
random_state=42).fit_transform(data)
embedding = UMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit_transform(data)
trust = trustworthiness(iris.data, embedding, 10)
assert_greater_equal(
trust,
0.97,
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(
trust),
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
)


@@ -703,8 +690,7 @@ def test_umap_trustworthiness_on_iris_random_init():
assert_greater_equal(
trust,
0.95,
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(
trust),
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
)


@@ -717,8 +703,7 @@ def test_supervised_umap_trustworthiness_on_iris():
assert_greater_equal(
trust,
0.97,
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(
trust),
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
)


@@ -733,8 +718,7 @@ def test_semisupervised_umap_trustworthiness_on_iris():
assert_greater_equal(
trust,
0.97,
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(
trust),
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
)


@@ -747,8 +731,7 @@ def test_initialized_umap_trustworthiness_on_iris():
assert_greater_equal(
trust,
0.97,
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(
trust),
"Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
)


@@ -763,8 +746,7 @@ def test_umap_transform_on_iris():
assert_greater_equal(
trust,
0.89,
"Insufficiently trustworthy transform for" "iris dataset: {}".format(
trust),
"Insufficiently trustworthy transform for" "iris dataset: {}".format(trust),
)


@@ -783,26 +765,23 @@ def test_umap_transform_on_iris():
def test_blobs_cluster():
data, labels = datasets.make_blobs(n_samples=500, n_features=10, centers=5)
embedding = UMAP().fit_transform(data)
assert_equal(adjusted_rand_score(
labels, KMeans(5).fit_predict(embedding)), 1.0)
assert_equal(adjusted_rand_score(labels, KMeans(5).fit_predict(embedding)), 1.0)


def test_multi_component_layout():
data, labels = datasets.make_blobs(
100, 2, centers=5, cluster_std=0.5, center_box=[-20, 20], random_state=42
)

true_centroids = np.empty(
(labels.max() + 1, data.shape[1]), dtype=np.float64)
true_centroids = np.empty((labels.max() + 1, data.shape[1]), dtype=np.float64)

for label in range(labels.max() + 1):
true_centroids[label] = data[labels == label].mean(axis=0)

true_centroids = normalize(true_centroids, norm="l2")

embedding = UMAP(n_neighbors=4).fit_transform(data)
embed_centroids = np.empty(
(labels.max() + 1, data.shape[1]), dtype=np.float64)
embed_centroids = np.empty((labels.max() + 1, data.shape[1]), dtype=np.float64)
embed_labels = KMeans(n_clusters=5).fit_predict(embedding)

for label in range(embed_labels.max() + 1):
@@ -830,7 +809,7 @@ def test_bad_too_large_min_dist():
# a RuntimeWarning about division by zero in a,b curve fitting is expected
# caught and ignored for this test
with warnings.catch_warnings():
warnings.filterwarnings('ignore', category=RuntimeWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
assert_raises(ValueError, u.fit, nn_data)


@@ -905,14 +884,7 @@ def test_negative_target_nneighbors():


def test_umap_bad_nn():
assert_raises(ValueError,
nearest_neighbors,
nn_data,
10,
42,
{},
False,
np.random)
assert_raises(ValueError, nearest_neighbors, nn_data, 10, 42, {}, False, np.random)


def test_umap_bad_nn_sparse():
@@ -930,7 +902,7 @@

def test_too_many_neighbors_warns():
u = UMAP(a=1.2, b=1.75, n_neighbors=2000, n_epochs=11, init="random")
u.fit(nn_data[:100, ])
u.fit(nn_data[:100,])
assert_equal(u._a, 1.2)
assert_equal(u._b, 1.75)
