diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 2a877cb1..b54deba4 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -35,3 +35,18 @@ are fixing a new issue feel free to file an issue and then reference it in the P
 You can [browse open issues](https://github.com/lmcinnes/umap/issues), 
 or consult the [project roadmap](https://github.com/lmcinnes/umap/issues/15), for potential code
 contributions. Fixes for issues tagged with 'help wanted' are especially appreciated.
+
+### Code formatting
+
+If possible, install the [black code formatter](https://github.com/python/black) (e.g.
+`pip install black`) and run it before submitting a pull request. This keeps the code style
+consistent across the project. The Travis-CI continuous integration build also runs
+`black --check`, so a pull request containing unformatted code will show up as a failure.
+
+Formatting is as simple as running:
+
+```bash
+black .
+```
+
+in the root of the project.
diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh
index c91365c3..cba68853 100644
--- a/ci_scripts/install.sh
+++ b/ci_scripts/install.sh
@@ -31,6 +31,7 @@ if [[ "$DISTRIB" == "conda" ]]; then
 
   source activate testenv
 
+  pip install black
 
   if [[ "$COVERAGE" == "true" ]]; then
       pip install coverage coveralls
diff --git a/ci_scripts/test.sh b/ci_scripts/test.sh
index e0548499..e131d451 100644
--- a/ci_scripts/test.sh
+++ b/ci_scripts/test.sh
@@ -1,5 +1,9 @@
 set -e
 
+if [[ "$COVERAGE" == "true" ]]; then
+    black --check $MODULE
+fi
+
 # Get into a temp directory to run test from the installed scikit learn and
 # check if we do not leave artifacts
 mkdir -p $TEST_DIR
diff --git a/umap/__init__.py b/umap/__init__.py
index 8c32fe10..c5ac3065 100644
--- a/umap/__init__.py
+++ b/umap/__init__.py
@@ -2,7 +2,8 @@
 
 # Workaround: https://github.com/numba/numba/issues/3341
 import numba
-numba.config.THREADING_LAYER = 'workqueue'
+
+numba.config.THREADING_LAYER = "workqueue"
 
 import pkg_resources
 
diff --git a/umap/rp_tree.py b/umap/rp_tree.py
index 6f14e8df..daaa7d7f 100644
--- a/umap/rp_tree.py
+++ b/umap/rp_tree.py
@@ -79,7 +79,7 @@ def angular_random_projection_split(data, indices, rng_state):
 
     for d in range(dim):
         hyperplane_vector[d] = (data[left, d] / left_norm) - (
-                data[right, d] / right_norm
+            data[right, d] / right_norm
         )
 
     hyperplane_norm = norm(hyperplane_vector)
@@ -175,7 +175,7 @@ def euclidean_random_projection_split(data, indices, rng_state):
     for d in range(dim):
         hyperplane_vector[d] = data[left, d] - data[right, d]
         hyperplane_offset -= (
-                hyperplane_vector[d] * (data[left, d] + data[right, d]) / 2.0
+            hyperplane_vector[d] * (data[left, d] + data[right, d]) / 2.0
         )
 
     # For each point compute the margin (project into normal vector, add offset)
@@ -606,7 +606,7 @@ def max_sparse_hyperplane_size(tree):
 
 
 def recursive_flatten(
-        tree, hyperplanes, offsets, children, indices, node_num, leaf_num
+    tree, hyperplanes, offsets, children, indices, node_num, leaf_num
 ):
     if tree.is_leaf:
         children[node_num, 0] = -leaf_num
@@ -630,7 +630,7 @@ def recursive_flatten(
             indices,
             node_num + 1,
             leaf_num,
-            )
+        )
         children[old_node_num, 1] = node_num + 1
         node_num, leaf_num = recursive_flatten(
             tree.right_child,
@@ -640,7 +640,7 @@ def recursive_flatten(
             indices,
             node_num + 1,
             leaf_num,
-            )
+        )
         return node_num, leaf_num
 
 
diff --git a/umap/spectral.py b/umap/spectral.py
index c82d68de..153c34d6 100644
--- a/umap/spectral.py
+++ b/umap/spectral.py
@@ -266,10 +266,7 @@ def spectral_layout(data, graph, dim, random_state, metric="euclidean", metric_k
             )
         else:
             eigenvalues, eigenvectors = scipy.sparse.linalg.lobpcg(
-                L,
-                random_state.normal(size=(L.shape[0], k)),
-                largest=False,
-                tol=1e-8
+                L, random_state.normal(size=(L.shape[0], k)), largest=False, tol=1e-8
             )
         order = np.argsort(eigenvalues)[1:k]
         return eigenvectors[:, order]
diff --git a/umap/tests/test_umap.py b/umap/tests/test_umap.py
index 296ed765..3e45d960 100644
--- a/umap/tests/test_umap.py
+++ b/umap/tests/test_umap.py
@@ -46,11 +46,13 @@
 import os.path
 from nose.tools import assert_greater_equal
 from nose.tools import assert_less
+
 """
 Tests for UMAP to ensure things are working as expected.
 """
 import warnings
-warnings.filterwarnings('ignore', category=UserWarning)
+
+warnings.filterwarnings("ignore", category=UserWarning)
 
 
 np.random.seed(42)
@@ -58,10 +60,9 @@
 spatial_data = np.vstack(
     [spatial_data, np.zeros((2, 20))]
 )  # Add some all zero data for corner case test
-binary_data = np.random.choice(
-    a=[False, True], size=(10, 20), p=[0.66, 1 - 0.66])
+binary_data = np.random.choice(a=[False, True], size=(10, 20), p=[0.66, 1 - 0.66])
 binary_data = np.vstack(
-    [binary_data, np.zeros((2, 20), dtype='bool')]
+    [binary_data, np.zeros((2, 20), dtype="bool")]
 )  # Add some all zero data for corner case test
 sparse_spatial_data = sparse.csr_matrix(spatial_data * binary_data)
 sparse_binary_data = sparse.csr_matrix(binary_data)
@@ -70,16 +71,14 @@
 nn_data = np.vstack(
     [nn_data, np.zeros((2, 5))]
 )  # Add some all zero data for corner case test
-binary_nn_data = np.random.choice(
-    a=[False, True], size=(1000, 5), p=[0.66, 1 - 0.66])
+binary_nn_data = np.random.choice(a=[False, True], size=(1000, 5), p=[0.66, 1 - 0.66])
 binary_nn_data = np.vstack(
-    [binary_nn_data, np.zeros((2, 5), dtype='bool')]
+    [binary_nn_data, np.zeros((2, 5), dtype="bool")]
 )  # Add some all zero data for corner case test
 sparse_nn_data = sparse.csr_matrix(nn_data * binary_nn_data)
 
 iris = datasets.load_iris()
-iris_selection = np.random.choice(
-    [True, False], 150, replace=True, p=[0.75, 0.25])
+iris_selection = np.random.choice([True, False], 150, replace=True, p=[0.75, 0.25])
 
 
 def spatial_check(metric):
@@ -137,9 +136,7 @@ def binary_check(metric):
 
 def sparse_spatial_check(metric):
     if metric in spdist.sparse_named_distances:
-        dist_matrix = pairwise_distances(
-            sparse_spatial_data.todense(), metric=metric
-        )
+        dist_matrix = pairwise_distances(sparse_spatial_data.todense(), metric=metric)
     if metric in ("braycurtis", "dice", "sokalsneath", "yule"):
         dist_matrix[np.where(~np.isfinite(dist_matrix))] = 0.0
     if metric in ("cosine", "correlation", "kulsinski", "russellrao"):
@@ -190,9 +187,7 @@ def sparse_spatial_check(metric):
 
 def sparse_binary_check(metric):
     if metric in spdist.sparse_named_distances:
-        dist_matrix = pairwise_distances(
-            sparse_binary_data.todense(), metric=metric
-        )
+        dist_matrix = pairwise_distances(sparse_binary_data.todense(), metric=metric)
     if metric in ("jaccard", "dice", "sokalsneath", "yule"):
         dist_matrix[np.where(~np.isfinite(dist_matrix))] = 0.0
     if metric in ("kulsinski", "russellrao"):
@@ -294,8 +289,7 @@ def test_sparse_nn_descent_neighbor_accuracy():
     )
 
     tree = KDTree(sparse_nn_data.todense())
-    true_indices = tree.query(sparse_nn_data.todense(),
-                              10, return_distance=False)
+    true_indices = tree.query(sparse_nn_data.todense(), 10, return_distance=False)
 
     num_correct = 0.0
     for i in range(nn_data.shape[0]):
@@ -420,8 +414,7 @@ def test_nn_search():
         False,
     )
 
-    search_graph = sparse.lil_matrix(
-        (train.shape[0], train.shape[0]), dtype=np.int8)
+    search_graph = sparse.lil_matrix((train.shape[0], train.shape[0]), dtype=np.int8)
     search_graph.rows = knn_indices
     search_graph.data = (knn_dists != 0).astype(np.int8)
     search_graph = search_graph.maximum(search_graph.transpose()).tocsr()
@@ -433,8 +426,7 @@ def test_nn_search():
     init = initialise_search(
         rp_forest, train, test, int(10 * 3), random_init, tree_init, rng_state
     )
-    result = search(train, search_graph.indptr,
-                    search_graph.indices, init, test)
+    result = search(train, search_graph.indptr, search_graph.indices, init, test)
 
     indices, dists = deheap_sort(result)
     indices = indices[:, :10]
@@ -596,8 +588,7 @@ def test_seuclidean():
     test_matrix = np.array(
         [
             [
-                dist.standardised_euclidean(
-                    spatial_data[i], spatial_data[j], v)
+                dist.standardised_euclidean(spatial_data[i], spatial_data[j], v)
                 for j in range(spatial_data.shape[0])
             ]
             for i in range(spatial_data.shape[0])
@@ -612,13 +603,11 @@ def test_seuclidean():
 
 def test_weighted_minkowski():
     v = np.abs(np.random.randn(spatial_data.shape[1]))
-    dist_matrix = pairwise_distances(
-        spatial_data, metric="wminkowski", w=v, p=3)
+    dist_matrix = pairwise_distances(spatial_data, metric="wminkowski", w=v, p=3)
     test_matrix = np.array(
         [
             [
-                dist.weighted_minkowski(
-                    spatial_data[i], spatial_data[j], v, p=3)
+                dist.weighted_minkowski(spatial_data[i], spatial_data[j], v, p=3)
                 for j in range(spatial_data.shape[0])
             ]
             for i in range(spatial_data.shape[0])
@@ -683,14 +672,12 @@ def test_umap_sparse_trustworthiness():
 
 def test_umap_trustworthiness_on_iris():
     data = iris.data
-    embedding = UMAP(n_neighbors=10, min_dist=0.01,
-                     random_state=42).fit_transform(data)
+    embedding = UMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit_transform(data)
     trust = trustworthiness(iris.data, embedding, 10)
     assert_greater_equal(
         trust,
         0.97,
-        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(
-            trust),
+        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
     )
 
 
@@ -703,8 +690,7 @@ def test_umap_trustworthiness_on_iris_random_init():
     assert_greater_equal(
         trust,
         0.95,
-        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(
-            trust),
+        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
     )
 
 
@@ -717,8 +703,7 @@ def test_supervised_umap_trustworthiness_on_iris():
     assert_greater_equal(
         trust,
         0.97,
-        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(
-            trust),
+        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
     )
 
 
@@ -733,8 +718,7 @@ def test_semisupervised_umap_trustworthiness_on_iris():
     assert_greater_equal(
         trust,
         0.97,
-        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(
-            trust),
+        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
     )
 
 
@@ -747,8 +731,7 @@ def test_initialized_umap_trustworthiness_on_iris():
     assert_greater_equal(
         trust,
         0.97,
-        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(
-            trust),
+        "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust),
     )
 
 
@@ -763,8 +746,7 @@ def test_umap_transform_on_iris():
     assert_greater_equal(
         trust,
         0.89,
-        "Insufficiently trustworthy transform for" "iris dataset: {}".format(
-            trust),
+        "Insufficiently trustworthy transform for" "iris dataset: {}".format(trust),
     )
 
 
@@ -783,8 +765,7 @@ def test_umap_transform_on_iris():
 def test_blobs_cluster():
     data, labels = datasets.make_blobs(n_samples=500, n_features=10, centers=5)
     embedding = UMAP().fit_transform(data)
-    assert_equal(adjusted_rand_score(
-        labels, KMeans(5).fit_predict(embedding)), 1.0)
+    assert_equal(adjusted_rand_score(labels, KMeans(5).fit_predict(embedding)), 1.0)
 
 
 def test_multi_component_layout():
@@ -792,8 +773,7 @@ def test_multi_component_layout():
         100, 2, centers=5, cluster_std=0.5, center_box=[-20, 20], random_state=42
     )
 
-    true_centroids = np.empty(
-        (labels.max() + 1, data.shape[1]), dtype=np.float64)
+    true_centroids = np.empty((labels.max() + 1, data.shape[1]), dtype=np.float64)
 
     for label in range(labels.max() + 1):
         true_centroids[label] = data[labels == label].mean(axis=0)
@@ -801,8 +781,7 @@ def test_multi_component_layout():
     true_centroids = normalize(true_centroids, norm="l2")
 
     embedding = UMAP(n_neighbors=4).fit_transform(data)
-    embed_centroids = np.empty(
-        (labels.max() + 1, data.shape[1]), dtype=np.float64)
+    embed_centroids = np.empty((labels.max() + 1, data.shape[1]), dtype=np.float64)
     embed_labels = KMeans(n_clusters=5).fit_predict(embedding)
 
     for label in range(embed_labels.max() + 1):
@@ -830,7 +809,7 @@ def test_bad_too_large_min_dist():
     # a RuntimeWarning about division by zero in a,b curve fitting is expected
     # caught and ignored for this test
     with warnings.catch_warnings():
-        warnings.filterwarnings('ignore', category=RuntimeWarning)
+        warnings.filterwarnings("ignore", category=RuntimeWarning)
         assert_raises(ValueError, u.fit, nn_data)
 
 
@@ -905,14 +884,7 @@ def test_negative_target_nneighbors():
 
 
 def test_umap_bad_nn():
-    assert_raises(ValueError,
-                  nearest_neighbors,
-                  nn_data,
-                  10,
-                  42,
-                  {},
-                  False,
-                  np.random)
+    assert_raises(ValueError, nearest_neighbors, nn_data, 10, 42, {}, False, np.random)
 
 
 def test_umap_bad_nn_sparse():
@@ -930,7 +902,7 @@ def test_umap_bad_nn_sparse():
 
 def test_too_many_neighbors_warns():
     u = UMAP(a=1.2, b=1.75, n_neighbors=2000, n_epochs=11, init="random")
-    u.fit(nn_data[:100, ])
+    u.fit(nn_data[:100,])
     assert_equal(u._a, 1.2)
     assert_equal(u._b, 1.75)
 
diff --git a/umap/umap_.py b/umap/umap_.py
index 91c068ea..601f7f34 100644
--- a/umap/umap_.py
+++ b/umap/umap_.py
@@ -49,7 +49,9 @@
 NPY_INFINITY = np.inf
 
 
-@numba.njit(fastmath=True) # benchmarking `parallel=True` shows it to *decrease* performance
+@numba.njit(
+    fastmath=True
+)  # benchmarking `parallel=True` shows it to *decrease* performance
 def smooth_knn_dist(distances, k, n_iter=64, local_connectivity=1.0, bandwidth=1.0):
     """Compute a continuous version of the distance to the kth nearest
     neighbor. That is, this is similar to knn-distance but allows continuous
@@ -109,7 +111,9 @@ def smooth_knn_dist(distances, k, n_iter=64, local_connectivity=1.0, bandwidth=1
             if index > 0:
                 rho[i] = non_zero_dists[index - 1]
                 if interpolation > SMOOTH_K_TOLERANCE:
-                    rho[i] += interpolation * (non_zero_dists[index] - non_zero_dists[index - 1])
+                    rho[i] += interpolation * (
+                        non_zero_dists[index] - non_zero_dists[index - 1]
+                    )
             else:
                 rho[i] = interpolation * non_zero_dists[0]
         elif non_zero_dists.shape[0] > 0:
@@ -125,7 +129,6 @@ def smooth_knn_dist(distances, k, n_iter=64, local_connectivity=1.0, bandwidth=1
                 else:
                     psum += 1.0
 
-
             if np.fabs(psum - target) < SMOOTH_K_TOLERANCE:
                 break
 
@@ -233,7 +236,7 @@ def nearest_neighbors(
             n_trees = 5 + int(round((X.shape[0]) ** 0.5 / 20.0))
             n_iters = max(5, int(round(np.log2(X.shape[0]))))
             if verbose:
-                print(ts(), "Building RP forest with",  str(n_trees), "trees")
+                print(ts(), "Building RP forest with", str(n_trees), "trees")
 
             rp_forest = make_forest(X, n_neighbors, n_trees, rng_state, angular)
             leaf_array = rptree_leaf_array(rp_forest)
@@ -1284,7 +1287,6 @@ def __init__(
         self.a = a
         self.b = b
 
-
     def _validate_parameters(self):
         if self.set_op_mix_ratio < 0.0 or self.set_op_mix_ratio > 1.0:
             raise ValueError("set_op_mix_ratio must be between 0.0 and 1.0")
@@ -1320,8 +1322,7 @@ def _validate_parameters(self):
         if self.n_epochs is not None and (
             self.n_epochs <= 10 or not isinstance(self.n_epochs, int)
         ):
-            raise ValueError("n_epochs must be a positive integer "
-                             "larger than 10")
+            raise ValueError("n_epochs must be a positive integer " "larger than 10")
 
     def fit(self, X, y=None):
         """Fit X into an embedded space.
@@ -1378,7 +1379,9 @@ def fit(self, X, y=None):
         # Error check n_neighbors based on data size
         if X.shape[0] <= self.n_neighbors:
             if X.shape[0] == 1:
-                self.embedding_ = np.zeros((1, self.n_components))  # needed to sklearn comparability
+                self.embedding_ = np.zeros(
+                    (1, self.n_components)
+                )  # needed to sklearn comparability
                 return self
 
             warn(
@@ -1458,14 +1461,16 @@ def fit(self, X, y=None):
                 self._distance_func = self.metric
             elif self.metric in dist.named_distances:
                 self._distance_func = dist.named_distances[self.metric]
-            elif self.metric == 'precomputed':
-                warn('Using precomputed metric; transform will be unavailable for new data')
+            elif self.metric == "precomputed":
+                warn(
+                    "Using precomputed metric; transform will be unavailable for new data"
+                )
             else:
                 raise ValueError(
                     "Metric is neither callable, " + "nor a recognised string"
                 )
 
-            if self.metric != 'precomputed':
+            if self.metric != "precomputed":
                 self._dist_args = tuple(self._metric_kwds.values())
 
                 self._random_init, self._tree_init = make_initialisations(
@@ -1499,9 +1504,11 @@ def fit(self, X, y=None):
 
                 # Handle the small case as precomputed as before
                 if y.shape[0] < 4096:
-                    ydmat = pairwise_distances(y_[np.newaxis, :].T,
-                                               metric=self.target_metric,
-                                               **self._target_metric_kwds)
+                    ydmat = pairwise_distances(
+                        y_[np.newaxis, :].T,
+                        metric=self.target_metric,
+                        **self._target_metric_kwds
+                    )
                     target_graph = fuzzy_simplicial_set(
                         ydmat,
                         target_n_neighbors,
@@ -1513,7 +1520,7 @@ def fit(self, X, y=None):
                         False,
                         1.0,
                         1.0,
-                        False
+                        False,
                     )
                 else:
                     # Standard case
@@ -1612,8 +1619,10 @@ def transform(self, X):
         """
         # If we fit just a single instance then error
         if self.embedding_.shape[0] == 1:
-            raise ValueError('Transform unavailable when model was fit with'
-                             'only a single data sample.')
+            raise ValueError(
+                "Transform unavailable when model was fit with"
+                "only a single data sample."
+            )
         # If we just have the original input then short circuit things
         X = check_array(X, dtype=np.float32, accept_sparse="csr")
         x_hash = joblib.hash(X)
@@ -1622,9 +1631,10 @@ def transform(self, X):
 
         if self._sparse_data:
             raise ValueError("Transform not available for sparse input.")
-        elif self.metric == 'precomputed':
-            raise ValueError("Transform  of new data not available for "
-                             "precomputed metric.")
+        elif self.metric == "precomputed":
+            raise ValueError(
+                "Transform  of new data not available for " "precomputed metric."
+            )
 
         X = check_array(X, dtype=np.float32, order="C")
         random_state = check_random_state(self.transform_seed)
@@ -1634,13 +1644,11 @@ def transform(self, X):
             dmat = pairwise_distances(
                 X, self._raw_data, metric=self.metric, **self._metric_kwds
             )
-            indices = np.argpartition(dmat,
-                                      self._n_neighbors)[:, :self._n_neighbors]
+            indices = np.argpartition(dmat, self._n_neighbors)[:, : self._n_neighbors]
             dmat_shortened = submatrix(dmat, indices, self._n_neighbors)
             indices_sorted = np.argsort(dmat_shortened)
             indices = submatrix(indices, indices_sorted, self._n_neighbors)
-            dists = submatrix(dmat_shortened, indices_sorted,
-                              self._n_neighbors)
+            dists = submatrix(dmat_shortened, indices_sorted, self._n_neighbors)
         else:
             init = initialise_search(
                 self._rp_forest,
diff --git a/umap/utils.py b/umap/utils.py
index 4e8bc4bd..48c7d91e 100644
--- a/umap/utils.py
+++ b/umap/utils.py
@@ -21,14 +21,14 @@ def tau_rand_int(state):
     -------
     A (pseudo)-random int32 value
     """
-    state[0] = (((state[0] & 4294967294) << 12) & 0xffffffff) ^ (
-        (((state[0] << 13) & 0xffffffff) ^ state[0]) >> 19
+    state[0] = (((state[0] & 4294967294) << 12) & 0xFFFFFFFF) ^ (
+        (((state[0] << 13) & 0xFFFFFFFF) ^ state[0]) >> 19
     )
-    state[1] = (((state[1] & 4294967288) << 4) & 0xffffffff) ^ (
-        (((state[1] << 2) & 0xffffffff) ^ state[1]) >> 25
+    state[1] = (((state[1] & 4294967288) << 4) & 0xFFFFFFFF) ^ (
+        (((state[1] << 2) & 0xFFFFFFFF) ^ state[1]) >> 25
     )
-    state[2] = (((state[2] & 4294967280) << 17) & 0xffffffff) ^ (
-        (((state[2] << 3) & 0xffffffff) ^ state[2]) >> 11
+    state[2] = (((state[2] & 4294967280) << 17) & 0xFFFFFFFF) ^ (
+        (((state[2] << 3) & 0xFFFFFFFF) ^ state[2]) >> 11
     )
 
     return state[0] ^ state[1] ^ state[2]
@@ -48,7 +48,7 @@ def tau_rand(state):
     A (pseudo)-random float32 in the interval [0, 1]
     """
     integer = tau_rand_int(state)
-    return abs(float(integer) / 0x7fffffff)
+    return abs(float(integer) / 0x7FFFFFFF)
 
 
 @numba.njit()