diff --git a/datasketch/hnsw.py b/datasketch/hnsw.py index 04ce2382..e57a78c6 100644 --- a/datasketch/hnsw.py +++ b/datasketch/hnsw.py @@ -614,7 +614,8 @@ def _repair_connections( entry_point, entry_point_dist, layer, - allow_soft_deleted=False, + # We allow soft-deleted points to be returned and used as an entry point. + allow_soft_deleted=True, key_to_hard_delete=key_to_delete, ) entry_points = [(-entry_point_dist, entry_point)] @@ -626,7 +627,9 @@ def _repair_connections( entry_points, layer, ef + 1, # We add 1 to ef to account for the point itself. - allow_soft_deleted=False, + # We allow soft-deleted points to be returned and used as entry points + # and neighbor candidates. + allow_soft_deleted=True, key_to_hard_delete=key_to_delete, ) # Filter out the updated node itself. diff --git a/test/test_hnsw.py b/test/test_hnsw.py index 64fb9e90..1f4e80f0 100644 --- a/test/test_hnsw.py +++ b/test/test_hnsw.py @@ -166,7 +166,6 @@ def test_soft_remove_and_pop_and_clean(self): "Potential graph connectivity issue." ) # NOTE: we are not getting the expected number of results. - # This may be because the graph is not connected anymore. # Try hard remove all previous soft removed points. hnsw.clean() results = hnsw.query(data[i], 10)