Skip to content

Commit

Permalink
Simpler way of detecting duplicates
Browse files Browse the repository at this point in the history
  • Loading branch information
jayantj committed Nov 13, 2017
1 parent 9446a05 commit 930dfd4
Showing 1 changed file with 3 additions and 23 deletions.
26 changes: 3 additions & 23 deletions gensim/models/poincare.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,28 +185,6 @@ def _get_candidate_negatives(self):
self._negatives_buffer = NegativesBuffer(cumsum_table_indices)
return self._negatives_buffer.get_items(self.negative)

@staticmethod
def _has_duplicates(array):
    """Return whether the input array contains any duplicate items.

    Parameters
    ----------
    array : iterable of hashables
        Input array to be checked, should contain hashable items.

    Returns
    -------
    bool
        Whether the input array contains any duplicates.

    """
    # Remember every value seen so far and stop at the first repeat, so the
    # input is consumed only as far as needed and can be any iterable
    # (it does not need to support `len`).
    seen = set()
    for value in array:
        if value in seen:
            return True
        seen.add(value)
    return False

def _sample_negatives(self, node_index):
"""Return a sample of negatives for the given node.
Expand Down Expand Up @@ -234,10 +212,12 @@ def _sample_negatives(self, node_index):
# If number of positive relations is a small fraction of total nodes
# re-sample till no positively connected nodes are chosen
indices = self._get_candidate_negatives()
unique_indices = set(indices)
times_sampled = 1
while self._has_duplicates(indices) or (set(indices) & node_relations):
while (len(indices) != len(unique_indices)) or (unique_indices & node_relations):
times_sampled += 1
indices = self._get_candidate_negatives()
unique_indices = set(indices)
if times_sampled > 1:
logger.debug('Sampled %d times, positive fraction %.5f', times_sampled, positive_fraction)
else:
Expand Down

0 comments on commit 930dfd4

Please sign in to comment.