Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix WordEmbeddingsKeyedVectors.most_similar #2356

Merged
merged 3 commits into the base branch from the contributor's branch
Jan 27, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,9 @@ def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=Non
Sequence of (word, similarity).

"""
if topn is not None and topn < 1:
return []

if positive is None:
positive = []
if negative is None:
Expand Down Expand Up @@ -550,7 +553,7 @@ def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=Non

limited = self.vectors_norm if restrict_vocab is None else self.vectors_norm[:restrict_vocab]
dists = dot(limited, mean)
if not topn:
if topn is None:
return dists
best = matutils.argsort(dists, topn=topn + len(all_words), reverse=True)
# ignore (don't return) words from the input
Expand Down
6 changes: 3 additions & 3 deletions gensim/similarities/docsim.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,13 +866,13 @@ class SoftCosineSimilarity(interfaces.SimilarityABC):
>>> from gensim.test.utils import common_texts
>>> from gensim.corpora import Dictionary
>>> from gensim.models import Word2Vec, WordEmbeddingSimilarityIndex
>>> from gensim.similarities import SoftCosineSimilarity, TermSimilarityMatrix
>>> from gensim.similarities import SoftCosineSimilarity, SparseTermSimilarityMatrix
>>>
>>> model = Word2Vec(common_texts, size=20, min_count=1) # train word-vectors
>>> termsim_index = WordEmbeddingSimilarityIndex(model)
>>> termsim_index = WordEmbeddingSimilarityIndex(model.wv)
>>> dictionary = Dictionary(common_texts)
>>> bow_corpus = [dictionary.doc2bow(document) for document in common_texts]
>>> similarity_matrix = TermSimilarityMatrix(termsim_index, dictionary) # construct similarity matrix
>>> similarity_matrix = SparseTermSimilarityMatrix(termsim_index, dictionary) # construct similarity matrix
>>> docsim_index = SoftCosineSimilarity(bow_corpus, similarity_matrix, num_best=10)
>>>
>>> query = 'graph trees computer'.split() # make a query
Expand Down