Skip to content

Commit

Permalink
Fix _is_single from Phrases for case when corpus is numpy array (#1987)
Browse files Browse the repository at this point in the history

* fix _is_single element-wise comparison error

* fix mode

* add test for corpus as numpy array
  • Loading branch information
rmalouf authored and menshikh-iv committed Mar 20, 2018
1 parent 58d560b commit 7652825
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
2 changes: 1 addition & 1 deletion gensim/models/phrases.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def _is_single(obj):
if isinstance(peek, string_types):
# It's a document, return the iterator
return True, obj_iter
if temp_iter == obj:
if temp_iter is obj:
# Checking for iterator to the object
return False, obj_iter
else:
Expand Down
18 changes: 17 additions & 1 deletion gensim/test/test_phrases.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

import six

import numpy as np

from gensim.utils import to_unicode
from gensim.models.phrases import SentenceAnalyzer, Phrases, Phraser
from gensim.models.phrases import pseudocorpus, original_scorer
Expand Down Expand Up @@ -148,7 +150,7 @@ def gen_sentences(self):


class PhrasesCommon:
""" Tests that need to be run for both Prases and Phraser classes."""
""" Tests that need to be run for both Phrases and Phraser classes."""

def setUp(self):
self.bigram = Phrases(
Expand Down Expand Up @@ -230,6 +232,20 @@ def testBigramConstructionFromGenerator(self):
break
self.assertTrue(bigram1_seen and bigram2_seen)

def testBigramConstructionFromArray(self):
    """Check that both expected bigrams appear when the corpus is passed as a numpy array."""
    found_first = False
    found_second = False

    # Wrap the fixture sentences in a numpy array before handing them to the model.
    for sentence in self.bigram[np.array(self.sentences)]:
        # `or` short-circuits, so a bigram is not searched for again once it has been seen.
        found_first = found_first or self.bigram1 in sentence
        found_second = found_second or self.bigram2 in sentence
        if found_first and found_second:
            # Both bigrams observed; no need to transform the remaining sentences.
            break
    self.assertTrue(found_first and found_second)

def testEncoding(self):
"""Test that both utf8 and unicode input work; output must be unicode."""
expected = [u'survey', u'user', u'computer', u'system', u'response_time']
Expand Down

0 comments on commit 7652825

Please sign in to comment.