Skip to content

Commit

Permalink
Throw exception if load() is called on instance rather than the class…
Browse files Browse the repository at this point in the history
… in word2vec and doc2vec (#889)

* Update word2vec.py

* Update word2vec.py

* Update word2vec.py

* Update doc2vec.py

* Update doc2vec.py

* Update word2vec.py

* Update utils.py

* Update word2vec.py

* Update utils.py

* Update word2vec.py

* Update doc2vec.py

* Update doc2vec.py

* Update word2vec.py

* Update utils.py

* Update word2vec.py

* Update doc2vec.py

* Update doc2vec.py

* Update utils.py

* 	modified:   gensim/utils.py

* 	modified:   gensim/test/test_doc2vec.py
	modified:   gensim/test/test_word2vec.py

* 	modified:   gensim/test/test_word2vec.py

* 	modified:   gensim/test/test_doc2vec.py
	modified:   gensim/test/test_word2vec.py

* 	modified:   gensim/test/test_doc2vec.py
	modified:   gensim/test/test_word2vec.py

* 	modified:   gensim/test/test_doc2vec.py
	modified:   gensim/test/test_word2vec.py

* 	modified:   gensim/test/test_doc2vec.py
	modified:   gensim/test/test_word2vec.py

* 	modified:   gensim/test/test_doc2vec.py
	modified:   gensim/test/test_word2vec.py

* Update test_word2vec.py

* Update test_doc2vec.py

* Update test_doc2vec.py

* Update test_word2vec.py

* Update doc2vec.py

* Update doc2vec.py

* Update test_word2vec.py

* Update test_word2vec.py

* Update test_word2vec.py

* Update test_doc2vec.py

* Update test_word2vec.py
  • Loading branch information
Dust0x authored and tmylk committed Dec 22, 2016
1 parent dc13d26 commit 804825e
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 5 deletions.
7 changes: 6 additions & 1 deletion gensim/models/doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
repeat as np_repeat, array, float32 as REAL, empty, ones, memmap as np_memmap, \
sqrt, newaxis, ndarray, dot, vstack, dtype, divide as np_divide


from gensim.utils import call_on_class_only
from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc
from gensim.models.word2vec import Word2Vec, Vocab, train_cbow_pair, train_sg_pair, train_batch_sg
from six.moves import xrange, zip
Expand Down Expand Up @@ -603,10 +605,13 @@ def __init__(self, documents=None, dm_mean=None,
super(Doc2Vec, self).__init__(
sg=(1 + dm) % 2,
null_word=dm_concat, **kwargs)

self.load = call_on_class_only
self.load_word2vec_format = call_on_class_only

if dm_mean is not None:
self.cbow_mean = dm_mean

self.dbow_words = dbow_words
self.dm_concat = dm_concat
self.dm_tag_count = dm_tag_count
Expand Down
4 changes: 4 additions & 0 deletions gensim/models/word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
import threading
import itertools

from gensim.utils import keep_vocab_item, call_on_class_only
from gensim.utils import keep_vocab_item
from gensim.models.keyedvectors import KeyedVectors

Expand Down Expand Up @@ -421,6 +422,9 @@ def __init__(
"""

self.load = call_on_class_only
self.load_word2vec_format = call_on_class_only

if FAST_VERSION == -1:
logger.warning('Slow version of {0} is being used'.format(__name__))
else:
Expand Down
10 changes: 10 additions & 0 deletions gensim/test/test_doc2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ def testfile():
# temporary data will be stored to this file
return os.path.join(tempfile.gettempdir(), 'gensim_doc2vec.tst')

def load_on_instance():
    """Call ``load`` on a Doc2Vec *instance* instead of on the class.

    A model built from the Lee corpus is first saved to the temporary test
    file so that a file exists at that path; a second, freshly constructed
    model then has ``load`` invoked on it. Used by the tests as a callable
    that is expected to raise.
    """
    path = testfile()
    trained = doc2vec.Doc2Vec(DocsLeeCorpus(), min_count=1)
    trained.save(path)
    fresh = doc2vec.Doc2Vec()
    # The instance-level ``load`` call below is the step expected to fail.
    return fresh.load(path)

class TestDoc2VecModel(unittest.TestCase):
def test_persistence(self):
Expand Down Expand Up @@ -342,6 +348,10 @@ def testTrainWarning(self, l):
model.alpha += 0.05
warning = "Effective 'alpha' higher than previous training cycles"
self.assertTrue(warning in str(l))

def testLoadOnClassError(self):
    """Loading a doc2vec model through an instance must raise AttributeError."""
    with self.assertRaises(AttributeError):
        load_on_instance()
#endclass TestDoc2VecModel


Expand Down
16 changes: 12 additions & 4 deletions gensim/test/test_word2vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,17 @@ def testfile():
# temporary data will be stored to this file
return os.path.join(tempfile.gettempdir(), 'gensim_word2vec.tst')


def _rule(word, count, min_count):
    """Trim rule used by the tests: discard the word "human", defer otherwise.

    ``count`` and ``min_count`` are accepted but not inspected here; every
    word other than "human" gets ``utils.RULE_DEFAULT``.
    """
    is_banned = word == "human"
    return utils.RULE_DISCARD if is_banned else utils.RULE_DEFAULT

def load_on_instance():
    """Call ``load`` on a Word2Vec *instance* instead of on the class.

    A model built from ``sentences`` is first saved to the temporary test
    file so that a file exists at that path; a second, freshly constructed
    model then has ``load`` invoked on it. Used by the tests as a callable
    that is expected to raise.
    """
    path = testfile()
    trained = word2vec.Word2Vec(sentences, min_count=1)
    trained.save(path)
    fresh = word2vec.Word2Vec()
    # The instance-level ``load`` call below is the step expected to fail.
    return fresh.load(path)

class TestWord2VecModel(unittest.TestCase):
def testOnlineLearning(self):
Expand Down Expand Up @@ -585,14 +589,18 @@ def testTrainWarning(self, l):
model.alpha += 0.05
warning = "Effective 'alpha' higher than previous training cycles"
self.assertTrue(warning in str(l))
#endclass TestWord2VecModel


def test_sentences_should_not_be_a_generator(self):
    """Passing a generator object as ``sentences`` must raise TypeError."""
    gen = (s for s in sentences)
    # Pass the generator itself. ``assertRaises(exc, callable, *args)``
    # forwards extra arguments positionally, so the previous ``(gen,)``
    # handed the constructor a one-element tuple rather than a generator,
    # and the generator case was never exercised.
    # NOTE(review): assumes Word2Vec rejects generator input with TypeError,
    # as this test's name implies -- confirm against word2vec.py.
    with self.assertRaises(TypeError):
        word2vec.Word2Vec(gen)

def testLoadOnClassError(self):
    """Loading a word2vec model through an instance must raise AttributeError."""
    with self.assertRaises(AttributeError):
        load_on_instance()
#endclass TestWord2VecModel

class TestWMD(unittest.TestCase):
def testNonzero(self):
Expand Down
4 changes: 4 additions & 0 deletions gensim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ def any2unicode(text, encoding='utf8', errors='strict'):
return unicode(text, encoding, errors=errors)
to_unicode = any2unicode

def call_on_class_only(*args, **kwargs):
    """Unconditionally raise AttributeError; all arguments are ignored.

    Meant to be assigned over the load methods of an instance, so that
    invoking them on the instance rather than on the class fails loudly.
    """
    message = 'This method should be called on a class object.'
    raise AttributeError(message)


class SaveLoad(object):
"""
Expand Down

0 comments on commit 804825e

Please sign in to comment.