Skip to content

Commit

Permalink
Fix incorrect initialization of ShardedCorpus with a generator. Fix #1511 (
Browse files Browse the repository at this point in the history
#1512)

Fix incorrect initialization of ShardedCorpus with a generator. Fix #1511.
  • Loading branch information
karkkainenk1 authored and menshikh-iv committed Sep 14, 2017
1 parent 1c0098c commit e667069
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
3 changes: 2 additions & 1 deletion gensim/corpora/sharded_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,8 @@ def __init__(self, output_prefix, corpus, dim=None,
def init_shards(self, output_prefix, corpus, shardsize=4096, dtype=_default_dtype):
"""Initialize shards from the corpus."""

if not gensim.utils.is_corpus(corpus):
is_corpus, corpus = gensim.utils.is_corpus(corpus)
if not is_corpus:
raise ValueError(
"Cannot initialize shards without a corpus to read from! (Got corpus type: {0})".format(type(corpus))
)
Expand Down
12 changes: 12 additions & 0 deletions gensim/test/test_sharded_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,18 @@ def test_resize(self):
fname = dataset._shard_name(n)
self.assertTrue(os.path.isfile(fname))

def test_init_with_generator(self):
    """Build a ShardedCorpus from a generator and check its size and first value.

    The corpus is fed by a generator object rather than a list, using a
    dedicated output prefix derived from ``self.tmp_fname``.
    """

    def two_docs():
        # Yields two one-entry documents: [(0, 1)] and then [(1, 1)].
        # NOTE(review): presumably (feature_id, value) pairs -- confirm
        # against the ShardedCorpus input format.
        for idx in (0, 1):
            yield [(idx, 1)]

    shard_prefix = self.tmp_fname + '.generator'
    corpus = ShardedCorpus(shard_prefix, two_docs(), dim=2)

    # Both generated documents must have been stored.
    self.assertEqual(2, len(corpus))
    # First entry of the first stored document must equal 1.
    self.assertEqual(1, corpus[0][0])


if __name__ == '__main__':
suite = unittest.TestSuite()
Expand Down

0 comments on commit e667069

Please sign in to comment.