Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Issue-2670] Bug fix: Initialize doc_no2 because it is not set when corpus is empty #2672

Merged
3 changes: 3 additions & 0 deletions gensim/models/logentropy_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ def initialize(self, corpus):
self.n_docs, len(glob_freq), self.n_words
)
logger.debug('iterating over corpus')

# initialize doc_no2 index in case corpus is empty
doc_no2 = 0
for doc_no2, bow in enumerate(corpus):
for key, freq in bow:
p = (float(freq) / glob_freq[key]) * math.log(float(freq) / glob_freq[key])
Expand Down
14 changes: 14 additions & 0 deletions gensim/test/test_logentropy_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,23 @@


class TestLogEntropyModel(unittest.TestCase):
TEST_CORPUS = [[(1, 1.0)], [], [(0, 0.5), (2, 1.0)], []]

def setUp(self):
    """Prepare the corpora used by the tests in this class.

    Sets three attributes: an empty list standing in for an empty corpus,
    and two ``MmCorpus`` instances loaded from the ``test_corpus_ok.mm``
    and ``test_corpus_small.mm`` data files.
    """
    # The empty-corpus fixture is a plain list with no documents.
    self.corpus_empty = []
    self.corpus_ok = MmCorpus(datapath('test_corpus_ok.mm'))
    self.corpus_small = MmCorpus(datapath('test_corpus_small.mm'))

def test_generator_fail(self):
    """Building a model from a generator input is expected to raise ValueError."""
    # A generator expression yields the same documents, one at a time,
    # as a hand-written generator function over TEST_CORPUS would.
    lazy_docs = (doc for doc in TestLogEntropyModel.TEST_CORPUS)
    with self.assertRaises(ValueError):
        logentropy_model.LogEntropyModel(corpus=lazy_docs)

def test_empty_fail(self):
    """Building a model from the empty corpus fixture is expected to raise ValueError."""
    with self.assertRaises(ValueError):
        logentropy_model.LogEntropyModel(corpus=self.corpus_empty)

def testTransform(self):
# create the transformation model
Expand Down