From 41e35b36b99bc960af9690fec1650b894c0c1bea Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 28 Apr 2019 13:18:02 +0900 Subject: [PATCH 1/2] fix types in logging calls --- gensim/models/nmf.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gensim/models/nmf.py b/gensim/models/nmf.py index 282f15b70b..07f88ad2c6 100644 --- a/gensim/models/nmf.py +++ b/gensim/models/nmf.py @@ -587,9 +587,9 @@ def update(self, corpus, chunksize=None, passes=None, eval_every=None): raise ValueError("Corpus is an iterator, only `passes=1` is valid.") logger.info( - "running NMF training, %s topics, %i passes over the supplied corpus of %i documents, evaluating l2 norm " + "running NMF training, %s topics, %i passes over the supplied corpus of %s documents, evaluating l2 norm " "every %i documents", - self.num_topics, passes, lencorpus if lencorpus < np.inf else "?", evalafter, + self.num_topics, passes, lencorpus, evalafter, ) chunk_overall_idx = 1 @@ -623,7 +623,7 @@ def update(self, corpus, chunksize=None, passes=None, eval_every=None): chunk_len = len(chunk) logger.info( - "PROGRESS: pass %i, at document #%i/%i", + "PROGRESS: pass %i, at document #%i/%s", pass_, chunk_idx * chunksize + chunk_len, lencorpus ) From 5a076203050c11d58301cefb527765803f2b19e1 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Sun, 28 Apr 2019 13:20:58 +0900 Subject: [PATCH 2/2] avoid divide by zero --- gensim/models/nmf.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gensim/models/nmf.py b/gensim/models/nmf.py index 07f88ad2c6..91805df977 100644 --- a/gensim/models/nmf.py +++ b/gensim/models/nmf.py @@ -499,7 +499,9 @@ def get_document_topics(self, bow, minimum_probability=None, if normalize is None: normalize = self.normalize if normalize: - h /= h.sum() + the_sum = h.sum() + if the_sum: + h /= the_sum return [ (idx, proba)