Add inference for new unseen author for gensim.models.AuthorTopicModel (#1766)
@@ -919,15 +919,85 @@ def get_document_topics(self, word_id, minimum_probability=None):
            'Use the "get_author_topics" method.'
        )

    def get_new_author_topics(self, corpus, minimum_probability=None):
        """Infers topics for a new author.
Review comment: please use numpy-style docstrings: http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_numpy.html and https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt

        Infers a topic distribution for a new author over the passed corpus of docs,
        assuming that all documents are from this single new author.

        Parameters
        ----------
        corpus : iterable of iterable of (int, int)
            Corpus in BoW format.
        minimum_probability : float, optional
            Ignore topics with probability below this value; if None, 1e-8 is used.

        Returns
        -------
        list of (int, float)
            Topic distribution for the given `corpus`.

        """
        def rho():
Review comment: @Stamenov @olavurmortensen Please document the rationale behind this definition of rho.
Review comment: In a different place you explain rho as follows:
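            # Hedged rationale (an assumption, responding to the question above): rho
            # appears to play the role of the online variational Bayes step size
            # rho_t = (offset + t) ** (-decay) from Hoffman et al. (2010), evaluated
            # as if exactly one pass over one chunk had been made, i.e. t = 1 + 1.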

            return pow(self.offset + 1 + 1, -self.decay)

        # Wrap in function to avoid code duplication.
        def rollback_new_author_changes():
            self.state.gamma = self.state.gamma[0:-1]

            del self.author2doc[new_author_name]
            a_id = self.author2id[new_author_name]
            del self.id2author[a_id]
            del self.author2id[new_author_name]

            for new_doc_id in corpus_doc_idx:
                del self.doc2author[new_doc_id]

        try:
            len_input_corpus = len(corpus)
        except TypeError:
            logger.warning("input corpus stream has no len(); counting documents")
            len_input_corpus = sum(1 for _ in corpus)
        if len_input_corpus == 0:
            raise ValueError("AuthorTopicModel.get_new_author_topics() called with an empty corpus")

        new_author_name = "placeholder_name"
Review comment: Just for completeness, check that this author isn't already in the dictionary.
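        # A possible guard for the point raised above (hypothetical, not part of this diff):
        # if new_author_name in self.author2id:
        #     raise ValueError("author '%s' already exists in the model" % new_author_name)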

        corpus_doc_idx = list(range(self.total_docs, self.total_docs + len_input_corpus))
Review comment: Add a comment explaining this line. For example: "indexes representing the documents in the input corpus".

        # Add the new placeholder author to author2id/id2author dictionaries.
        num_new_authors = 1
        author_id = 0
Review comment: I think this looks confusing. Just say

        self.author2id[new_author_name] = author_id + self.num_authors
        self.id2author[author_id + self.num_authors] = new_author_name

        # Add the new author to author2doc and the new docs to doc2author.
        self.author2doc[new_author_name] = corpus_doc_idx
        for new_doc_id in corpus_doc_idx:
            self.doc2author[new_doc_id] = [new_author_name]
Review comment: Because of how you set
Review reply: fixed

        gamma_new = self.random_state.gamma(100., 1. / 100., (num_new_authors, self.num_topics))
        self.state.gamma = np.vstack([self.state.gamma, gamma_new])
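        # Note: the new author's gamma row is appended as the last row of
        # self.state.gamma, which is why rollback_new_author_changes() can undo it
        # by slicing off the final row with gamma[0:-1].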

        # Should not record the sstats, as we are going to delete the new author once
        # the topics are computed.
        try:
Review comment: Does this try block throw an exception if there is a problem?
Review reply: Yes, it does, for example when the corpus is invalid (a list of something else), or if the inference is somehow interrupted, or maybe something else. We must make sure to roll back the temporary changes in any case. It makes sense to show the exception to the user, but leave the model state untouched.

            gammat, _ = self.inference(
                corpus, self.author2doc, self.doc2author, rho(),
                collect_sstats=False, chunk_doc_idx=corpus_doc_idx
            )
            new_author_topics = self.get_author_topics(new_author_name, minimum_probability)
        finally:
            rollback_new_author_changes()
        return new_author_topics
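A minimal usage sketch of the new method, assuming an already trained AuthorTopicModel `model` and a list of BoW documents `new_docs` built with the same dictionary (both names are placeholders):

    # Infer a topic distribution for an author the model has never seen.
    new_author_dist = model.get_new_author_topics(corpus=new_docs)
    for topic_id, prob in new_author_dist:
        print(topic_id, prob)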
    def get_author_topics(self, author_name, minimum_probability=None):
        """
        Return topic distribution the given author, as a list of
        Return topic distribution for the given author.

        Input as a list of
        (topic_id, topic_probability) 2-tuples.
        Ignore topics with very low probability (below `minimum_probability`).

        Obtaining topic probabilities of each word, as in LDA (via `per_word_topics`),
        is not supported.

        """

        author_id = self.author2id[author_name]
@@ -26,7 +26,7 @@
from gensim.test import basetmtests
from gensim.test.utils import (datapath,
    get_tmpfile, common_texts, common_dictionary as dictionary, common_corpus as corpus)

from gensim.matutils import jensen_shannon
# TODO:
# Test that computing the bound on new unseen documents works as expected (this is somewhat different
# in the author-topic model than in LDA).
@@ -450,6 +450,45 @@ def testTermTopics(self):
            self.assertTrue(isinstance(topic_no, int))
            self.assertTrue(isinstance(probability, float))

    def testNewAuthorTopics(self):
Review comment: CC: @olavurmortensen, this test looks like "sanity-check" only, can you propose something more significant?
Review reply: Besides the sanity check, I tried another test case, where I would infer the topics for a new author, using the exact same documents from the corpus that an author is already assigned to. We can expect that the topic distributions should be very similar, as they are, but not close enough that we can use np.allclose(.., ..). Example:
    In [2]: model.get_new_author_topics(corpus=corpus[1:3])
    In [3]: model.get_author_topics(author_name="sally")
Of course something like this would work. @menshikh-iv do you approve of this test?
Review reply: Sounds reasonable, only one thing - maybe better to calculate JS-divergence instead of
Review reply: https://github.com/RaRe-Technologies/gensim/blob/a54e3369bf832bc632d20f5e1d401267e6bc4882/gensim/matutils.py#L921 - the implementation is already available in gensim.
Review reply: How would you propose to set the similarity threshold? Just empirically for this one test case?
Review reply: yes, I don't know a better solution here.
        model = self.class_(
            corpus, author2doc=author2doc, id2word=dictionary, num_topics=2,
            passes=100, random_state=np.random.seed(0)
        )
        author2doc_newauthor = {}
        author2doc_newauthor["test"] = [0, 1]
        model.update(corpus=corpus[0:2], author2doc=author2doc_newauthor)

        # temp save model state vars before get_new_author_topics is called
        state_gamma_len = len(model.state.gamma)
        author2doc_len = len(model.author2doc)
        author2id_len = len(model.author2id)
        id2author_len = len(model.id2author)
        doc2author_len = len(model.doc2author)

        new_author_topics = model.get_new_author_topics(corpus=corpus[0:2])

        # sanity check
        for k, v in new_author_topics:
            self.assertTrue(isinstance(k, int))
            self.assertTrue(isinstance(v, float))

        # make sure topics are similar enough
        similarity = 1 / (1 + jensen_shannon(model["test"], new_author_topics))
        self.assertTrue(similarity >= 0.9)

        # produce an error to test if rollback occurs
        with self.assertRaises(TypeError):
            model.get_new_author_topics(corpus=corpus[0])

        # assure rollback was successful and the model state is as before
        self.assertEqual(state_gamma_len, len(model.state.gamma))
        self.assertEqual(author2doc_len, len(model.author2doc))
        self.assertEqual(author2id_len, len(model.author2id))
        self.assertEqual(id2author_len, len(model.id2author))
        self.assertEqual(doc2author_len, len(model.doc2author))
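For readers unfamiliar with the similarity score used in the test above, here is a self-contained sketch of the same 1 / (1 + Jensen-Shannon divergence) transform, computed by hand on toy dense topic distributions; it is an illustration only, the test itself relies on gensim's matutils.jensen_shannon:

    import numpy as np

    def js_divergence(p, q):
        # Jensen-Shannon divergence of two discrete distributions (natural log base).
        p, q = np.asarray(p, dtype=float), np.asarray(q, dtype=float)
        m = 0.5 * (p + q)
        kl = lambda a, b: float(np.sum(np.where(a > 0, a * np.log(a / b), 0.0)))
        return 0.5 * kl(p, m) + 0.5 * kl(q, m)

    similarity = 1.0 / (1.0 + js_divergence([0.9, 0.1], [0.8, 0.2]))
    print(similarity)  # close to 1.0 for near-identical topic distributions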

    def testPasses(self):
        # long message includes the original error message with a custom one
        self.longMessage = True
Review comment: What's the reason to use minimum_probability=None here (instead of 1e-8)?
Review reply: @menshikh-iv It is the same in get_document_topics in LDA (https://github.com/RaRe-Technologies/gensim/blob/develop/gensim/models/ldamodel.py#L987), and get_term_topics as well.
Review reply: so, I should leave it like this, right?
Review reply: @Stamenov yes, leave as is
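For context on this thread, a hedged sketch of the None handling the new docstring describes ("if None, 1e-8 is used"); the helper name below is hypothetical and the pattern is assumed to mirror how the LDA getters treat their minimum_probability argument:

    def _resolve_minimum_probability(minimum_probability):
        # None falls back to a tiny floor so that zero-probability topics are still dropped.
        if minimum_probability is None:
            minimum_probability = 1e-8
        return max(minimum_probability, 1e-8)

    print(_resolve_minimum_probability(None))  # 1e-08
    print(_resolve_minimum_probability(0.05))  # 0.05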