piskvorky · menshikh-iv · Mar 26, 2018 · Dec 6, 2017 · Dec 7, 2017 · Dec 7, 2017
diff --git a/gensim/models/atmodel.py b/gensim/models/atmodel.py
@@ -919,15 +919,95 @@ def get_document_topics(self, word_id, minimum_probability=None):
             'Use the "get_author_topics" method.'
         )
 
+    def get_new_author_topics(self, corpus, minimum_probability=None):
+        """Infer a topic distribution for a new, previously unseen author.
+
+        All documents in `corpus` are assumed to be written by this single new author.
+        The author is registered in the model under a temporary placeholder name for the
+        duration of the inference, and every temporary change to the model is rolled back
+        before this method returns or raises.
+
+        Parameters
+        ----------
+        corpus : iterable of iterable of (int, int)
+            Corpus in BoW format. If it has no `len()`, it is iterated once to count
+            the documents and then passed on to inference, so it must be re-iterable.
+        minimum_probability : float, optional
+            Ignore topics with probability below this value, if None - 1e-8 is used.
+
+        Returns
+        -------
+        list of (int, float) or None
+            Topic distribution for the given `corpus`, as returned by `get_author_topics`.
+            None if inference raised a ValueError (the error is logged, not propagated).
+
+        Raises
+        ------
+        ValueError
+            If `corpus` is empty, or if the placeholder author name is already
+            registered in the model.
+
+        """
+        # NOTE(review): this spot carried an unresolved merge conflict between
+        # `self.offset + 1 + 1` (upstream) and `self.offset + 1` (stashed changes).
+        # The upstream side is kept; confirm that this is the intended step size.
+        def rho():
+            return pow(self.offset + 1 + 1, -self.decay)
+
+        # Undo every temporary change made to the model for the placeholder author.
+        def rollback_new_author_changes():
+            # The placeholder's gamma row was appended last, so drop the last row.
+            self.state.gamma = self.state.gamma[0:-1]
+
+            del self.author2doc[new_author_name]
+            del self.id2author[self.author2id[new_author_name]]
+            del self.author2id[new_author_name]
+
+            for new_doc_id in corpus_doc_idx:
+                del self.doc2author[new_doc_id]
+
+        try:
+            len_input_corpus = len(corpus)
+        except TypeError:
+            logger.warning("input corpus stream has no len(); counting documents")
+            len_input_corpus = sum(1 for _ in corpus)
+        if len_input_corpus == 0:
+            raise ValueError("AuthorTopicModel.get_new_author_topics() called with an empty corpus")
+
+        new_author_name = "placeholder_name"
+        # Refuse to proceed if the placeholder collides with a real author: the setup
+        # below would overwrite that author's mappings and the rollback would delete them.
+        if new_author_name in self.author2id:
+            raise ValueError("self.author2id already has 'placeholder_name' author")
+
+        # Temporary ids for the new documents, placed right after the existing ones.
+        corpus_doc_idx = list(range(self.total_docs, self.total_docs + len_input_corpus))
+
+        # Add the new placeholder author to author2id/id2author dictionaries.
+        num_new_authors = 1
+        author_id = self.num_authors
+        self.author2id[new_author_name] = author_id
+        self.id2author[author_id] = new_author_name
+
+        # Add new author in author2doc and doc into doc2author.
+        self.author2doc[new_author_name] = corpus_doc_idx
+        for new_doc_id in corpus_doc_idx:
+            self.doc2author[new_doc_id] = [new_author_name]
+
+        # Append a freshly initialised gamma row for the placeholder author.
+        gamma_new = self.random_state.gamma(100., 1. / 100., (num_new_authors, self.num_topics))
+        self.state.gamma = np.vstack([self.state.gamma, gamma_new])
+
+        # Should not record the sstats, as we are going to delete the new author after calculation.
+        try:
+            self.inference(
+                corpus, self.author2doc, self.doc2author, rho(),
+                collect_sstats=False, chunk_doc_idx=corpus_doc_idx
+            )
+        except ValueError as e:
+            # Something went wrong! Log it and report failure by returning None.
+            logger.exception(e)
+            return None
+        else:
+            return self.get_author_topics(new_author_name, minimum_probability)
+        finally:
+            # Runs on success, on the handled ValueError and on any other exception,
+            # so the placeholder author never leaks into the model state.
+            rollback_new_author_changes()
+
     def get_author_topics(self, author_name, minimum_probability=None):
         """
-        Return topic distribution the given author, as a list of
+        Return topic distribution the given author.
+
+        Input as as a list of
         (topic_id, topic_probability) 2-tuples.
         Ignore topics with very low probability (below `minimum_probability`).
-
         Obtaining topic probabilities of each word, as in LDA (via `per_word_topics`),
         is not supported.
-
         """
 
         author_id = self.author2id[author_name]