piskvorky · tmylk · Nov 10, 2016 · Nov 10, 2016
diff --git a/gensim/models/wrappers/ldamallet.py b/gensim/models/wrappers/ldamallet.py
@@ -145,7 +145,7 @@ def convert_input(self, corpus, infer=False, serialize_corpus=True):
                 self.corpus2mallet(corpus, fout)
 
         # convert the text file above into MALLET's internal format
-        cmd = self.mallet_path + " import-file --preserve-case --keep-sequence --remove-stopwords --token-regex '\S+' --input %s --output %s"
+        cmd = self.mallet_path + ' import-file --preserve-case --keep-sequence --remove-stopwords --token-regex "\S+" --input %s --output %s'
         if infer:
             cmd += ' --use-pipe-from ' + self.fcorpusmallet()
             cmd = cmd % (self.fcorpustxt(), self.fcorpusmallet() + '.infer')
@@ -166,7 +166,7 @@ def train(self, corpus):
         logger.info("training MALLET LDA with %s", cmd)
         check_output(cmd, shell=True)
         self.word_topics = self.load_word_topics()
-        # NOTE - we are still keeping the wordtopics variable to not break backward compatibility. 
+        # NOTE - we are still keeping the wordtopics variable to not break backward compatibility.
         # word_topics has replaced wordtopics throughout the code; wordtopics just stores the values of word_topics when train is called.
         self.wordtopics = self.word_topics
 
@@ -260,20 +260,20 @@ def get_version(self, direc_path):
             Check version of mallet via jar file
             """
             archive = zipfile.ZipFile(direc_path, 'r')
-            if u'cc/mallet/regression/' not in archive.namelist():     
+            if u'cc/mallet/regression/' not in archive.namelist():
                 return '2.0.7'
             else:
                 return '2.0.8RC3'
         except Exception:
-            
+
             xml_path = direc_path.split("bin")[0]
             try:
                 doc = et.parse(xml_path + "pom.xml").getroot()
                 namespace = doc.tag[:doc.tag.index('}') + 1]
                 return doc.find(namespace + 'version').text.split("-")[0]
             except Exception:
                 return "Can't parse pom.xml version file"
-        
+
 
 
     def read_doctopics(self, fname, eps=1e-6, renorm=True):
@@ -304,7 +304,7 @@ def read_doctopics(self, fname, eps=1e-6, renorm=True):
                     if mallet_version == "2.0.7":
                         """
 
-                            1   1   0   1.0780612802674239  30.005575655428533364   2   0.005575655428533364    1   0.005575655428533364    
+                            1   1   0   1.0780612802674239  30.005575655428533364   2   0.005575655428533364    1   0.005575655428533364
                             2   2   0   0.9184413079632608  40.009062076892971008   3   0.009062076892971008    2   0.009062076892971008    1   0.009062076892971008
                             In the above example there is a mix of the above if and elif statement. There are neither `2*num_topics` nor `num_topics` elements.
                             It has 2 formats 40.009062076892971008 and 0   1.0780612802674239 which cannot be handled by above if elif.
@@ -316,14 +316,14 @@ def read_doctopics(self, fname, eps=1e-6, renorm=True):
                         doc = []
                         if len(parts) > 0:
                             while count < len(parts):
-                                """ 
+                                """
                                 if section is to deal with formats of type 2 0.034
                                 so if count reaches index of 2 and since int(2) == float(2) so if block is executed
-                                now  there is one extra element afer 2, so count + 1 access should not give an error
+                                now there is one extra element after 2, so count + 1 access should not give an error
 
                                 else section handles  formats of type 20.034
                                 now count is there on index of 20.034 since float(20.034) != int(20.034) so else block
-                                is executed 
+                                is executed
 
                                 """
                                 if float(parts[count]) == int(parts[count]):