Remove is_double_col() from PDFtoTXT

NREL · Jul 2, 2024 · 252d028
1 parent 8e8e41d
commit 252d028
Showing 1 changed file with 4 additions and 17 deletions.
diff --git a/elm/pdf.py b/elm/pdf.py
@@ -206,21 +206,6 @@ async def clean_txt_async(self, ignore_error=None, rate_limit=40e3):
 
         return clean_pages
 
-    def is_double_col(self, text, separator='    '):
-        """Does the text look like it has multiple vertical text columns?
-
-        Parameters
-        ----------
-        separator : str
-            Heuristic split string to look for spaces between columns
-
-        Returns
-        -------
-        out : bool
-            True if more than one vertical text column
-        """
-        return is_multi_col(text, separator=separator)
-
     def clean_poppler(self, layout=True):
         """Clean the pdf using the poppler pdftotxt utility
 
@@ -351,21 +336,23 @@ def clean_headers(self, char_thresh=0.6, page_thresh=0.8, split_on='\n',
         self.full = combine_pages(self.pages)
         return self.full
 
-    def convert_to_txt(self, txt_fp):
+    def convert_to_txt(self, txt_fp, separator='    '):
         """Function to convert contents of pdf document to txt file.
 
         Parameters
         ----------
         txt_fp: str
             Directory for output txt file.
+        separator : str
+            Heuristic split string to look for spaces between columns
 
         Returns
         -------
         text : str
             Text string containing contents from pdf
         """
         text = self.clean_poppler(layout=True)
-        if self.is_double_col(text):
+        if is_multi_col(text, separator=separator):
             text = self.clean_poppler(layout=False)
         text = self.clean_headers(char_thresh=0.6, page_thresh=0.8,
                                   split_on='\n',