feat: new Ls Parsers for 'ls' commands (#3833)

- See INSGHTCORE-217 - Mark the old Ls Parsers as deprecated; they will be removed in 3.4.0 - Update the ls_parser to pre-load the `Directory` - All `ls_*` specs require a filterable `ls_*_dirs` spec to collect the listing target directories from rules. A `_non_existing_` dir is appended to the targets when only 1 target is collected. Signed-off-by: Xiangce Liu <[email protected]>
RedHatInsights · Jul 6, 2023 · 887e196 · 887e196
1 parent 462cdc9
commit 887e196
Show file tree

Hide file tree

Showing 44 changed files with 1,571 additions and 114 deletions.
diff --git a/docs/custom_datasources_index.rst b/docs/custom_datasources_index.rst
@@ -132,6 +132,15 @@ insights.specs.datasources.leapp
     :undoc-members:
 
 
+insights.specs.datasources.ls
+-----------------------------
+
+.. automodule:: insights.specs.datasources.ls
+    :members: list_with_la, list_with_la_filtered, list_with_lan, list_with_lan_filtered, list_with_lanL, list_with_lanR, list_with_lanRL, list_with_lanRZ, list_with_lanZ
+    :show-inheritance:
+    :undoc-members:
+
+
 insights.specs.datasources.lpstat
 ---------------------------------
 

diff --git a/docs/shared_parsers_catalog/ls.rst b/docs/shared_parsers_catalog/ls.rst
@@ -0,0 +1,3 @@
+.. automodule:: insights.parsers.ls
+   :members:
+   :show-inheritance:
diff --git a/insights/core/__init__.py b/insights/core/__init__.py
@@ -1643,6 +1643,10 @@ def __repr__(self):
 
 class FileListing(Parser):
     """
+    .. warning::
+        This class is deprecated and will be removed from 3.5.0.
+        Please use the :class:`insights.parsers.ls.FileListing` instead.
+
     Reads a series of concatenated directory listings and turns them into
     a dictionary of entities by name.  Stores all the information for
     each directory entry for every entry that can be parsed, containing:
@@ -1665,7 +1669,8 @@ class FileListing(Parser):
       directory, in the order found in the listing
     * total blocks allocated to all the entities in this directory
 
-    .. note:: For listings that only contain one directory, ``ls`` does not
+    .. note::
+        For listings that only contain one directory, ``ls`` does not
         output the directory name.  The directory is reverse engineered from
         the path given to the parser by Insights - this assumes the
         translation of spaces to underscores and '/' to '.' in paths.  For
@@ -1718,6 +1723,7 @@ def __init__(self, context):
         # the directory name in the output).  Obviously if we don't have the
         # '-R' flag we should grab this but it's probably not worth parsing
         # the flags to ls for this.
+        deprecated(FileListing, "Please use the :class:`insights.parsers.ls.FileListing` instead.", "3.5.0")
         self.first_path = None
         path_re = re.compile(r'ls_-\w+(?P<path>.*)$')
         match = path_re.search(context.path)

diff --git a/insights/core/ls_parser.py b/insights/core/ls_parser.py
@@ -2,7 +2,6 @@
 This module contains logic for parsing ls output. It attempts to handle
 output when selinux is enabled or disabled and also skip "bad" lines.
 """
-import six
 
 
 def parse_path(path):
@@ -139,45 +138,13 @@ def parse_rhel8_selinux(parts):
     return result
 
 
-PASS_KEYS = set(["name", "total"])
-DELAYED_KEYS = ["entries", "files", "dirs", "specials"]
-
-
 class Directory(dict):
     def __init__(self, name, total, body):
-        data = dict.fromkeys(DELAYED_KEYS)
-        data["name"] = name
-        data["total"] = total
-        self.body = body
-        self.loaded = False
-        super(Directory, self).__init__(data)
-
-    def iteritems(self):
-        if not self.loaded:
-            self._load()
-        return six.iteritems(super(Directory, self))
-
-    def items(self):
-        if not self.loaded:
-            self._load()
-        return super(Directory, self).items()
-
-    def values(self):
-        if not self.loaded:
-            self._load()
-        return super(Directory, self).values()
-
-    def get(self, key, default=None):
-        if not self.loaded:
-            self._load()
-        return super(Directory, self).get(key, default)
-
-    def _load(self):
         dirs = []
         ents = {}
         files = []
         specials = []
-        for line in self.body:
+        for line in body:
             # we can't split(None, 5) here b/c rhel 6/7 selinux lines only have
             # 4 parts before the path, and the path itself could contain
             # spaces. Unfortunately, this means we have to split the line again
@@ -204,7 +171,7 @@ def _load(self):
             # based on its type.
             entry.update(rest)
             entry["raw_entry"] = line
-            entry["dir"] = self["name"]
+            entry["dir"] = name
             nm = entry["name"]
             ents[nm] = entry
             if typ not in "bcd":
@@ -214,19 +181,16 @@ def _load(self):
             elif typ in "bc":
                 specials.append(nm)
 
-        self.update({"entries": ents,
-                     "files": files,
-                     "dirs": dirs,
-                     "specials": specials})
-
-        self.loaded = True
-        del self.body
-
-    def __getitem__(self, key):
-        if self.loaded or key in PASS_KEYS:
-            return super(Directory, self).__getitem__(key)
-        self._load()
-        return super(Directory, self).__getitem__(key)
+        super(Directory, self).__init__(
+                {
+                    "dirs": dirs,
+                    "entries": ents,
+                    "files": files,
+                    "name": name,
+                    "specials": specials,
+                    "total": total
+                }
+        )
 
 
 def parse(lines, root=None):
@@ -249,7 +213,8 @@ def parse(lines, root=None):
     total = None
     for line in lines:
         line = line.strip()
-        if not line:
+        # Skip empty lines and "No such file or directory" error lines
+        if not line or ': No such file or directory' in line:
             continue
         if line and line[0] == "/" and line[-1] == ":":
             if name is None: