From 330a7836f32baa57e6b276a53c1cfff7448af441 Mon Sep 17 00:00:00 2001 From: Kendall Harter Date: Mon, 30 Sep 2024 08:51:37 -0700 Subject: [PATCH] Add include/exclude extensions options --- README.md | 6 ++++-- docs/configuration_files.md | 6 ++++-- surfactant/cmd/generate.py | 8 +++++++- surfactant/config.py | 2 ++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ba6891e8..bb27db8b 100644 --- a/README.md +++ b/README.md @@ -142,8 +142,10 @@ A configuration file for a sample contains the information about the sample to g **extractPaths**: (required) the absolute path or relative path from location of current working directory that `surfactant` is being run from to the sample folders, cannot be a file (Note that even on Windows, Unix style `/` directory separators should be used in paths)\ **archive**: (optional) the full path, including file name, of the zip, exe installer, or other archive file that the folders in **extractPaths** were extracted from. This is used to collect metadata about the overall sample and will be added as a "Contains" relationship to all software entries found in the various **extractPaths**\ -**installPrefix**: (optional) where the files in **extractPaths** would be if installed correctly on an actual system i.e. "C:/", "C:/Program Files/", etc (Note that even on Windows, Unix style `/` directory separators should be used in the path). If not given then the **extractPaths** will be used as the install paths -**includeAllFiles**: (optional) If present and set to true, include all files in the SBOM, rather than only those recognized by Surfactant. +**installPrefix**: (optional) where the files in **extractPaths** would be if installed correctly on an actual system i.e. "C:/", "C:/Program Files/", etc (Note that even on Windows, Unix style `/` directory separators should be used in the path). 
If not given then the **extractPaths** will be used as the install paths\ +**includeAllFiles**: (optional) If present and set to true, include all files in the SBOM, rather than only those recognized by Surfactant\ +**includeFileExts**: (optional) A list of file extensions to include, even if not recognized by Surfactant\ +**excludeFileExts**: (optional) A list of file extensions to exclude, even if recognized by Surfactant. Note that setting both this and includeAllFiles will still exclude the specified extensions #### Create config command diff --git a/docs/configuration_files.md b/docs/configuration_files.md index 4aed84fa..493ffdf7 100644 --- a/docs/configuration_files.md +++ b/docs/configuration_files.md @@ -43,8 +43,10 @@ A sample configuration file contains the information about the sample to gather **extractPaths**: (required) the absolute path or relative path from location of current working directory that `surfactant` is being run from to the sample folders, cannot be a file (Note that even on Windows, Unix style `/` directory separators should be used in paths)\ **archive**: (optional) the full path, including file name, of the zip, exe installer, or other archive file that the folders in **extractPaths** were extracted from. This is used to collect metadata about the overall sample and will be added as a "Contains" relationship to all software entries found in the various **extractPaths**\ -**installPrefix**: (optional) where the files in **extractPaths** would be if installed correctly on an actual system i.e. "C:/", "C:/Program Files/", etc (Note that even on Windows, Unix style `/` directory separators should be used in the path). If not given then the **extractPaths** will be used as the install paths -**includeAllFiles**: (optional) If present and set to true, include all files in the SBOM, rather than only those recognized by Surfactant. 
+**installPrefix**: (optional) where the files in **extractPaths** would be if installed correctly on an actual system i.e. "C:/", "C:/Program Files/", etc (Note that even on Windows, Unix style `/` directory separators should be used in the path). If not given then the **extractPaths** will be used as the install paths\ +**includeAllFiles**: (optional) If present and set to true, include all files in the SBOM, rather than only those recognized by Surfactant\ +**includeFileExts**: (optional) A list of file extensions to include, even if not recognized by Surfactant\ +**excludeFileExts**: (optional) A list of file extensions to exclude, even if recognized by Surfactant. Note that setting both this and includeAllFiles will still exclude the specified extensions ## Example configuration files diff --git a/surfactant/cmd/generate.py b/surfactant/cmd/generate.py index d8151c21..4e0f25fa 100644 --- a/surfactant/cmd/generate.py +++ b/surfactant/cmd/generate.py @@ -431,10 +431,16 @@ def sbom( except Exception as e: raise RuntimeError(f"Unable to process: {filepath}") from e + if not entry.includeFileExts: + entry.includeFileExts = [] + if not entry.excludeFileExts: + entry.excludeFileExts = [] + if ( ftype := pm.hook.identify_file_type(filepath=filepath) or include_all_files - ): + or os.path.splitext(filepath)[1] in entry.includeFileExts + ) and os.path.splitext(filepath)[1] not in entry.excludeFileExts: try: sw_parent, sw_children = get_software_entry( context, diff --git a/surfactant/config.py b/surfactant/config.py index d2fe2e35..7cd6b2d7 100644 --- a/surfactant/config.py +++ b/surfactant/config.py @@ -12,3 +12,5 @@ class ContextEntry: archive: Optional[str] = None installPrefix: Optional[str] = None includeAllFiles: Optional[bool] = None + includeFileExts: Optional[List[str]] = None + excludeFileExts: Optional[List[str]] = None