From 0a716791f086834c7771be8bdcabdb0746ee838c Mon Sep 17 00:00:00 2001 From: Kendall Harter Date: Thu, 15 Jun 2023 14:06:02 -0700 Subject: [PATCH 1/7] Start of Java info extraction --- pyproject.toml | 3 +- surfactant/infoextractors/java_file.py | 71 ++++++++++++++++++++++++++ surfactant/plugin/manager.py | 2 + 3 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 surfactant/infoextractors/java_file.py diff --git a/pyproject.toml b/pyproject.toml index 8a07a9f2..62888e89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,8 @@ dependencies = [ "defusedxml", "spdx-tools>=0.7.1,==0.7.*", "pluggy", - "click" + "click", + "javatools" ] dynamic = ["version"] diff --git a/surfactant/infoextractors/java_file.py b/surfactant/infoextractors/java_file.py new file mode 100644 index 00000000..ba94fc57 --- /dev/null +++ b/surfactant/infoextractors/java_file.py @@ -0,0 +1,71 @@ +import surfactant.plugin +from surfactant.sbomtypes import SBOM, Software +from typing import Any, Dict +import javatools.jarinfo + +# TODO: Add documentation about how to install javatools +# swig and libssl-dev needs to be install on Ubuntu +# https://gitlab.com/m2crypto/m2crypto/-/blob/master/INSTALL.rst + +# TODO: Pull some files off of Maven (or something) to test on + +def supports_file(filetype: str) -> bool: + return filetype in ("JAVACLASS", "JAR", "WAR", "EAR") + + +@surfactant.plugin.hookimpl +def extract_file_info(sbom: SBOM, software: Software, filename: str, filetype: str) -> object: + if not supports_file(filetype): + return None + return extract_java_info(filename, filetype) + +# Map from internal major number to Java SE version +# https://docs.oracle.com/javase/specs/jvms/se20/html/jvms-4.html#jvms-4.1-200-B.2 +_JAVA_VERSION_MAPPING = { + 45: "1.1", + 46: "1.2", + 47: "1.3", + 48: "1.4", + 49: "5.0", + 50: "6", + 51: "7", + 52: "8", + 53: "9", + 54: "10", + 55: "11", + 56: "12", + 57: "13", + 58: "14", + 59: "15", + 60: "16", + 61: "17", + 62: "18", + 63: "19", + 
64: "20", +} + +def handle_java_class(info: Dict[str, Any], class_info: javatools.JavaClassInfo): + # This shouldn't happen but just in-case it does don't overwrite information + if class_info.get_this() in info["classes"]: + return + info["classes"][class_info.get_this()] = {} + add_to = info["classes"][class_info.get_this()] + (major_version, _) = class_info.get_version() + if major_version in _JAVA_VERSION_MAPPING: + add_to["JavaMinSEVersion"] = _JAVA_VERSION_MAPPING[major_version] + add_to["JavaExports"] = [*class_info.get_provides()] + add_to["JavaImports"] = [*class_info.get_requires()] + + +def extract_java_info(filename: str, filetype: str) -> object: + info = {"classes": {}} + if filetype in ("JAR", "EAR", "WAR"): + with javatools.jarinfo.JarInfo(filename) as jarinfo: + for class_ in jarinfo.get_classes(): + handle_java_class(info, jarinfo.get_classinfo(class_)) + elif filetype == "JAVACLASS": + with open(filename, "rb") as f: + class_info = javatools.JavaClassInfo() + class_info.unpack(javatools.unpack(f)) + handle_java_class(info, class_info) + return info diff --git a/surfactant/plugin/manager.py b/surfactant/plugin/manager.py index 8a5b94b3..9a878b93 100644 --- a/surfactant/plugin/manager.py +++ b/surfactant/plugin/manager.py @@ -15,6 +15,7 @@ def _register_plugins(pm: pluggy.PluginManager) -> None: a_out_file, coff_file, elf_file, + java_file, ole_file, pe_file, ) @@ -31,6 +32,7 @@ def _register_plugins(pm: pluggy.PluginManager) -> None: a_out_file, coff_file, elf_file, + java_file, pe_file, ole_file, dotnet_relationship, From 795965fdf3d6e589e8095d45062aba2b56db4cef Mon Sep 17 00:00:00 2001 From: Kendall Harter Date: Thu, 22 Jun 2023 01:46:50 -0700 Subject: [PATCH 2/7] Relationships for Java files. 
--- surfactant/infoextractors/java_file.py | 24 +++++----- surfactant/plugin/manager.py | 2 + surfactant/relationships/java_relationship.py | 44 +++++++++++++++++++ 3 files changed, 60 insertions(+), 10 deletions(-) create mode 100644 surfactant/relationships/java_relationship.py diff --git a/surfactant/infoextractors/java_file.py b/surfactant/infoextractors/java_file.py index ba94fc57..8add07bf 100644 --- a/surfactant/infoextractors/java_file.py +++ b/surfactant/infoextractors/java_file.py @@ -4,11 +4,9 @@ import javatools.jarinfo # TODO: Add documentation about how to install javatools -# swig and libssl-dev needs to be install on Ubuntu +# swig and libssl-dev needs to be installed on Ubuntu # https://gitlab.com/m2crypto/m2crypto/-/blob/master/INSTALL.rst -# TODO: Pull some files off of Maven (or something) to test on - def supports_file(filetype: str) -> bool: return filetype in ("JAVACLASS", "JAR", "WAR", "EAR") @@ -46,19 +44,25 @@ def extract_file_info(sbom: SBOM, software: Software, filename: str, filetype: s def handle_java_class(info: Dict[str, Any], class_info: javatools.JavaClassInfo): # This shouldn't happen but just in-case it does don't overwrite information - if class_info.get_this() in info["classes"]: + if class_info.get_this() in info["javaClasses"]: return - info["classes"][class_info.get_this()] = {} - add_to = info["classes"][class_info.get_this()] + info["javaClasses"][class_info.get_this()] = {} + add_to = info["javaClasses"][class_info.get_this()] (major_version, _) = class_info.get_version() if major_version in _JAVA_VERSION_MAPPING: - add_to["JavaMinSEVersion"] = _JAVA_VERSION_MAPPING[major_version] - add_to["JavaExports"] = [*class_info.get_provides()] - add_to["JavaImports"] = [*class_info.get_requires()] + add_to["javaMinSEVersion"] = _JAVA_VERSION_MAPPING[major_version] + add_to["javaExports"] = [*class_info.get_provides()] + # I've seen this fail for some reason; catch errors on it and just ignore + # them if it fails + try: + 
add_to["javaImports"] = [*class_info.get_requires()] + except IndexError: + # Should this be set to "Unknown" or similar? + add_to["javaImports"] = [] def extract_java_info(filename: str, filetype: str) -> object: - info = {"classes": {}} + info = {"javaClasses": {}} if filetype in ("JAR", "EAR", "WAR"): with javatools.jarinfo.JarInfo(filename) as jarinfo: for class_ in jarinfo.get_classes(): diff --git a/surfactant/plugin/manager.py b/surfactant/plugin/manager.py index 9a878b93..f193476a 100644 --- a/surfactant/plugin/manager.py +++ b/surfactant/plugin/manager.py @@ -23,6 +23,7 @@ def _register_plugins(pm: pluggy.PluginManager) -> None: from surfactant.relationships import ( dotnet_relationship, elf_relationship, + java_relationship, pe_relationship, ) @@ -37,6 +38,7 @@ def _register_plugins(pm: pluggy.PluginManager) -> None: ole_file, dotnet_relationship, elf_relationship, + java_relationship, pe_relationship, csv_writer, cytrics_writer, diff --git a/surfactant/relationships/java_relationship.py b/surfactant/relationships/java_relationship.py new file mode 100644 index 00000000..3686f8dd --- /dev/null +++ b/surfactant/relationships/java_relationship.py @@ -0,0 +1,44 @@ +from typing import * + +import surfactant.plugin +from surfactant.sbomtypes import SBOM, Relationship, Software + +def has_required_fields(metadata) -> bool: + return "javaClasses" in metadata + + +class _ExportDict: + created = False + supplied_by = {} + + +def _create_export_dict(sbom: SBOM): + if _ExportDict.created: + return + for software_entry in sbom.software: + for metadata in software_entry.metadata: + if "javaClasses" in metadata: + for class_info in metadata["javaClasses"].values(): + for export in class_info["javaExports"]: + _ExportDict.supplied_by[export] = software_entry.UUID + _ExportDict.created = True + + +@surfactant.plugin.hookimpl +def establish_relationships( + sbom: SBOM, software: Software, metadata +) -> Optional[List[Relationship]]: + if not has_required_fields(metadata): 
+ return None + _create_export_dict(sbom) + relationships = [] + dependant_uuid = software.UUID + for class_info in metadata["javaClasses"].values(): + for import_ in class_info["javaImports"]: + if import_ in _ExportDict.supplied_by: + supplier_uuid = _ExportDict.supplied_by[import_] + if supplier_uuid != dependant_uuid: + rel = Relationship(dependant_uuid, supplier_uuid, "Uses") + if rel not in relationships: + relationships.append(rel) + return relationships From 32b7e7acbebd3c286c19706660f7f0337e51d6d7 Mon Sep 17 00:00:00 2001 From: Kendall Harter Date: Mon, 26 Jun 2023 12:47:04 -0700 Subject: [PATCH 3/7] Remove star import --- surfactant/relationships/java_relationship.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/surfactant/relationships/java_relationship.py b/surfactant/relationships/java_relationship.py index 3686f8dd..56bf8049 100644 --- a/surfactant/relationships/java_relationship.py +++ b/surfactant/relationships/java_relationship.py @@ -1,4 +1,4 @@ -from typing import * +from typing import Optional, List import surfactant.plugin from surfactant.sbomtypes import SBOM, Relationship, Software From 69425ac582a42a931e92de0578f3934c7f2e21f7 Mon Sep 17 00:00:00 2001 From: Kendall Harter Date: Wed, 5 Jul 2023 09:57:20 -0700 Subject: [PATCH 4/7] Java relationship unit test. --- tests/relationships/test_java.py | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 tests/relationships/test_java.py diff --git a/tests/relationships/test_java.py b/tests/relationships/test_java.py new file mode 100644 index 00000000..05a5ec88 --- /dev/null +++ b/tests/relationships/test_java.py @@ -0,0 +1,52 @@ +# Copyright 2023 Lawrence Livermore National Security, LLC +# See the top-level LICENSE file for details.
+# +# SPDX-License-Identifier: MIT + +from surfactant.plugin.manager import get_plugin_manager +from surfactant.sbomtypes import SBOM, Relationship, Software + +sbom = SBOM( + software=[ + Software( + UUID="supplier", + fileName=["supplier"], + installPath=["supplier"], + metadata=[ + { + "javaClasses": { + "dummy": { + "javaExports": [ + "someFunc():void" + ] + } + } + } + ], + ), + Software( + UUID="consumer", + fileName=["consumer"], + installPath=["consumer"], + metadata=[ + { + "javaClasses": { + "dummy": { + "javaExports": [], + "javaImports": [ + "someFunc():void" + ], + }, + }, + }, + ], + ), + ], + relationships=[], +) + +def test_java_relationship(): + javaPlugin = get_plugin_manager().get_plugin("surfactant.relationships.java_relationship") + sw = sbom.software[1] + md = sw.metadata[0] + assert javaPlugin.establish_relationships(sbom, sw, md) == [Relationship("consumer", "supplier", "Uses")] From 208cb895975936741e5dc4e0ff0a4f43258a145d Mon Sep 17 00:00:00 2001 From: Kendall Harter Date: Wed, 5 Jul 2023 10:22:07 -0700 Subject: [PATCH 5/7] Move create_export_dict to class method of _ExportDict --- surfactant/relationships/java_relationship.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/surfactant/relationships/java_relationship.py b/surfactant/relationships/java_relationship.py index 56bf8049..195efeff 100644 --- a/surfactant/relationships/java_relationship.py +++ b/surfactant/relationships/java_relationship.py @@ -11,17 +11,17 @@ class _ExportDict: created = False supplied_by = {} - -def _create_export_dict(sbom: SBOM): - if _ExportDict.created: - return - for software_entry in sbom.software: - for metadata in software_entry.metadata: - if "javaClasses" in metadata: - for class_info in metadata["javaClasses"].values(): - for export in class_info["javaExports"]: - _ExportDict.supplied_by[export] = software_entry.UUID - _ExportDict.created = True + @classmethod + def create_export_dict(cls, sbom: SBOM): + if 
cls.created: + return + for software_entry in sbom.software: + for metadata in software_entry.metadata: + if "javaClasses" in metadata: + for class_info in metadata["javaClasses"].values(): + for export in class_info["javaExports"]: + cls.supplied_by[export] = software_entry.UUID + cls.created = True @surfactant.plugin.hookimpl @@ -30,7 +30,7 @@ def establish_relationships( ) -> Optional[List[Relationship]]: if not has_required_fields(metadata): return None - _create_export_dict(sbom) + _ExportDict.create_export_dict(sbom) relationships = [] dependant_uuid = software.UUID for class_info in metadata["javaClasses"].values(): From 4ff618eaa5a16a0e1e5348e87e32798fc522cab9 Mon Sep 17 00:00:00 2001 From: Kendall Harter Date: Tue, 11 Jul 2023 10:00:46 -0700 Subject: [PATCH 6/7] Move more Java import logic into class --- surfactant/infoextractors/java_file.py | 9 ++++++-- surfactant/relationships/java_relationship.py | 12 ++++++++--- tests/relationships/test_java.py | 21 ++++++------------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/surfactant/infoextractors/java_file.py b/surfactant/infoextractors/java_file.py index 8add07bf..72e5a3e9 100644 --- a/surfactant/infoextractors/java_file.py +++ b/surfactant/infoextractors/java_file.py @@ -1,12 +1,15 @@ -import surfactant.plugin -from surfactant.sbomtypes import SBOM, Software from typing import Any, Dict + import javatools.jarinfo +import surfactant.plugin +from surfactant.sbomtypes import SBOM, Software + # TODO: Add documentation about how to install javatools # swig and libssl-dev needs to be installed on Ubuntu # https://gitlab.com/m2crypto/m2crypto/-/blob/master/INSTALL.rst + def supports_file(filetype: str) -> bool: return filetype in ("JAVACLASS", "JAR", "WAR", "EAR") @@ -17,6 +20,7 @@ def extract_file_info(sbom: SBOM, software: Software, filename: str, filetype: s return None return extract_java_info(filename, filetype) + # Map from internal major number to Java SE version # 
https://docs.oracle.com/javase/specs/jvms/se20/html/jvms-4.html#jvms-4.1-200-B.2 _JAVA_VERSION_MAPPING = { @@ -42,6 +46,7 @@ def extract_file_info(sbom: SBOM, software: Software, filename: str, filetype: s 64: "20", } + def handle_java_class(info: Dict[str, Any], class_info: javatools.JavaClassInfo): # This shouldn't happen but just in-case it does don't overwrite information if class_info.get_this() in info["javaClasses"]: diff --git a/surfactant/relationships/java_relationship.py b/surfactant/relationships/java_relationship.py index 195efeff..46e42153 100644 --- a/surfactant/relationships/java_relationship.py +++ b/surfactant/relationships/java_relationship.py @@ -1,8 +1,9 @@ -from typing import Optional, List +from typing import List, Optional import surfactant.plugin from surfactant.sbomtypes import SBOM, Relationship, Software + def has_required_fields(metadata) -> bool: return "javaClasses" in metadata @@ -23,6 +24,12 @@ def create_export_dict(cls, sbom: SBOM): cls.supplied_by[export] = software_entry.UUID cls.created = True + @classmethod + def get_supplier(cls, import_name: str) -> Optional[str]: + if import_name in cls.supplied_by: + return cls.supplied_by[import_name] + return None + @surfactant.plugin.hookimpl def establish_relationships( @@ -35,8 +42,7 @@ def establish_relationships( dependant_uuid = software.UUID for class_info in metadata["javaClasses"].values(): for import_ in class_info["javaImports"]: - if import_ in _ExportDict.supplied_by: - supplier_uuid = _ExportDict.supplied_by[import_] + if supplier_uuid := _ExportDict.get_supplier(import_): if supplier_uuid != dependant_uuid: rel = Relationship(dependant_uuid, supplier_uuid, "Uses") if rel not in relationships: diff --git a/tests/relationships/test_java.py b/tests/relationships/test_java.py index 05a5ec88..7a8252cf 100644 --- a/tests/relationships/test_java.py +++ b/tests/relationships/test_java.py @@ -12,17 +12,7 @@ UUID="supplier", fileName=["supplier"], installPath=["supplier"], - 
metadata=[ - { - "javaClasses": { - "dummy": { - "javaExports": [ - "someFunc():void" - ] - } - } - } - ], + metadata=[{"javaClasses": {"dummy": {"javaExports": ["someFunc():void"]}}}], ), Software( UUID="consumer", @@ -33,9 +23,7 @@ "javaClasses": { "dummy": { "javaExports": [], - "javaImports": [ - "someFunc():void" - ], + "javaImports": ["someFunc():void"], }, }, }, @@ -45,8 +33,11 @@ relationships=[], ) + def test_java_relationship(): javaPlugin = get_plugin_manager().get_plugin("surfactant.relationships.java_relationship") sw = sbom.software[1] md = sw.metadata[0] - assert javaPlugin.establish_relationships(sbom, sw, md) == [Relationship("consumer", "supplier", "Uses")] + assert javaPlugin.establish_relationships(sbom, sw, md) == [ + Relationship("consumer", "supplier", "Uses") + ] From 135b2adf274ab534f3ecc8590dfaed405ce10a0b Mon Sep 17 00:00:00 2001 From: Ryan Mast Date: Thu, 27 Jul 2023 13:00:53 -0700 Subject: [PATCH 7/7] Pin javatools to version >=1.6.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 62888e89..3fae8854 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ dependencies = [ "spdx-tools>=0.7.1,==0.7.*", "pluggy", "click", - "javatools" + "javatools>=1.6.0" ] dynamic = ["version"]