diff --git a/Dockerfile b/Dockerfile index ee1590807..e3072a582 100644 --- a/Dockerfile +++ b/Dockerfile @@ -46,7 +46,6 @@ COPY --from=golang_121 /usr/local/go /usr/local/go/go1.21 COPY --from=node_223 /usr/local/lib/node_modules/corepack /usr/local/lib/corepack COPY --from=node_223 /usr/local/bin/node /usr/local/bin/node COPY --from=builder /venv /venv -COPY --from=builder /src/utils/merge_syft_sbom.py /usr/local/bin/merge_syft_sbom # link corepack, yarn, and go to standard PATH location RUN ln -s /usr/local/lib/corepack/dist/corepack.js /usr/local/bin/corepack && \ diff --git a/tests/unit/data/sboms/merged.bom.json b/tests/unit/data/sboms/merged.bom.json deleted file mode 100644 index dc876ce57..000000000 --- a/tests/unit/data/sboms/merged.bom.json +++ /dev/null @@ -1,327 +0,0 @@ -{ - "$schema": "http://cyclonedx.org/schema/bom-1.5.schema.json", - "bomFormat": "CycloneDX", - "specVersion": "1.5", - "serialNumber": "urn:uuid:4370d1ba-7643-4579-8313-bc715da2fa90", - "version": 1, - "metadata": { - "timestamp": "2023-05-03T18:19:41Z", - "tools": { - "components": [ - { - "type": "application", - "author": "anchore", - "name": "syft", - "version": "0.100.0" - }, - { - "type": "application", - "author": "red hat", - "name": "cachi2" - } - ] - }, - "component": { - "bom-ref": "6b8edfe5f2756e0", - "type": "file", - "name": "/var/lib/containers/storage/vfs/dir/517aef0ffe20db360d19aa475dbbfbe03f452f53403881a31f9a475c83af788b" - } - }, - "components": [ - { - "bom-ref": "pkg:rpm/rhel/bash@4.4.20-4.el8_6?arch=x86_64&upstream=bash-4.4.20-4.el8_6.src.rpm&distro=rhel-8.7&package-id=5b17560161ffa050", - "type": "library", - "publisher": "Red Hat, Inc.", - "name": "bash", - "version": "4.4.20-4.el8_6", - "cpe": "cpe:2.3:a:redhat:bash:4.4.20-4.el8_6:*:*:*:*:*:*:*", - "purl": "pkg:rpm/rhel/bash@4.4.20-4.el8_6?arch=x86_64&upstream=bash-4.4.20-4.el8_6.src.rpm&distro=rhel-8.7", - "properties": [ - { - "name": "syft:package:foundBy", - "value": "rpmdb-cataloger" - }, - { - "name": "syft:package:metadataType", - "value": "RpmdbMetadata" - }, - { - "name": "syft:package:type", - "value": "rpm" - }, - { - "name": "syft:cpe23", - "value": "cpe:2.3:a:bash:bash:4.4.20-4.el8_6:*:*:*:*:*:*:*" - }, - { - "name": "syft:location:0:path", - "value": "var/lib/rpm/Packages" - }, - { - "name": "syft:metadata:release", - "value": "4.el8_6" - }, - { - "name": "syft:metadata:size", - "value": "6861444" - }, - { - "name": "syft:metadata:sourceRpm", - "value": "bash-4.4.20-4.el8_6.src.rpm" - } - ] - }, - { - "type": "operating-system", - "name": "rhel", - "version": "8.7", - "description": "Red Hat Enterprise Linux 8.7 (Ootpa)", - "cpe": "cpe:/o:redhat:enterprise_linux:8::baseos", - "swid": { - "tagId": "rhel", - "name": "rhel", - "version": "8.7" - }, - "externalReferences": [ - { - "url": "https://bugzilla.redhat.com/", - "type": "issue-tracker" - }, - { - "url": "https://www.redhat.com/", - "type": "website" - } - ], - "properties": [ - { - "name": "syft:distro:id", - "value": "rhel" - }, - { - "name": "syft:distro:idLike:0", - "value": "fedora" - }, - { - "name": "syft:distro:prettyName", - "value": "Red Hat Enterprise Linux 8.7 (Ootpa)" - }, - { - "name": "syft:distro:versionID", - "value": "8.7" - } - ] - }, - { - "name": "aiowsgi", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "0.8", - "purl": "pkg:pypi/aiowsgi@0.8", - "type": "library" - }, - { - "name": "appr", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "purl": "pkg:pypi/appr?checksum=sha256:ee6a0a38bed8cff46a562ed3620bc453141a02262ab0c8dd055824af2829ee5c&download_url=https://github.com/quay/appr/archive/37ff9a487a54ad41b59855ecd76ee092fe206a84.zip", - "type": "library" - }, - { - "name": "archive/tar", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "purl": "pkg:golang/archive/tar?type=package", - "type": "library" - }, - { - "name": "cachi2", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "0.0.1", - "purl": "pkg:pypi/cachi2@0.0.1?vcs_url=git%2Bssh://git%40github.com/containerbuildsystem/cachi2%40fc0d6079c2dc9b2a491c0848e550ad3509986110", - "type": "library" - }, - { - "name": "cachito-npm-without-deps", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "purl": "pkg:npm/cachito-npm-without-deps?vcs_url=git%2Bhttps://github.com/cachito-testing/cachito-npm-without-deps.git%402f0ce1d7b1f8b35572d919428b965285a69583f6", - "type": "library" - }, - { - "name": "code.gitea.io/sdk/gitea", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v0.15.1", - "purl": "pkg:golang/code.gitea.io/sdk/gitea@v0.15.1?type=module", - "type": "library" - }, - { - "name": "code.gitea.io/sdk/gitea", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v0.15.1", - "purl": "pkg:golang/code.gitea.io/sdk/gitea@v0.15.1?type=package", - "type": "library" - }, - { - "name": "fecha", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "purl": "pkg:npm/fecha?checksum=sha512:8ae71e98d68e38e1f6e4c629187684dd85e4dc96647c7219b1dd189598ea52865e947f0ad94a7001fa8fb5eccf58467fe34ad10066e831af3374120134604bd5&download_url=https://github.com/taylorhakes/fecha/archive/91680e4db1415fea33eac878cfd889c80a7b55c7.tar.gz", - "type": "library" - }, - { - "name": "github.com/docker/cli/cli/config", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v23.0.0-rc.3+incompatible", - "purl": "pkg:golang/github.com/docker/cli/cli/config@v23.0.0-rc.3%2Bincompatible?type=package", - "type": "library" - }, - { - "name": "github.com/docker/cli", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v23.0.0-rc.3+incompatible", - "purl": "pkg:golang/github.com/docker/cli@v23.0.0-rc.3%2Bincompatible?type=module", - "type": "library" - }, - { - "name": "knative.dev/pkg/metrics", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v0.0.0-20230125083639-408ad0773f47", - "purl": "pkg:golang/knative.dev/pkg/metrics@v0.0.0-20230125083639-408ad0773f47?type=package", - "type": "library" - }, - { - "name": "knative.dev/pkg", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v0.0.0-20230125083639-408ad0773f47", - "purl": "pkg:golang/knative.dev/pkg@v0.0.0-20230125083639-408ad0773f47?type=module", - "type": "library" - }, - { - "name": "github.com/redhat-appstudio/build-service", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v0.0.0-20230503110830-d1a9e858489d", - "purl": "pkg:golang/github.com/redhat-appstudio/build-service@v0.0.0-20230503110830-d1a9e858489d?type=module", - "type": "library" - }, - { - "name": "github.com/redhat-appstudio/build-service", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v0.0.0-20230503110830-d1a9e858489d", - "purl": "pkg:golang/github.com/redhat-appstudio/build-service@v0.0.0-20230503110830-d1a9e858489d?type=package", - "type": "library" - }, - { - "name": "github.com/cachito-testing/gomod-pandemonium/terminaltor", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v1.0.0", - "purl": "pkg:golang/github.com/cachito-testing/gomod-pandemonium/terminaltor@v1.0.0?type=module", - "type": "library" - }, - { - "name": "github.com/cachito-testing/gomod-pandemonium/terminaltor", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "v1.0.0", - "purl": "pkg:golang/github.com/cachito-testing/gomod-pandemonium/terminaltor@v1.0.0?type=package", - "type": "library" - }, - { - "name": "PyYAML", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "6.0", - "purl": "pkg:pypi/pyyaml@6.0", - "type": "library" - }, - { - "name": "test_package_cachi2", - "properties": [ - { - "name": "cachi2:found_by", - "value": "cachi2" - } - ], - "version": "1.0.0", - "purl": "pkg:pypi/test-package-cachi2@1.0.0?vcs_url=git%2Bssh://git%40github.com/brunoapimentel/pip-e2e-test.git%40294df352deed835cf703ae8a799926418ae5fd3b", - "type": "library" - } - ] -} diff --git a/tests/unit/test_merge_syft_sbom.py b/tests/unit/test_merge_syft_sbom.py deleted file mode 100644 index 6329be85f..000000000 --- a/tests/unit/test_merge_syft_sbom.py +++ /dev/null @@ -1,129 +0,0 @@ -import json -from pathlib import Path -from typing import Any - -import pytest - -from utils.merge_syft_sbom import merge_sboms - -TOOLS_METADATA = { - "syft-cyclonedx-1.4": { - "name": "syft", - "vendor": "anchore", - "version": "0.47.0", - }, - "syft-cyclonedx-1.5": { - "type": "application", - "author": "anchore", - "name": "syft", - "version": "0.100.0", - }, - "cachi2-cyclonedx-1.4": { - "name": "cachi2", - "vendor": "red hat", - }, - "cachi2-cyclonedx-1.5": { - "type": "application", - "author": "red hat", - "name": "cachi2", - }, -} - - -def test_merge_sboms(data_dir: Path) -> None: - result = merge_sboms(f"{data_dir}/sboms/cachi2.bom.json", f"{data_dir}/sboms/syft.bom.json") - - with open(f"{data_dir}/sboms/merged.bom.json") as file: - expected_sbom = json.load(file) - - assert json.loads(result) == expected_sbom - - -@pytest.mark.parametrize( - "syft_tools_metadata, expected_result", - [ - ( - [TOOLS_METADATA["syft-cyclonedx-1.4"]], - [ - TOOLS_METADATA["syft-cyclonedx-1.4"], - TOOLS_METADATA["cachi2-cyclonedx-1.4"], - ], - ), - ( - { - "components": [TOOLS_METADATA["syft-cyclonedx-1.5"]], - }, - { - "components": [ - TOOLS_METADATA["syft-cyclonedx-1.5"], - TOOLS_METADATA["cachi2-cyclonedx-1.5"], - ], - }, - ), - ], -) -def test_merging_tools_metadata( - syft_tools_metadata: str, expected_result: Any, tmpdir: Path -) -> None: - syft_sbom = { - "bomFormat": "CycloneDX", - "specVersion": "1.5", - "metadata": { - "tools": syft_tools_metadata, - }, - "components": [], - } - - cachi2_sbom = { - "bomFormat": "CycloneDX", - "specVersion": "1.4", - "metadata": { - "tools": [TOOLS_METADATA["cachi2-cyclonedx-1.4"]], - }, - "components": [], - } - - syft_sbom_path = f"{tmpdir}/syft.bom.json" - cachi2_sbom_path = f"{tmpdir}/cachi2.bom.json" - - with open(syft_sbom_path, "w") as file: - json.dump(syft_sbom, file) - - with open(cachi2_sbom_path, "w") as file: - json.dump(cachi2_sbom, file) - - result = merge_sboms(cachi2_sbom_path, syft_sbom_path) - - assert json.loads(result)["metadata"]["tools"] == expected_result - - -def test_invalid_tools_format(tmpdir: Path) -> None: - syft_sbom = { - "bomFormat": "CycloneDX", - "specVersion": "1.5", - "metadata": { - "tools": "invalid", - }, - "components": [], - } - - cachi2_sbom = { - "bomFormat": "CycloneDX", - "specVersion": "1.4", - "metadata": { - "tools": [TOOLS_METADATA["cachi2-cyclonedx-1.4"]], - }, - "components": [], - } - - syft_sbom_path = f"{tmpdir}/syft.bom.json" - cachi2_sbom_path = f"{tmpdir}/cachi2.bom.json" - - with open(syft_sbom_path, "w") as file: - json.dump(syft_sbom, file) - - with open(cachi2_sbom_path, "w") as file: - json.dump(cachi2_sbom, file) - - with pytest.raises(RuntimeError): - merge_sboms(cachi2_sbom_path, syft_sbom_path) diff --git a/tox.ini b/tox.ini index 6ef05cb1c..e73da52bc 100644 --- a/tox.ini +++ b/tox.ini @@ -36,7 +36,7 @@ commands_post = [testenv:flake8] skip_install = true commands = - flake8 cachi2 tests utils + flake8 cachi2 tests [testenv:bandit] skip_install = true @@ -47,16 +47,16 @@ commands = description = black checks [Mandatory] skip_install = true commands = - black --check --diff cachi2 tests utils + black --check --diff cachi2 tests [testenv:isort] skip_install = true commands = - isort --check --diff --color cachi2 tests utils + isort --check --diff --color cachi2 tests [testenv:mypy] commands = - mypy --install-types --non-interactive cachi2 tests utils + mypy --install-types --non-interactive cachi2 tests [testenv:integration] passenv = diff --git a/utils/__init__.py b/utils/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/utils/merge_syft_sbom.py b/utils/merge_syft_sbom.py deleted file mode 100755 index 8cd53c349..000000000 --- a/utils/merge_syft_sbom.py +++ /dev/null @@ -1,186 +0,0 @@ -#!/usr/bin/env python3 -import json -from argparse import ArgumentParser -from typing import Any, Callable -from urllib.parse import quote_plus, urlsplit - - -def _is_syft_local_golang_component(component: dict) -> bool: - """ - Check if a Syft Golang reported component is a local replacement. - - Local replacements are reported in a very different way by Cachi2, which is why the same - reports by Syft should be removed. - """ - return component.get("purl", "").startswith("pkg:golang") and ( - component.get("name", "").startswith(".") or component.get("version", "") == "(devel)" - ) - - -def _is_cachi2_non_registry_dependency(component: dict) -> bool: - """ - Check if Cachi2 component was fetched from a VCS or a direct file location. - - Cachi2 reports non-registry components in a different way from Syft, so the reports from - Syft need to be removed. - - Unfortunately, there's no way to determine which components are non-registry by looking - at the Syft report alone. This function is meant to create a list of non-registry components - from Cachi2's SBOM, then remove the corresponding ones reported by Syft for the merged SBOM. - - Note that this function is only applicable for PyPI or NPM components. - """ - purl = component.get("purl", "") - - return (purl.startswith("pkg:pypi") or purl.startswith("pkg:npm")) and ( - "vcs_url=" in purl or "download_url=" in purl - ) - - -def _unique_key_cachi2(component: dict) -> str: - """ - Create a unique key from Cachi2 reported components. - - This is done by taking a purl and removing any qualifiers and subpaths. - - See https://github.com/package-url/purl-spec/tree/master#purl for more info on purls. - """ - url = urlsplit(component["purl"]) - return url.scheme + ":" + url.path - - -def _unique_key_syft(component: dict) -> str: - """ - Create a unique key for Syft reported components. - - This is done by taking a lowercase namespace/name, and URL encoding the version. - - Syft does not set any qualifier for NPM, Pip or Golang, so there's no need to remove them - as done in _unique_key_cachi2. - - If a Syft component lacks a purl (e.g. type OS), we'll use its name and version instead. - """ - if "purl" not in component: - return component.get("name", "") + "@" + component.get("version", "") - - if "@" in component["purl"]: - name, version = component["purl"].split("@") - - if name.startswith("pkg:pypi"): - name = name.lower() - - if name.startswith("pkg:golang"): - version = quote_plus(version) - - return f"{name}@{version}" - else: - return component["purl"] - - -def _get_syft_component_filter(cachi_sbom_components: list[dict[str, Any]]) -> Callable: - """ - Get a function that filters out Syft components for the merged SBOM. - - This function currently considers a Syft component as a duplicate/removable if: - - it has the same key as a Cachi2 component - - it is a local Golang replacement - - is a non-registry component also reported by Cachi2 - - Note that for the last bullet, we can only rely on the Pip dependency's name to find a - duplicate. This is because Cachi2 does not report a non-PyPI Pip dependency's version. - - Even though multiple versions of a same dependency can be available in the same project, - we are removing all Syft instances by name only because Cachi2 will report them correctly, - given that it scans all the source code properly and the image is built hermetically. - """ - cachi2_non_registry_components = [ - component["name"] - for component in cachi_sbom_components - if _is_cachi2_non_registry_dependency(component) - ] - - cachi2_indexed_components = { - _unique_key_cachi2(component): component for component in cachi_sbom_components - } - - def is_duplicate_non_registry_component(component: dict[str, Any]) -> bool: - return component["name"] in cachi2_non_registry_components - - def component_is_duplicated(component: dict[str, Any]) -> bool: - key = _unique_key_syft(component) - - return ( - _is_syft_local_golang_component(component) - or is_duplicate_non_registry_component(component) - or key in cachi2_indexed_components.keys() - ) - - return component_is_duplicated - - -def _merge_tools_metadata(syft_sbom: dict[Any, Any], cachi2_sbom: dict[Any, Any]) -> None: - """Merge the content of tools in the metadata section of the SBOM. - - With CycloneDX 1.5, a new format for specifying tools was introduced, and the format from 1.4 - was marked as deprecated. - - This function aims to support both formats in the Syft SBOM. We're assuming the Cachi2 SBOM - was generated with the same version as this script, and it will be in the older format. - """ - syft_tools = syft_sbom["metadata"]["tools"] - cachi2_tools = cachi2_sbom["metadata"]["tools"] - - if isinstance(syft_tools, dict): - components = [] - - for t in cachi2_tools: - components.append( - { - "author": t["vendor"], - "name": t["name"], - "type": "application", - } - ) - - syft_tools["components"].extend(components) - elif isinstance(syft_tools, list): - syft_tools.extend(cachi2_tools) - else: - raise RuntimeError( - "The .metadata.tools JSON key is in an unexpected format. " - f"Expected dict or list, got {type(syft_tools)}." - ) - - -def merge_sboms(cachi2_sbom_path: str, syft_sbom_path: str) -> str: - """Merge Cachi2 components into the Syft SBOM while removing duplicates.""" - with open(cachi2_sbom_path) as file: - cachi2_sbom = json.load(file) - - with open(syft_sbom_path) as file: - syft_sbom = json.load(file) - - is_duplicate_component = _get_syft_component_filter(cachi2_sbom["components"]) - - filtered_syft_components = [ - c for c in syft_sbom.get("components", []) if not is_duplicate_component(c) - ] - - syft_sbom["components"] = filtered_syft_components + cachi2_sbom["components"] - - _merge_tools_metadata(syft_sbom, cachi2_sbom) - - return json.dumps(syft_sbom, indent=2) - - -if __name__ == "__main__": - parser = ArgumentParser() - - parser.add_argument("cachi2_sbom_path") - parser.add_argument("syft_sbom_path") - - args = parser.parse_args() - - merged_sbom = merge_sboms(args.cachi2_sbom_path, args.syft_sbom_path) - - print(merged_sbom)