diff --git a/CHANGELOG.md b/CHANGELOG.md index 33449065..320b0840 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,11 @@ All versions prior to 0.0.9 are untracked. provided in a requirements file are checked against those reported by PyPI ([#229](https://github.com/trailofbits/pip-audit/pull/229)) +* Vulnerability sources: `pip-audit` now uniques each result based on its + alias set, reducing the amount of duplicate information in the default + columnar output format + ([#232](https://github.com/trailofbits/pip-audit/pull/232)) + ### Fixed * CLI: A regression causing excess output during `pip audit -r` diff --git a/pip_audit/_audit.py b/pip_audit/_audit.py index cf330cda..a3bf08df 100644 --- a/pip_audit/_audit.py +++ b/pip_audit/_audit.py @@ -4,7 +4,7 @@ import logging from dataclasses import dataclass -from typing import Iterator, List, Tuple +from typing import Iterator, List, Set, Tuple from pip_audit._dependency_source import DependencySource from pip_audit._service import Dependency, VulnerabilityResult, VulnerabilityService @@ -49,6 +49,12 @@ def audit( ) -> Iterator[Tuple[Dependency, List[VulnerabilityResult]]]: """ Perform the auditing step, collecting dependencies from `source`. + + Individual vulnerability results are uniqued based on their `aliases` sets: + any two results for the same dependency that share an alias are collapsed + into a single result with a union of all aliases. + + `PYSEC`-identified results are given priority over other results. """ specs = source.collect() @@ -57,4 +63,32 @@ def audit( logger.info(f"Dry run: would have audited {len(list(specs))} packages") return {} else: - yield from self._service.query_all(specs) + for dep, vulns in self._service.query_all(specs): + unique_vulns: List[VulnerabilityResult] = [] + seen_aliases: Set[str] = set() + + # First pass, add all PYSEC vulnerabilities and track their + # alias sets. + for v in vulns: + if not v.id.startswith("PYSEC"): + continue + + seen_aliases.update(v.aliases | {v.id}) + unique_vulns.append(v) + + # Second pass: add any non-PYSEC vulnerabilities. + for v in vulns: + # If we've already seen this vulnerability by another name, + # don't add it. Instead, find the previous result and update + # its alias set. + if seen_aliases.intersection(v.aliases | {v.id}): + idx, previous = next( + (i, p) for (i, p) in enumerate(unique_vulns) if p.alias_of(v) + ) + unique_vulns[idx] = previous.merge_aliases(v) + continue + + seen_aliases.update(v.aliases | {v.id}) + unique_vulns.append(v) + + yield (dep, unique_vulns) diff --git a/pip_audit/_service/interface.py b/pip_audit/_service/interface.py index b34b5ca5..f4209a5d 100644 --- a/pip_audit/_service/interface.py +++ b/pip_audit/_service/interface.py @@ -3,9 +3,11 @@ of vulnerability information for fully resolved Python packages. """ +from __future__ import annotations + from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Dict, Iterator, List, Tuple +from typing import Dict, Iterator, List, Set, Tuple from packaging.utils import canonicalize_name from packaging.version import Version @@ -88,6 +90,30 @@ class VulnerabilityResult: A list of versions that can be upgraded to that resolve the vulnerability. """ + aliases: Set[str] + """ + A set of aliases (alternative identifiers) for this result. + """ + + def alias_of(self, other: VulnerabilityResult) -> bool: + """ + Returns whether this result is an "alias" of another result. + + Two results are said to be aliases if their respective sets of + `{id, *aliases}` intersect at all. A result is therefore its own alias. + """ + return bool((self.aliases | {self.id}).intersection(other.aliases | {other.id})) + + def merge_aliases(self, other: VulnerabilityResult) -> VulnerabilityResult: + """ + Merge `other`'s aliases into this result, returning a new result. + """ + + # Our own ID should never occur in the alias set. + return VulnerabilityResult( + self.id, self.description, self.fix_versions, self.aliases | other.aliases - {self.id} + ) + class VulnerabilityService(ABC): """ diff --git a/pip_audit/_service/osv.py b/pip_audit/_service/osv.py index e1a3c7ca..cfbd56fa 100644 --- a/pip_audit/_service/osv.py +++ b/pip_audit/_service/osv.py @@ -78,6 +78,7 @@ def query(self, spec: Dependency) -> Tuple[Dependency, List[VulnerabilityResult] for vuln in response_json["vulns"]: id = vuln["id"] description = vuln["details"] + aliases = set(vuln["aliases"]) fix_versions: List[Version] = [] for affected in vuln["affected"]: pkg = affected["package"] @@ -100,6 +101,6 @@ def query(self, spec: Dependency) -> Tuple[Dependency, List[VulnerabilityResult] # The ranges aren't guaranteed to come in chronological order fix_versions.sort() - results.append(VulnerabilityResult(id, description, fix_versions)) + results.append(VulnerabilityResult(id, description, fix_versions, aliases)) return spec, results diff --git a/pip_audit/_service/pypi.py b/pip_audit/_service/pypi.py index 0b69c80b..2151230b 100644 --- a/pip_audit/_service/pypi.py +++ b/pip_audit/_service/pypi.py @@ -110,6 +110,8 @@ def query(self, spec: Dependency) -> Tuple[Dependency, List[VulnerabilityResult] # The ranges aren't guaranteed to come in chronological order fix_versions.sort() - results.append(VulnerabilityResult(v["id"], v["details"], fix_versions)) + results.append( + VulnerabilityResult(v["id"], v["details"], fix_versions, set(v["aliases"])) + ) return spec, results diff --git a/test/conftest.py b/test/conftest.py index 5234f472..e8bfbc31 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -35,6 +35,7 @@ def query(self, spec): id="fake-id", description="this is not a real result", fix_versions=[fixed], + aliases=set(), ) ] diff --git a/test/format/conftest.py b/test/format/conftest.py index 189589f9..3fa36c31 100644 --- a/test/format/conftest.py +++ b/test/format/conftest.py @@ -19,11 +19,13 @@ Version("1.1"), Version("1.4"), ], + aliases=set(), ), service.VulnerabilityResult( id="VULN-1", description="The second vulnerability", fix_versions=[Version("1.0")], + aliases=set(), ), ], _RESOLVED_DEP_BAR: [ @@ -31,6 +33,7 @@ id="VULN-2", description="The third vulnerability", fix_versions=[], + aliases=set(), ) ], } @@ -44,6 +47,7 @@ Version("1.1"), Version("1.4"), ], + aliases=set(), ), ], _SKIPPED_DEP: [], diff --git a/test/service/test_interface.py b/test/service/test_interface.py index 6a8de884..b10725e4 100644 --- a/test/service/test_interface.py +++ b/test/service/test_interface.py @@ -1,7 +1,12 @@ import pytest from packaging.version import Version -from pip_audit._service.interface import Dependency, ResolvedDependency, SkippedDependency +from pip_audit._service.interface import ( + Dependency, + ResolvedDependency, + SkippedDependency, + VulnerabilityResult, +) def test_dependency_typing(): @@ -41,3 +46,19 @@ def test_vulnerability_service_no_results(vuln_service, spec): _, vulns = service.query(spec) assert len(vulns) == 0 + + +def test_vulnerability_result_update_aliases(): + result1 = VulnerabilityResult( + id="FOO", description="stub", fix_versions=[Version("1.0.0")], aliases={"BAR", "BAZ", "ZAP"} + ) + result2 = VulnerabilityResult( + id="BAR", + description="stub", + fix_versions=[Version("1.0.0")], + aliases={"FOO", "BAZ", "QUUX"}, + ) + + merged = result1.merge_aliases(result2) + assert merged.id == "FOO" + assert merged.aliases == {"BAR", "BAZ", "ZAP", "QUUX"} diff --git a/test/service/test_osv.py b/test/service/test_osv.py index e4cc2bc0..8891e44d 100644 --- a/test/service/test_osv.py +++ b/test/service/test_osv.py @@ -8,6 +8,17 @@ import pip_audit._service as service +def get_mock_session(func): + class MockSession: + def __init__(self, create_response): + self.create_response = create_response + + def post(self, url, **kwargs): + return self.create_response() + + return MockSession(func) + + def test_osv(): osv = service.OsvService() dep = service.ResolvedDependency("jinja2", Version("2.4.1")) diff --git a/test/service/test_pypi.py b/test/service/test_pypi.py index 5eca0e9d..b14ad563 100644 --- a/test/service/test_pypi.py +++ b/test/service/test_pypi.py @@ -114,6 +114,7 @@ def json(self): return { "vulnerabilities": [ { + "aliases": ["foo", "bar"], "id": "VULN-0", "details": "The first vulnerability", "fixed_in": ["1.1", "1.4"], @@ -139,6 +140,7 @@ def json(self): id="VULN-0", description="The first vulnerability", fix_versions=[Version("1.1"), Version("1.4")], + aliases={"foo", "bar"}, ) @@ -177,6 +179,7 @@ def json(self): return { "vulnerabilities": [ { + "aliases": ["foo", "bar"], "id": "VULN-0", "details": "The first vulnerability", "fixed_in": ["invalid_version"], diff --git a/test/test_audit.py b/test/test_audit.py index 795670a9..7cb548df 100644 --- a/test/test_audit.py +++ b/test/test_audit.py @@ -1,10 +1,12 @@ +import itertools + import pretend # type: ignore import pytest from packaging.version import Version from pip_audit import _audit as audit from pip_audit._audit import AuditOptions, Auditor -from pip_audit._service.interface import VulnerabilityResult +from pip_audit._service.interface import VulnerabilityResult, VulnerabilityService def test_audit(vuln_service, dep_source): @@ -21,6 +23,7 @@ def test_audit(vuln_service, dep_source): id="fake-id", description="this is not a real result", fix_versions=[Version("1.1.0")], + aliases=set(), ) ], ) @@ -46,3 +49,84 @@ def test_audit_dry_run(monkeypatch, vuln_service, dep_source): # but an appropriate number of logging calls should be made. assert service.query_all.calls == [] assert len(logger.info.calls) == len(list(source.collect())) + + +@pytest.mark.parametrize( + "vulns", + itertools.permutations( + [ + VulnerabilityResult( + id="PYSEC-0", + description="fake", + fix_versions=[Version("1.1.0")], + aliases={"alias-1"}, + ), + VulnerabilityResult( + id="FAKE-1", + description="fake", + fix_versions=[Version("1.1.0")], + aliases={"alias-1", "alias-2"}, + ), + ] + ), +) +def test_audit_dedupes_aliases(dep_source, vulns): + class Service(VulnerabilityService): + def query(self, spec): + return spec, vulns + + service = Service() + source = dep_source() + + auditor = Auditor(service) + results = list(auditor.audit(source)) + + # One dependency, one unique vulnerability result for that dependency. + assert len(results) == 1 + assert len(results[0][1]) == 1 + assert results[0][1][0].id == "PYSEC-0" + + +@pytest.mark.parametrize( + "vulns", + itertools.permutations( + [ + VulnerabilityResult( + id="PYSEC-0", + description="fake", + fix_versions=[Version("1.1.0")], + aliases={"CVE-XXXX-YYYYY"}, + ), + VulnerabilityResult( + id="FAKE-1", + description="fake", + fix_versions=[Version("1.1.0")], + aliases={"CVE-XXXX-YYYYY"}, + ), + VulnerabilityResult( + id="CVE-XXXX-YYYYY", + description="fake", + fix_versions=[Version("1.1.0")], + aliases={"FAKE-1"}, + ), + ] + ), +) +def test_audit_dedupes_aliases_by_id(dep_source, vulns): + class Service(VulnerabilityService): + def query(self, spec): + return spec, vulns + + service = Service() + source = dep_source() + + auditor = Auditor(service) + results = list(auditor.audit(source)) + + # One dependency, one unique vulnerability result for that dependency. + assert len(results) == 1 + assert len(results[0][1]) == 1 + assert results[0][1][0].id == "PYSEC-0" + + # The result contains the merged alias set for all aliases. + assert results[0][1][0].aliases == {"FAKE-1", "CVE-XXXX-YYYYY"} diff --git a/test/test_fix.py b/test/test_fix.py index 71cfe789..5aa16915 100644 --- a/test/test_fix.py +++ b/test/test_fix.py @@ -19,6 +19,7 @@ def test_fix(vuln_service): id="fake-id", description="this is not a real result", fix_versions=[Version("1.0.0")], + aliases=set(), ) ] } @@ -36,6 +37,7 @@ def test_fix_skipped_deps(vuln_service): id="fake-id", description="this is not a real result", fix_versions=[Version("1.0.0")], + aliases=set(), ) ] } @@ -55,7 +57,10 @@ def test_fix_resolution_impossible(vuln_service): result: Dict[Dependency, List[VulnerabilityResult]] = { dep: [ VulnerabilityResult( - id="fake-id", description="this is not a real result", fix_versions=list() + id="fake-id", + description="this is not a real result", + fix_versions=list(), + aliases=set(), ) ] }