Skip to content

Commit

Permalink
pip_audit: dedupe vulnerability reports by alias (#232)
Browse files Browse the repository at this point in the history
* pip_audit: dedupe vulnerability reports by alias

* pip_audit: add aliases to each VulnerabilityResult

This will make normalizing OSV's results a little easier,
and gives us the ability to add more detail to the
output formats in the future.

* pip_audit: OSV: handle aliases

* pip_audit, test: refactor alias handling

* alias set merging

* CHANGELOG: record changes

Co-authored-by: Alex Cameron <[email protected]>
  • Loading branch information
woodruffw and tetsuo-cpp authored Feb 7, 2022
1 parent b2c8227 commit 6a6b1fc
Show file tree
Hide file tree
Showing 12 changed files with 205 additions and 8 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ All versions prior to 0.0.9 are untracked.
provided in a requirements file are checked against those reported by PyPI
([#229](https://github.com/trailofbits/pip-audit/pull/229))

* Vulnerability sources: `pip-audit` now uniques each result based on its
alias set, reducing the amount of duplicate information in the default
columnar output format
([#232](https://github.com/trailofbits/pip-audit/pull/232))

### Fixed

* CLI: A regression causing excess output during `pip audit -r`
Expand Down
38 changes: 36 additions & 2 deletions pip_audit/_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import logging
from dataclasses import dataclass
from typing import Iterator, List, Tuple
from typing import Iterator, List, Set, Tuple

from pip_audit._dependency_source import DependencySource
from pip_audit._service import Dependency, VulnerabilityResult, VulnerabilityService
Expand Down Expand Up @@ -49,6 +49,12 @@ def audit(
) -> Iterator[Tuple[Dependency, List[VulnerabilityResult]]]:
"""
Perform the auditing step, collecting dependencies from `source`.
Individual vulnerability results are uniqued based on their `aliases` sets:
any two results for the same dependency that share an alias are collapsed
into a single result with a union of all aliases.
`PYSEC`-identified results are given priority over other results.
"""
specs = source.collect()

Expand All @@ -57,4 +63,32 @@ def audit(
logger.info(f"Dry run: would have audited {len(list(specs))} packages")
return {}
else:
yield from self._service.query_all(specs)
for dep, vulns in self._service.query_all(specs):
unique_vulns: List[VulnerabilityResult] = []
seen_aliases: Set[str] = set()

# First pass, add all PYSEC vulnerabilities and track their
# alias sets.
for v in vulns:
if not v.id.startswith("PYSEC"):
continue

seen_aliases.update(v.aliases | {v.id})
unique_vulns.append(v)

# Second pass: add any non-PYSEC vulnerabilities.
for v in vulns:
# If we've already seen this vulnerability by another name,
# don't add it. Instead, find the previous result and update
# its alias set.
if seen_aliases.intersection(v.aliases | {v.id}):
idx, previous = next(
(i, p) for (i, p) in enumerate(unique_vulns) if p.alias_of(v)
)
unique_vulns[idx] = previous.merge_aliases(v)
continue

seen_aliases.update(v.aliases | {v.id})
unique_vulns.append(v)

yield (dep, unique_vulns)
28 changes: 27 additions & 1 deletion pip_audit/_service/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
of vulnerability information for fully resolved Python packages.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Dict, Iterator, List, Tuple
from typing import Dict, Iterator, List, Set, Tuple

from packaging.utils import canonicalize_name
from packaging.version import Version
Expand Down Expand Up @@ -88,6 +90,30 @@ class VulnerabilityResult:
A list of versions that can be upgraded to that resolve the vulnerability.
"""

aliases: Set[str]
"""
A set of aliases (alternative identifiers) for this result.
"""

def alias_of(self, other: VulnerabilityResult) -> bool:
"""
Returns whether this result is an "alias" of another result.
Two results are said to be aliases if their respective sets of
`{id, *aliases}` intersect at all. A result is therefore its own alias.
"""
return bool((self.aliases | {self.id}).intersection(other.aliases | {other.id}))

def merge_aliases(self, other: VulnerabilityResult) -> VulnerabilityResult:
"""
Merge `other`'s aliases into this result, returning a new result.
"""

# Our own ID should never occur in the alias set.
return VulnerabilityResult(
self.id, self.description, self.fix_versions, self.aliases | other.aliases - {self.id}
)


class VulnerabilityService(ABC):
"""
Expand Down
3 changes: 2 additions & 1 deletion pip_audit/_service/osv.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def query(self, spec: Dependency) -> Tuple[Dependency, List[VulnerabilityResult]
for vuln in response_json["vulns"]:
id = vuln["id"]
description = vuln["details"]
aliases = set(vuln["aliases"])
fix_versions: List[Version] = []
for affected in vuln["affected"]:
pkg = affected["package"]
Expand All @@ -100,6 +101,6 @@ def query(self, spec: Dependency) -> Tuple[Dependency, List[VulnerabilityResult]
# The ranges aren't guaranteed to come in chronological order
fix_versions.sort()

results.append(VulnerabilityResult(id, description, fix_versions))
results.append(VulnerabilityResult(id, description, fix_versions, aliases))

return spec, results
4 changes: 3 additions & 1 deletion pip_audit/_service/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ def query(self, spec: Dependency) -> Tuple[Dependency, List[VulnerabilityResult]
# The ranges aren't guaranteed to come in chronological order
fix_versions.sort()

results.append(VulnerabilityResult(v["id"], v["details"], fix_versions))
results.append(
VulnerabilityResult(v["id"], v["details"], fix_versions, set(v["aliases"]))
)

return spec, results
1 change: 1 addition & 0 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def query(self, spec):
id="fake-id",
description="this is not a real result",
fix_versions=[fixed],
aliases=set(),
)
]

Expand Down
4 changes: 4 additions & 0 deletions test/format/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,21 @@
Version("1.1"),
Version("1.4"),
],
aliases=set(),
),
service.VulnerabilityResult(
id="VULN-1",
description="The second vulnerability",
fix_versions=[Version("1.0")],
aliases=set(),
),
],
_RESOLVED_DEP_BAR: [
service.VulnerabilityResult(
id="VULN-2",
description="The third vulnerability",
fix_versions=[],
aliases=set(),
)
],
}
Expand All @@ -44,6 +47,7 @@
Version("1.1"),
Version("1.4"),
],
aliases=set(),
),
],
_SKIPPED_DEP: [],
Expand Down
23 changes: 22 additions & 1 deletion test/service/test_interface.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import pytest
from packaging.version import Version

from pip_audit._service.interface import Dependency, ResolvedDependency, SkippedDependency
from pip_audit._service.interface import (
Dependency,
ResolvedDependency,
SkippedDependency,
VulnerabilityResult,
)


def test_dependency_typing():
Expand Down Expand Up @@ -41,3 +46,19 @@ def test_vulnerability_service_no_results(vuln_service, spec):

_, vulns = service.query(spec)
assert len(vulns) == 0


def test_vulnerability_result_update_aliases():
result1 = VulnerabilityResult(
id="FOO", description="stub", fix_versions=[Version("1.0.0")], aliases={"BAR", "BAZ", "ZAP"}
)
result2 = VulnerabilityResult(
id="BAR",
description="stub",
fix_versions=[Version("1.0.0")],
aliases={"FOO", "BAZ", "QUUX"},
)

merged = result1.merge_aliases(result2)
assert merged.id == "FOO"
assert merged.aliases == {"BAR", "BAZ", "ZAP", "QUUX"}
11 changes: 11 additions & 0 deletions test/service/test_osv.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@
import pip_audit._service as service


def get_mock_session(func):
class MockSession:
def __init__(self, create_response):
self.create_response = create_response

def post(self, url, **kwargs):
return self.create_response()

return MockSession(func)


def test_osv():
osv = service.OsvService()
dep = service.ResolvedDependency("jinja2", Version("2.4.1"))
Expand Down
3 changes: 3 additions & 0 deletions test/service/test_pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ def json(self):
return {
"vulnerabilities": [
{
"aliases": ["foo", "bar"],
"id": "VULN-0",
"details": "The first vulnerability",
"fixed_in": ["1.1", "1.4"],
Expand All @@ -139,6 +140,7 @@ def json(self):
id="VULN-0",
description="The first vulnerability",
fix_versions=[Version("1.1"), Version("1.4")],
aliases={"foo", "bar"},
)


Expand Down Expand Up @@ -177,6 +179,7 @@ def json(self):
return {
"vulnerabilities": [
{
"aliases": ["foo", "bar"],
"id": "VULN-0",
"details": "The first vulnerability",
"fixed_in": ["invalid_version"],
Expand Down
86 changes: 85 additions & 1 deletion test/test_audit.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import itertools

import pretend # type: ignore
import pytest
from packaging.version import Version

from pip_audit import _audit as audit
from pip_audit._audit import AuditOptions, Auditor
from pip_audit._service.interface import VulnerabilityResult
from pip_audit._service.interface import VulnerabilityResult, VulnerabilityService


def test_audit(vuln_service, dep_source):
Expand All @@ -21,6 +23,7 @@ def test_audit(vuln_service, dep_source):
id="fake-id",
description="this is not a real result",
fix_versions=[Version("1.1.0")],
aliases=set(),
)
],
)
Expand All @@ -46,3 +49,84 @@ def test_audit_dry_run(monkeypatch, vuln_service, dep_source):
# but an appropriate number of logging calls should be made.
assert service.query_all.calls == []
assert len(logger.info.calls) == len(list(source.collect()))


@pytest.mark.parametrize(
"vulns",
itertools.permutations(
[
VulnerabilityResult(
id="PYSEC-0",
description="fake",
fix_versions=[Version("1.1.0")],
aliases={"alias-1"},
),
VulnerabilityResult(
id="FAKE-1",
description="fake",
fix_versions=[Version("1.1.0")],
aliases={"alias-1", "alias-2"},
),
]
),
)
def test_audit_dedupes_aliases(dep_source, vulns):
class Service(VulnerabilityService):
def query(self, spec):
return spec, vulns

service = Service()
source = dep_source()

auditor = Auditor(service)
results = list(auditor.audit(source))

# One dependency, one unique vulnerability result for that dependency.
assert len(results) == 1
assert len(results[0][1]) == 1
assert results[0][1][0].id == "PYSEC-0"


@pytest.mark.parametrize(
"vulns",
itertools.permutations(
[
VulnerabilityResult(
id="PYSEC-0",
description="fake",
fix_versions=[Version("1.1.0")],
aliases={"CVE-XXXX-YYYYY"},
),
VulnerabilityResult(
id="FAKE-1",
description="fake",
fix_versions=[Version("1.1.0")],
aliases={"CVE-XXXX-YYYYY"},
),
VulnerabilityResult(
id="CVE-XXXX-YYYYY",
description="fake",
fix_versions=[Version("1.1.0")],
aliases={"FAKE-1"},
),
]
),
)
def test_audit_dedupes_aliases_by_id(dep_source, vulns):
class Service(VulnerabilityService):
def query(self, spec):
return spec, vulns

service = Service()
source = dep_source()

auditor = Auditor(service)
results = list(auditor.audit(source))

# One dependency, one unique vulnerability result for that dependency.
assert len(results) == 1
assert len(results[0][1]) == 1
assert results[0][1][0].id == "PYSEC-0"

# The result contains the merged alias set for all aliases.
assert results[0][1][0].aliases == {"FAKE-1", "CVE-XXXX-YYYYY"}
7 changes: 6 additions & 1 deletion test/test_fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def test_fix(vuln_service):
id="fake-id",
description="this is not a real result",
fix_versions=[Version("1.0.0")],
aliases=set(),
)
]
}
Expand All @@ -36,6 +37,7 @@ def test_fix_skipped_deps(vuln_service):
id="fake-id",
description="this is not a real result",
fix_versions=[Version("1.0.0")],
aliases=set(),
)
]
}
Expand All @@ -55,7 +57,10 @@ def test_fix_resolution_impossible(vuln_service):
result: Dict[Dependency, List[VulnerabilityResult]] = {
dep: [
VulnerabilityResult(
id="fake-id", description="this is not a real result", fix_versions=list()
id="fake-id",
description="this is not a real result",
fix_versions=list(),
aliases=set(),
)
]
}
Expand Down

0 comments on commit 6a6b1fc

Please sign in to comment.