Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

requirement, test: Correct --fix for subdependencies in requirements files #297

Merged
merged 17 commits into from
Jun 15, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 80 additions & 37 deletions pip_audit/_dependency_source/requirement.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from contextlib import ExitStack
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import IO, Iterator, List, Set, Union, cast
from typing import IO, Dict, Iterator, List, Set, Union, cast

from packaging.requirements import Requirement
from packaging.specifiers import SpecifierSet
Expand Down Expand Up @@ -71,6 +71,7 @@ def __init__(
self._require_hashes = require_hashes
self._no_deps = no_deps
self.state = state
self._dep_cache: Dict[Path, Set[Dependency]] = {}

def collect(self) -> Iterator[Dependency]:
"""
Expand All @@ -83,44 +84,17 @@ def collect(self) -> Iterator[Dependency]:
try:
reqs = parse_requirements(filename=filename)
except PipError as pe:
raise RequirementSourceError("requirement parsing raised an error") from pe

# There are three cases where we skip dependency resolution:
#
# 1. The user has explicitly specified `--require-hashes`.
# 2. One or more parsed requirements has hashes specified, enabling
# hash checking for all requirements.
# 3. The user has explicitly specified `--no-deps`.
require_hashes = self._require_hashes or any(
isinstance(req, ParsedRequirement) and req.hashes for req in reqs.values()
)
skip_deps = require_hashes or self._no_deps
if skip_deps:
yield from self._collect_preresolved_deps(
iter(reqs.values()), require_hashes=require_hashes
)
continue

# Invoke the dependency resolver to turn requirements into dependencies
req_values: List[Requirement] = [Requirement(str(req)) for req in reqs.values()]
raise RequirementSourceError(
f"requirement parsing raised an error: {filename}"
) from pe
try:
for _, deps in self._resolver.resolve_all(iter(req_values)):
for dep in deps:
# Don't allow duplicate dependencies to be returned
if dep in collected:
continue

if dep.is_skipped(): # pragma: no cover
dep = cast(SkippedDependency, dep)
self.state.update_state(f"Skipping {dep.name}: {dep.skip_reason}")
else:
dep = cast(ResolvedDependency, dep)
self.state.update_state(f"Collecting {dep.name} ({dep.version})")

collected.add(dep)
yield dep
for dep in self._collect_cached_deps(filename, list(reqs.values())):
if dep in collected:
continue
collected.add(dep)
yield dep
except DependencyResolverError as dre:
raise RequirementSourceError("dependency resolver raised an error") from dre
raise RequirementSourceError from dre

def fix(self, fix_version: ResolvedFixVersion) -> None:
"""
Expand Down Expand Up @@ -164,16 +138,38 @@ def _fix_file(self, filename: Path, fix_version: ResolvedFixVersion) -> None:

# Now write out the new requirements file
with filename.open("w") as f:
fixed = False
for req in req_list:
if (
req.name == fix_version.dep.name
and req.specifier.contains(fix_version.dep.version)
and not req.specifier.contains(fix_version.version)
):
req.specifier = SpecifierSet(f"=={fix_version.version}")
fixed = True
assert req.marker is None or req.marker.evaluate()
f.write(str(req) + os.linesep)

# The vulnerable dependency may not be explicitly listed in the requirements file if it
# is a subdependency of a requirement. In this case, we should explicitly add the fixed
# dependency into the requirements file.
#
# To know whether this is the case, we'll need to resolve dependencies if we haven't
# already in order to figure out whether this subdependency belongs to this file or
# another.
try:
if not fixed and fix_version.dep in self._collect_cached_deps(
filename, list(reqs.values())
):
logger.warning(
f"added fixed subdependency explicitly to requirements file {filename}: "
f"{fix_version.dep.canonical_name}"
)
f.write("# added by pip-audit to fix subdependency" + os.linesep)
tetsuo-cpp marked this conversation as resolved.
Show resolved Hide resolved
f.write(f"{fix_version.dep.canonical_name}=={fix_version.version}" + os.linesep)
tetsuo-cpp marked this conversation as resolved.
Show resolved Hide resolved
except DependencyResolverError as dre:
raise RequirementFixError from dre

def _recover_files(self, tmp_files: List[IO[str]]) -> None:
for (filename, tmp_file) in zip(self._filenames, tmp_files):
try:
Expand Down Expand Up @@ -214,6 +210,53 @@ def _collect_preresolved_deps(
req.name, Version(pinned_specifier.group("version")), req.hashes
)

def _collect_cached_deps(
self, filename: Path, reqs: List[Union[ParsedRequirement, UnparsedRequirement]]
) -> Iterator[Dependency]:
"""
Collect resolved dependencies for a given requirements file, retrieving them from the
dependency cache if possible.
"""
# See if we've already have cached dependencies for this file
cached_deps_for_file = self._dep_cache.get(filename, None)
if cached_deps_for_file is not None:
yield from cached_deps_for_file

new_cached_deps_for_file = set()

# There are three cases where we skip dependency resolution:
#
# 1. The user has explicitly specified `--require-hashes`.
# 2. One or more parsed requirements has hashes specified, enabling
# hash checking for all requirements.
# 3. The user has explicitly specified `--no-deps`.
require_hashes = self._require_hashes or any(
isinstance(req, ParsedRequirement) and req.hashes for req in reqs
)
skip_deps = require_hashes or self._no_deps
if skip_deps:
for dep in self._collect_preresolved_deps(iter(reqs), require_hashes=require_hashes):
new_cached_deps_for_file.add(dep)
yield dep
else:
# Invoke the dependency resolver to turn requirements into dependencies
req_values: List[Requirement] = [Requirement(str(req)) for req in reqs]
for _, deps in self._resolver.resolve_all(iter(req_values)):
for dep in deps:
new_cached_deps_for_file.add(dep)

if dep.is_skipped(): # pragma: no cover
dep = cast(SkippedDependency, dep)
self.state.update_state(f"Skipping {dep.name}: {dep.skip_reason}")
else:
dep = cast(ResolvedDependency, dep)
self.state.update_state(f"Collecting {dep.name} ({dep.version})")

yield dep

# Cache the collected dependencies
self._dep_cache[filename] = new_cached_deps_for_file


class RequirementSourceError(DependencySourceError):
"""A requirements-parsing specific `DependencySourceError`."""
Expand Down
139 changes: 138 additions & 1 deletion test/dependency_source/test_requirement.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from pathlib import Path
from typing import List
from typing import List, Optional

import pretend # type: ignore
import pytest
Expand Down Expand Up @@ -384,3 +384,140 @@ def test_requirement_source_no_deps_unpinned(monkeypatch):
# When dependency resolution is disabled, all requirements must be pinned.
with pytest.raises(DependencySourceError):
list(source.collect())


def test_requirement_source_dep_caching(monkeypatch):
source = requirement.RequirementSource(
[Path("requirements.txt")], ResolveLibResolver(), no_deps=True
)

monkeypatch.setattr(
_parse_requirements,
"_read_file",
lambda _: ["flask==2.0.1"],
)

specs = list(source.collect())

class MockResolver(DependencyResolver):
def resolve(self, req: Requirement) -> List[Dependency]:
raise DependencyResolverError

# Now run collect again and check that dependency resolution doesn't get repeated
source._resolver = MockResolver()

cached_specs = list(source.collect())
assert specs == cached_specs


def test_requirement_source_fix_explicit_subdep(monkeypatch, req_file):
logger = pretend.stub(warning=pretend.call_recorder(lambda s: None))
monkeypatch.setattr(requirement, "logger", logger)

# We're going to simulate the situation where a subdependency of `flask` has a vulnerability.
# In this case, we're choosing `jinja2`.
flask_deps = ResolveLibResolver().resolve(Requirement("flask==2.0.1"))

# Firstly, get a handle on the `jinja2` dependency. The version cannot be hardcoded since it
# depends what versions are available on PyPI when dependency resolution runs.
jinja_dep: Optional[ResolvedDependency] = None
for dep in flask_deps:
if isinstance(dep, ResolvedDependency) and dep.canonical_name == "jinja2":
jinja_dep = dep
break
assert jinja_dep is not None

# Check that the `jinja2` dependency is explicitly added to the requirements file with an
# associated comment.
_check_fixes(
["flask==2.0.1"],
["flask==2.0.1\n# added by pip-audit to fix subdependency\njinja2==4.0.0"],
[req_file()],
[
ResolvedFixVersion(
dep=jinja_dep,
version=Version("4.0.0"),
)
],
)

# When explicitly listing a fixed subdependency, we issue a warning.
assert len(logger.warning.calls) == 1


def test_requirement_source_fix_explicit_subdep_resolver_error(req_file):
# Pass the requirement source a resolver that automatically raises errors
class MockResolver(DependencyResolver):
def resolve(self, req: Requirement) -> List[Dependency]:
raise DependencyResolverError

req_file_name = req_file()
with open(req_file_name, "w") as f:
f.write("flask==2.0.1")

# Recreate the vulnerable subdependency case.
flask_deps = ResolveLibResolver().resolve(Requirement("flask==2.0.1"))
jinja_dep: Optional[ResolvedDependency] = None
for dep in flask_deps:
if isinstance(dep, ResolvedDependency) and dep.canonical_name == "jinja2":
jinja_dep = dep
break
assert jinja_dep is not None

# When we try to fix a vulnerable subdependency, we need to resolve dependencies if that
# information isn't already cached.
#
# Test the case where we hit a resolver error.
source = requirement.RequirementSource([req_file_name], MockResolver())
with pytest.raises(DependencyFixError):
source.fix(
ResolvedFixVersion(
dep=jinja_dep,
version=Version("4.0.0"),
)
)


def test_requirement_source_fix_explicit_subdep_comment_removal(req_file):
# This test is documenting a weakness in the current fix implementation.
#
# When fixing a subdependency and explicitly adding it to the requirements file, we add a
# comment above the line to explain its presence since it's unusual to explicitly pin a
# subdependency like this.
#
# When we "fix" dependencies, we use `pip-api` to parse the requirements file and write it back
# out with the relevant line amended or added. One downside of this method is that `pip-api`
# filters out comments so applying fixes removes all comments in the file.
# See: https://github.com/di/pip-api/issues/120
#
# Therefore, when we apply a subdependency fix, the automated comment will be removed
# by any subsequent fixes.

# Recreate the vulnerable subdependency case.
flask_deps = ResolveLibResolver().resolve(Requirement("flask==2.0.1"))
jinja_dep: Optional[ResolvedDependency] = None
for dep in flask_deps:
if isinstance(dep, ResolvedDependency) and dep.canonical_name == "jinja2":
jinja_dep = dep
break
assert jinja_dep is not None

# Now place a fix for the top-level `flask` requirement after the `jinja2` subdependency fix.
#
# When applying the `flask` fix, `pip-audit` reparses the requirements file, stripping out the
# comment and writes it back out with the fixed `flask` version.
_check_fixes(
["flask==2.0.1"],
["flask==3.0.0\njinja2==4.0.0"],
[req_file()],
[
ResolvedFixVersion(
dep=jinja_dep,
version=Version("4.0.0"),
),
ResolvedFixVersion(
dep=ResolvedDependency(name="flask", version=Version("2.0.1")),
version=Version("3.0.0"),
),
],
)