Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

_cli, resolvelib: Support for custom indices #238

Merged
merged 11 commits into from
Feb 11, 2022
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,14 @@ All versions prior to 0.0.9 are untracked.
conjunction with `-r` to check that all requirements in the file have an
associated hash ([#229](https://github.com/trailofbits/pip-audit/pull/229))

* CLI: The `--index-url` flag has been added, allowing users to use custom
package indices when running with the `-r` flag
([#238](https://github.com/trailofbits/pip-audit/pull/238))

* CLI: The `--extra-index-url` flag has been added, allowing users to use
multiple package indices when running with the `-r` flag
([#238](https://github.com/trailofbits/pip-audit/pull/238))

### Changed

* `pip-audit`'s minimum Python version is now 3.7.
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ usage: pip-audit [-h] [-V] [-l] [-r REQUIREMENTS] [-f FORMAT] [-s SERVICE]
[-d] [-S] [--desc [{on,off,auto}]] [--cache-dir CACHE_DIR]
[--progress-spinner {on,off}] [--timeout TIMEOUT]
[--path PATHS] [-v] [--fix] [--require-hashes]
[--index-url INDEX_URL] [--extra-index-url EXTRA_INDEX_URLS]

audit the Python environment for dependencies with known vulnerabilities

Expand Down Expand Up @@ -119,6 +120,15 @@ optional arguments:
repeatable audits; this option is implied when any
package in a requirements file has a `--hash` option.
(default: False)
--index-url INDEX_URL
base URL of the Python Package Index; this should
point to a repository compliant with PEP 503 (the
simple repository API) (default:
https://pypi.org/simple)
--extra-index-url EXTRA_INDEX_URLS
extra URLs of package indexes to use in addition to
`--index-url`; should follow the same rules as
`--index-url` (default: [])
```
<!-- @end-pip-audit-help@ -->

Expand Down
31 changes: 27 additions & 4 deletions pip_audit/_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pip_audit import __version__
from pip_audit._audit import AuditOptions, Auditor
from pip_audit._dependency_source import (
PYPI_URL,
DependencySource,
PipSource,
RequirementSource,
Expand Down Expand Up @@ -245,6 +246,22 @@ def _parser() -> argparse.ArgumentParser:
help="require a hash to check each requirement against, for repeatable audits; this option "
"is implied when any package in a requirements file has a `--hash` option.",
)
parser.add_argument(
"--index-url",
type=str,
help="base URL of the Python Package Index; this should point to a repository compliant "
"with PEP 503 (the simple repository API)",
default=PYPI_URL,
)
parser.add_argument(
"--extra-index-url",
type=str,
action="append",
dest="extra_index_urls",
default=[],
help="extra URLs of package indexes to use in addition to `--index-url`; should follow the "
"same rules as `--index-url`",
)
return parser


Expand All @@ -268,9 +285,14 @@ def audit() -> None:
output_desc = args.desc.to_bool(args.format)
formatter = args.format.to_format(output_desc)

# The `--require-hashes` flag is only valid with requirements files
if args.require_hashes and args.requirements is None:
parser.error("The --require-hashes flag can only be used with --requirement (-r)")
# Check for flags that are only valid with requirements files
if args.requirements is None:
if args.require_hashes:
parser.error("The --require-hashes flag can only be used with --requirement (-r)")
elif args.index_url != PYPI_URL:
parser.error("The --index-url flag can only be used with --requirement (-r)")
elif args.extra_index_urls:
parser.error("The --extra-index-url flag can only be used with --requirement (-r)")

with ExitStack() as stack:
actors = []
Expand All @@ -280,10 +302,11 @@ def audit() -> None:

source: DependencySource
if args.requirements is not None:
index_urls = [args.index_url] + args.extra_index_urls
req_files: List[Path] = [Path(req.name) for req in args.requirements]
source = RequirementSource(
req_files,
ResolveLibResolver(args.timeout, args.cache_dir, state),
ResolveLibResolver(index_urls, args.timeout, args.cache_dir, state),
args.require_hashes,
state,
)
Expand Down
3 changes: 2 additions & 1 deletion pip_audit/_dependency_source/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
)
from .pip import PipSource, PipSourceError
from .requirement import RequirementSource
from .resolvelib import ResolveLibResolver
from .resolvelib import PYPI_URL, ResolveLibResolver

__all__ = [
"PYPI_URL",
"DependencyFixError",
"DependencyResolver",
"DependencyResolverError",
Expand Down
3 changes: 2 additions & 1 deletion pip_audit/_dependency_source/resolvelib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
`resolvelib` interactions for `pip-audit`.
"""

from .resolvelib import ResolveLibResolver, ResolveLibResolverError
from .resolvelib import PYPI_URL, ResolveLibResolver, ResolveLibResolverError

__all__ = [
"PYPI_URL",
"ResolveLibResolver",
"ResolveLibResolverError",
]
37 changes: 30 additions & 7 deletions pip_audit/_dependency_source/resolvelib/pypi_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,33 @@ def _get_metadata_for_sdist(self):
return metadata


def get_project_from_pypi(
session, project, extras, timeout: Optional[int], state: AuditState
def get_project_from_indexes(
    index_urls: List[str], session, project, extras, timeout: Optional[int], state: AuditState
) -> Iterator[Candidate]:
    """
    Yield candidates for `project` (with `extras`) from every index in `index_urls`.

    The indexes are queried one after another, in the order given, by delegating to
    `get_project_from_index`. An index that raises `PyPINotFoundError` for the project is
    skipped rather than treated as a failure.

    Raises `PyPINotFoundError` after all indexes have been tried if none of them had the
    project. Because this is a generator, that error surfaces while the result is being
    iterated, not when the function is called.
    """
    project_found = False
    for index_url in index_urls:
        # Not all indexes are guaranteed to have the project so this isn't an error
        # We should only return an error if it can't be found on ANY of the supplied index URLs
        try:
            yield from get_project_from_index(index_url, session, project, extras, timeout, state)
        except PyPINotFoundError:
            continue
        # Only reached when the index answered without a "not found" error.
        project_found = True
    if not project_found:
        raise PyPINotFoundError(
            f'Could not find project "{project}" on any of the supplied index URLs: {index_urls}'
        )


def get_project_from_index(
index_url: str, session, project, extras, timeout: Optional[int], state: AuditState
) -> Iterator[Candidate]:
"""Return candidates from an index created from the project name and extras."""
url = index_url + "/" + project
response: requests.Response = session.get(url, timeout=timeout)
if response.status_code == 404:
raise PyPINotFoundError(f'Could not find project "{project}" on PyPI')
raise PyPINotFoundError
response.raise_for_status()
data = response.content
doc = html5lib.parse(data, namespaceHTMLElements=False)
Expand Down Expand Up @@ -231,20 +250,24 @@ class PyPIProvider(AbstractProvider):

def __init__(
self,
index_urls: List[str],
timeout: Optional[int] = None,
cache_dir: Optional[Path] = None,
state: AuditState = AuditState(),
):
"""
Create a new `PyPIProvider`.

`index_urls` is a list of package index URLs.

`timeout` is an optional argument to control how many seconds the component should wait for
responses to network requests.

`cache_dir` is an optional argument to override the default HTTP caching directory.

`state` is an `AuditState` to use for state callbacks.
"""
self.index_urls = index_urls
self.timeout = timeout
self.session = caching_session(cache_dir, use_pip=True)
self._state = state
Expand Down Expand Up @@ -282,8 +305,8 @@ def find_matches(self, identifier, requirements, incompatibilities):
candidates = sorted(
[
candidate
for candidate in get_project_from_pypi(
self.session, identifier, extras, self.timeout, self._state
for candidate in get_project_from_indexes(
self.index_urls, self.session, identifier, extras, self.timeout, self._state
)
if candidate.version not in bad_versions
and all(candidate.version in r.specifier for r in requirements)
Expand Down
5 changes: 4 additions & 1 deletion pip_audit/_dependency_source/resolvelib/resolvelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

logger = logging.getLogger(__name__)

PYPI_URL = "https://pypi.org/simple"


class ResolveLibResolver(DependencyResolver):
"""
Expand All @@ -28,6 +30,7 @@ class ResolveLibResolver(DependencyResolver):

def __init__(
self,
index_urls: List[str] = [PYPI_URL],
timeout: Optional[int] = None,
cache_dir: Optional[Path] = None,
state: AuditState = AuditState(),
Expand All @@ -40,7 +43,7 @@ def __init__(

`state` is an `AuditState` to use for state callbacks.
"""
self.provider = PyPIProvider(timeout, cache_dir, state)
self.provider = PyPIProvider(index_urls, timeout, cache_dir, state)
self.reporter = BaseReporter()
self.resolver: Resolver = Resolver(self.provider, self.reporter)

Expand Down
100 changes: 99 additions & 1 deletion test/dependency_source/test_resolvelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,105 @@ def __init__(self):
resolved_deps = dict(resolver.resolve_all(iter([req])))
assert len(resolved_deps) == 1
expected_deps = [
SkippedDependency(name="flask", skip_reason='Could not find project "flask" on PyPI')
SkippedDependency(
name="flask",
skip_reason='Could not find project "flask" on any of the supplied index URLs: '
"['https://pypi.org/simple']",
)
]
assert req in resolved_deps
assert resolved_deps[req] == expected_deps


def test_resolvelib_multiple_indexes(monkeypatch):
    """Candidates served by different indexes are all considered during resolution."""
    first_index = "https://index1"
    second_index = "https://index2"

    # Each index serves a project page for "flask" listing a single, different sdist.
    pages = {
        f"{first_index}/flask": (
            '<a href="https://files.pythonhosted.org/packages/d4/6a/'
            "93500f2a7089b4e993fb095215979890b6204a5ba3f6b0f63dc6c3c6c827/Flask-0.5.tar.gz#"
            'sha256=20e176b1db0e2bfe92d869f7b5d0ee3e5d6cb60e793755aaf2284bd78a6202ea">'
            "Flask-0.5.tar.gz"
            "</a><br/>"
        ),
        f"{second_index}/flask": (
            '<a href="https://files.pythonhosted.org/packages/44/86/'
            "481371798994529e105633a50b2332638105a1e191053bc0f4bbc9b91791/Flask-0.6.tar.gz#"
            'sha256=9dc18a7c673bf0a6fada51e011fc411285a8301f6dfc1c000ebfa272b5e609e4">'
            "Flask-0.6.tar.gz"
            "</a><br/>"
        ),
    }

    monkeypatch.setattr(
        pypi_provider.Candidate, "_get_metadata_for_sdist", lambda _: get_metadata_mock()
    )

    def fake_get(url, **kwargs):
        # Only the two project pages above may ever be requested.
        assert url in pages
        return get_package_mock(pages[url])

    resolver = resolvelib.ResolveLibResolver([first_index, second_index])
    monkeypatch.setattr(resolver.provider.session, "get", fake_get)

    # Resolution has to see the packages from both indexes: the first specifier can only be
    # satisfied by the first index, the second one is best satisfied by the second index.
    expectations = (("flask<=0.5", "0.5"), ("flask<=0.6", "0.6"))
    for specifier, expected_version in expectations:
        req = Requirement(specifier)
        resolved_deps = dict(resolver.resolve_all(iter([req])))
        assert req in resolved_deps
        assert resolved_deps[req] == [ResolvedDependency("flask", Version(expected_version))]


def test_resolvelib_package_missing_on_one_index(monkeypatch):
    """A project that is missing (404) on one index is still resolved from the other index."""
    url1 = "https://index1"
    url2 = "https://index2"
    package_url1 = f"{url1}/flask"
    package_url2 = f"{url2}/flask"
    data1 = (
        '<a href="https://files.pythonhosted.org/packages/d4/6a/'
        "93500f2a7089b4e993fb095215979890b6204a5ba3f6b0f63dc6c3c6c827/Flask-0.5.tar.gz#"
        'sha256=20e176b1db0e2bfe92d869f7b5d0ee3e5d6cb60e793755aaf2284bd78a6202ea">Flask-0.5.tar.gz'
        "</a><br/>"
    )

    monkeypatch.setattr(
        pypi_provider.Candidate, "_get_metadata_for_sdist", lambda _: get_metadata_mock()
    )

    # Simulate the package not existing on the second index
    def get_multiple_index_package_mock(url):
        if url == package_url1:
            return get_package_mock(data1)
        assert url == package_url2
        pkg = get_package_mock("")
        pkg.status_code = 404
        return pkg

    resolver = resolvelib.ResolveLibResolver([url1, url2])
    monkeypatch.setattr(
        resolver.provider.session, "get", lambda url, **kwargs: get_multiple_index_package_mock(url)
    )

    # If a package doesn't exist on one index, we shouldn't expect an error. We should just skip it
    # and only use the other index for finding candidates.
    req = Requirement("flask<=0.5")
    resolved_deps = dict(resolver.resolve_all(iter([req])))
    assert req in resolved_deps
    assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))]

    # A looser specifier must still resolve to 0.5: the second index answers with a 404, so the
    # first index's Flask 0.5 is the only candidate available.
    req = Requirement("flask<=0.6")
    resolved_deps = dict(resolver.resolve_all(iter([req])))
    assert req in resolved_deps
    assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))]