From f0c0bacacef9200b00c5f9ede0ede04e9854f81b Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Wed, 9 Feb 2022 15:45:29 +1100 Subject: [PATCH 01/11] _cli, resolvelib: Initial support for multiple indices --- pip_audit/_cli.py | 19 ++++++++++- .../resolvelib/pypi_provider.py | 32 +++++++++++++++---- .../resolvelib/resolvelib.py | 4 ++- 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/pip_audit/_cli.py b/pip_audit/_cli.py index 4891ae4a..cf3a5f6f 100644 --- a/pip_audit/_cli.py +++ b/pip_audit/_cli.py @@ -245,6 +245,22 @@ def _parser() -> argparse.ArgumentParser: help="require a hash to check each requirement against, for repeatable audits; this option " "is implied when any package in a requirements file has a `--hash` option.", ) + parser.add_argument( + "--index-url", + type=str, + help="base URL of the Python Package Index; this should point to a repository compliant " + "with PEP 503 (the simple repository API)", + default="https://pypi.org/simple", + ) + parser.add_argument( + "--extra-index-url", + type=str, + action="append", + dest="extra_index_urls", + default=[], + help="extra URLs of package indexes to use in addition to `--index-url`; should follow the " + "same rules as `--index-url`", + ) return parser @@ -280,10 +296,11 @@ def audit() -> None: source: DependencySource if args.requirements is not None: + index_urls = [args.index_url] + args.extra_index_urls req_files: List[Path] = [Path(req.name) for req in args.requirements] source = RequirementSource( req_files, - ResolveLibResolver(args.timeout, args.cache_dir, state), + ResolveLibResolver(index_urls, args.timeout, args.cache_dir, state), args.require_hashes, state, ) diff --git a/pip_audit/_dependency_source/resolvelib/pypi_provider.py b/pip_audit/_dependency_source/resolvelib/pypi_provider.py index 14963751..cc7252f7 100644 --- a/pip_audit/_dependency_source/resolvelib/pypi_provider.py +++ b/pip_audit/_dependency_source/resolvelib/pypi_provider.py @@ -172,14 +172,32 @@ def _get_metadata_for_sdist(self): return metadata -def get_project_from_pypi( - session, project, extras, timeout: Optional[int], state: AuditState +def get_project_from_indexes( + index_urls: List[str], session, project, extras, timeout: Optional[int], state: AuditState +) -> Iterator[Candidate]: + project_found = False + for index_url in index_urls: + # Not all indexes are guaranteed to have the project so this isn't an error + # We should only return an error if it can't be found on ANY of the supplied index URLs + try: + yield from get_project_from_index(index_url, session, project, extras, timeout, state) + project_found = True + except PyPINotFoundError: + pass + if not project_found: + raise PyPINotFoundError( + f"Could not find project {project} on any of the supplied index URLs: {index_urls}" + ) + + +def get_project_from_index( + index_url: str, session, project, extras, timeout: Optional[int], state: AuditState ) -> Iterator[Candidate]: """Return candidates created from the project name and extras.""" - url = "https://pypi.org/simple/{}".format(project) + url = index_url + "/" + project response: requests.Response = session.get(url, timeout=timeout) if response.status_code == 404: - raise PyPINotFoundError(f'Could not find project "{project}" on PyPI') + raise PyPINotFoundError response.raise_for_status() data = response.content doc = html5lib.parse(data, namespaceHTMLElements=False) @@ -231,6 +249,7 @@ class PyPIProvider(AbstractProvider): def __init__( self, + index_urls: List[str], timeout: Optional[int] = None, cache_dir: Optional[Path] = None, state: AuditState = AuditState(), @@ -245,6 +264,7 @@ def __init__( `state` is an `AuditState` to use for state callbacks. """ + self.index_urls = index_urls self.timeout = timeout self.session = caching_session(cache_dir, use_pip=True) self._state = state @@ -282,8 +302,8 @@ def find_matches(self, identifier, requirements, incompatibilities): candidates = sorted( [ candidate - for candidate in get_project_from_pypi( - self.session, identifier, extras, self.timeout, self._state + for candidate in get_project_from_indexes( + self.index_urls, self.session, identifier, extras, self.timeout, self._state ) if candidate.version not in bad_versions and all(candidate.version in r.specifier for r in requirements) diff --git a/pip_audit/_dependency_source/resolvelib/resolvelib.py b/pip_audit/_dependency_source/resolvelib/resolvelib.py index 9279e02d..9a532f83 100644 --- a/pip_audit/_dependency_source/resolvelib/resolvelib.py +++ b/pip_audit/_dependency_source/resolvelib/resolvelib.py @@ -28,6 +28,8 @@ class ResolveLibResolver(DependencyResolver): def __init__( self, + # TODO(alex): Make this non-optional + index_urls: List[str] = [], timeout: Optional[int] = None, cache_dir: Optional[Path] = None, state: AuditState = AuditState(), @@ -40,7 +42,7 @@ def __init__( `state` is an `AuditState` to use for state callbacks. """ - self.provider = PyPIProvider(timeout, cache_dir, state) + self.provider = PyPIProvider(index_urls, timeout, cache_dir, state) self.reporter = BaseReporter() self.resolver: Resolver = Resolver(self.provider, self.reporter) From 4124c5d787c3a3f7506a8654608cc39d8061a848 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Wed, 9 Feb 2022 16:30:02 +1100 Subject: [PATCH 02/11] pypi_provider: Fix docstrings --- pip_audit/_dependency_source/resolvelib/pypi_provider.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pip_audit/_dependency_source/resolvelib/pypi_provider.py b/pip_audit/_dependency_source/resolvelib/pypi_provider.py index cc7252f7..5975f82f 100644 --- a/pip_audit/_dependency_source/resolvelib/pypi_provider.py +++ b/pip_audit/_dependency_source/resolvelib/pypi_provider.py @@ -175,6 +175,7 @@ def _get_metadata_for_sdist(self): def get_project_from_indexes( index_urls: List[str], session, project, extras, timeout: Optional[int], state: AuditState ) -> Iterator[Candidate]: + """Return candidates from all indexes created from the project name and extras.""" project_found = False for index_url in index_urls: # Not all indexes are guaranteed to have the project so this isn't an error @@ -193,7 +194,7 @@ def get_project_from_indexes( def get_project_from_index( index_url: str, session, project, extras, timeout: Optional[int], state: AuditState ) -> Iterator[Candidate]: - """Return candidates created from the project name and extras.""" + """Return candidates from an index created from the project name and extras.""" url = index_url + "/" + project response: requests.Response = session.get(url, timeout=timeout) if response.status_code == 404: From 968d842701df5125c444e4759dd80db99e8508c6 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Wed, 9 Feb 2022 20:52:43 +1100 Subject: [PATCH 03/11] _cli, resolvelib: Create constant for PyPI url --- pip_audit/_cli.py | 3 ++- pip_audit/_dependency_source/__init__.py | 3 ++- pip_audit/_dependency_source/resolvelib/__init__.py | 3 ++- pip_audit/_dependency_source/resolvelib/pypi_provider.py | 2 +- pip_audit/_dependency_source/resolvelib/resolvelib.py | 5 +++-- test/dependency_source/test_resolvelib.py | 6 +++++- 6 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pip_audit/_cli.py b/pip_audit/_cli.py index cf3a5f6f..2e918049 100644 --- a/pip_audit/_cli.py +++ b/pip_audit/_cli.py @@ -14,6 +14,7 @@ from pip_audit import __version__ from pip_audit._audit import AuditOptions, Auditor from pip_audit._dependency_source import ( + PYPI_URL, DependencySource, PipSource, RequirementSource, @@ -250,7 +251,7 @@ def _parser() -> argparse.ArgumentParser: type=str, help="base URL of the Python Package Index; this should point to a repository compliant " "with PEP 503 (the simple repository API)", - default="https://pypi.org/simple", + default=PYPI_URL, ) parser.add_argument( "--extra-index-url", diff --git a/pip_audit/_dependency_source/__init__.py b/pip_audit/_dependency_source/__init__.py index ab19146d..cacb099e 100644 --- a/pip_audit/_dependency_source/__init__.py +++ b/pip_audit/_dependency_source/__init__.py @@ -11,9 +11,10 @@ ) from .pip import PipSource, PipSourceError from .requirement import RequirementSource -from .resolvelib import ResolveLibResolver +from .resolvelib import PYPI_URL, ResolveLibResolver __all__ = [ + "PYPI_URL", "DependencyFixError", "DependencyResolver", "DependencyResolverError", diff --git a/pip_audit/_dependency_source/resolvelib/__init__.py b/pip_audit/_dependency_source/resolvelib/__init__.py index f4f4bf84..600bc420 100644 --- a/pip_audit/_dependency_source/resolvelib/__init__.py +++ b/pip_audit/_dependency_source/resolvelib/__init__.py @@ -2,9 +2,10 @@ `resolvelib` interactions for `pip-audit`. """ -from .resolvelib import ResolveLibResolver, ResolveLibResolverError +from .resolvelib import PYPI_URL, ResolveLibResolver, ResolveLibResolverError __all__ = [ + "PYPI_URL", "ResolveLibResolver", "ResolveLibResolverError", ] diff --git a/pip_audit/_dependency_source/resolvelib/pypi_provider.py b/pip_audit/_dependency_source/resolvelib/pypi_provider.py index 5975f82f..ea164d35 100644 --- a/pip_audit/_dependency_source/resolvelib/pypi_provider.py +++ b/pip_audit/_dependency_source/resolvelib/pypi_provider.py @@ -187,7 +187,7 @@ def get_project_from_indexes( pass if not project_found: raise PyPINotFoundError( - f"Could not find project {project} on any of the supplied index URLs: {index_urls}" + f'Could not find project "{project}" on any of the supplied index URLs: {index_urls}' ) diff --git a/pip_audit/_dependency_source/resolvelib/resolvelib.py b/pip_audit/_dependency_source/resolvelib/resolvelib.py index 9a532f83..ee41ff10 100644 --- a/pip_audit/_dependency_source/resolvelib/resolvelib.py +++ b/pip_audit/_dependency_source/resolvelib/resolvelib.py @@ -19,6 +19,8 @@ logger = logging.getLogger(__name__) +PYPI_URL = "https://pypi.org/simple" + class ResolveLibResolver(DependencyResolver): """ @@ -28,8 +30,7 @@ class ResolveLibResolver(DependencyResolver): def __init__( self, - # TODO(alex): Make this non-optional - index_urls: List[str] = [], + index_urls: List[str] = [PYPI_URL], timeout: Optional[int] = None, cache_dir: Optional[Path] = None, state: AuditState = AuditState(), diff --git a/test/dependency_source/test_resolvelib.py b/test/dependency_source/test_resolvelib.py index ada7b607..619b4c79 100644 --- a/test/dependency_source/test_resolvelib.py +++ b/test/dependency_source/test_resolvelib.py @@ -295,7 +295,11 @@ def __init__(self): resolved_deps = dict(resolver.resolve_all(iter([req]))) assert len(resolved_deps) == 1 expected_deps = [ - SkippedDependency(name="flask", skip_reason='Could not find project "flask" on PyPI') + SkippedDependency( + name="flask", + skip_reason='Could not find project "flask" on any of the supplied index URLs: ' + "['https://pypi.org/simple']", + ) ] assert req in resolved_deps assert resolved_deps[req] == expected_deps From fdc02cc181bbab361bf2b7c43bcc64194de5ccb3 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Wed, 9 Feb 2022 21:09:35 +1100 Subject: [PATCH 04/11] resolvelib: Document `index_urls` param --- pip_audit/_dependency_source/resolvelib/pypi_provider.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pip_audit/_dependency_source/resolvelib/pypi_provider.py b/pip_audit/_dependency_source/resolvelib/pypi_provider.py index ea164d35..e10ecfe5 100644 --- a/pip_audit/_dependency_source/resolvelib/pypi_provider.py +++ b/pip_audit/_dependency_source/resolvelib/pypi_provider.py @@ -258,6 +258,8 @@ def __init__( """ Create a new `PyPIProvider`. + `index_urls` is a list of package index URLs. + `timeout` is an optional argument to control how many seconds the component should wait for responses to network requests. From 90a5a4015d30388e0a3b9ae5a8fe91c6fbf42602 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 10 Feb 2022 01:07:01 +1100 Subject: [PATCH 05/11] test: Add unit test for multiple indices --- test/dependency_source/test_resolvelib.py | 45 +++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/dependency_source/test_resolvelib.py b/test/dependency_source/test_resolvelib.py index 619b4c79..d5faaca2 100644 --- a/test/dependency_source/test_resolvelib.py +++ b/test/dependency_source/test_resolvelib.py @@ -303,3 +303,48 @@ def __init__(self): ] assert req in resolved_deps assert resolved_deps[req] == expected_deps + + +def test_resolvelib_multiple_indexes(monkeypatch): + url1 = "https://index1" + url2 = "https://index2" + package_url1 = f"{url1}/flask" + package_url2 = f"{url2}/flask" + data1 = ( + 'Flask-0.5.tar.gz' + "
" + ) + data2 = ( + 'Flask-0.6.tar.gz' + "
" + ) + + monkeypatch.setattr( + pypi_provider.Candidate, "_get_metadata_for_sdist", lambda _: get_metadata_mock() + ) + + def get_multiple_index_package_mock(url): + if url == package_url1: + return get_package_mock(data1) + else: + assert url == package_url2 + return get_package_mock(data2) + + resolver = resolvelib.ResolveLibResolver([url1, url2]) + monkeypatch.setattr( + resolver.provider.session, "get", lambda url, **kwargs: get_multiple_index_package_mock(url) + ) + + req = Requirement("flask<=0.5") + resolved_deps = dict(resolver.resolve_all(iter([req]))) + assert req in resolved_deps + assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))] + + req = Requirement("flask<=0.6") + resolved_deps = dict(resolver.resolve_all(iter([req]))) + assert req in resolved_deps + assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.6"))] From 46170ac1ea921810c88e7835e280845750c1d993 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 10 Feb 2022 01:08:54 +1100 Subject: [PATCH 06/11] test: Comments --- test/dependency_source/test_resolvelib.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/dependency_source/test_resolvelib.py b/test/dependency_source/test_resolvelib.py index d5faaca2..98694538 100644 --- a/test/dependency_source/test_resolvelib.py +++ b/test/dependency_source/test_resolvelib.py @@ -339,11 +339,15 @@ def get_multiple_index_package_mock(url): resolver.provider.session, "get", lambda url, **kwargs: get_multiple_index_package_mock(url) ) + # We want to check that dependency resolution is considering packages found on both indexes + # + # Test with a requirement that will resolve to a package on the first index req = Requirement("flask<=0.5") resolved_deps = dict(resolver.resolve_all(iter([req]))) assert req in resolved_deps assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))] + # Now test with a requirement that will resolve to a package on the second index req = Requirement("flask<=0.6") resolved_deps = dict(resolver.resolve_all(iter([req]))) assert req in resolved_deps From 888868bb32b95742dc409a1820b897de46e0e55e Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 10 Feb 2022 01:14:19 +1100 Subject: [PATCH 07/11] CHANGELOG: Update changelog --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 320b0840..6ee4120a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,14 @@ All versions prior to 0.0.9 are untracked. conjunction with `-r` to check that all requirements in the file have an associated hash ([#229](https://github.com/trailofbits/pip-audit/pull/229)) +* CLI: The `--index-url` flag has been added, allowing users to use custom + package indices when running with the `-r` flag + ([#238](https://github.com/trailofbits/pip-audit/pull/238)) + +* CLI: The `--extra-index-url` flag has been added, allowing users to use + multiple package indices when running with the `-r` flag + ([#238](https://github.com/trailofbits/pip-audit/pull/238)) + ### Changed * `pip-audit`'s minimum Python version is now 3.7. From 75569cc1fb92ca965123ef32cc3c5ce5b3051c99 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 10 Feb 2022 01:18:27 +1100 Subject: [PATCH 08/11] _cli: Check for improper usage of new index flags --- pip_audit/_cli.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pip_audit/_cli.py b/pip_audit/_cli.py index 2e918049..1d64594c 100644 --- a/pip_audit/_cli.py +++ b/pip_audit/_cli.py @@ -285,9 +285,14 @@ def audit() -> None: output_desc = args.desc.to_bool(args.format) formatter = args.format.to_format(output_desc) - # The `--require-hashes` flag is only valid with requirements files - if args.require_hashes and args.requirements is None: - parser.error("The --require-hashes flag can only be used with --requirement (-r)") + # Check for flags that are only valid with requirements files + if args.requirements is None: + if args.require_hashes: + parser.error("The --require-hashes flag can only be used with --requirement (-r)") + elif args.index_url != PYPI_URL: + parser.error("The --index-url flag can only be used with --requirement (-r)") + elif args.extra_index_urls: + parser.error("The --extra-index-url flag can only be used with --requirement (-r)") with ExitStack() as stack: actors = [] From eaaacdf78f0533e967dae009ade8f0b4a46eea7f Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 10 Feb 2022 01:21:13 +1100 Subject: [PATCH 09/11] README: Update help text --- README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/README.md b/README.md index cd0f73ea..ef854633 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,7 @@ usage: pip-audit [-h] [-V] [-l] [-r REQUIREMENTS] [-f FORMAT] [-s SERVICE] [-d] [-S] [--desc [{on,off,auto}]] [--cache-dir CACHE_DIR] [--progress-spinner {on,off}] [--timeout TIMEOUT] [--path PATHS] [-v] [--fix] [--require-hashes] + [--index-url INDEX_URL] [--extra-index-url EXTRA_INDEX_URLS] audit the Python environment for dependencies with known vulnerabilities @@ -119,6 +120,14 @@ optional arguments: repeatable audits; this option is implied when any package in a requirements file has a `--hash` option. (default: False) + --index-url INDEX_URL + base URL of the Python Package Index; this should point + to a repository compliant with PEP 503 (the simple + repository API) (default: https://pypi.org/simple) + --extra-index-url EXTRA_INDEX_URLS + extra URLs of package indexes to use in addition to + `--index-url`; should follow the same rules as + `--index-url` (default: []) ``` From 027a6de19794b7346b31879ae39363a8a2fb64db Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 10 Feb 2022 01:24:41 +1100 Subject: [PATCH 10/11] README: Adjust help text --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ef854633..16ec7937 100644 --- a/README.md +++ b/README.md @@ -121,9 +121,10 @@ optional arguments: package in a requirements file has a `--hash` option. (default: False) --index-url INDEX_URL - base URL of the Python Package Index; this should point - to a repository compliant with PEP 503 (the simple - repository API) (default: https://pypi.org/simple) + base URL of the Python Package Index; this should + point to a repository compliant with PEP 503 (the + simple repository API) (default: + https://pypi.org/simple) --extra-index-url EXTRA_INDEX_URLS extra URLs of package indexes to use in addition to `--index-url`; should follow the same rules as From 7eec632b1f7f11522e080af4ed24d0c7ec680558 Mon Sep 17 00:00:00 2001 From: Alex Cameron Date: Thu, 10 Feb 2022 09:52:40 +1100 Subject: [PATCH 11/11] test: Add a test for when a package only exists on one index --- test/dependency_source/test_resolvelib.py | 45 +++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/test/dependency_source/test_resolvelib.py b/test/dependency_source/test_resolvelib.py index 98694538..d1124a54 100644 --- a/test/dependency_source/test_resolvelib.py +++ b/test/dependency_source/test_resolvelib.py @@ -352,3 +352,48 @@ def get_multiple_index_package_mock(url): resolved_deps = dict(resolver.resolve_all(iter([req]))) assert req in resolved_deps assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.6"))] + + +def test_resolvelib_package_missing_on_one_index(monkeypatch): + url1 = "https://index1" + url2 = "https://index2" + package_url1 = f"{url1}/flask" + package_url2 = f"{url2}/flask" + data1 = ( + 'Flask-0.5.tar.gz' + "
" + ) + + monkeypatch.setattr( + pypi_provider.Candidate, "_get_metadata_for_sdist", lambda _: get_metadata_mock() + ) + + # Simulate the package not existing on the second index + def get_multiple_index_package_mock(url): + if url == package_url1: + return get_package_mock(data1) + else: + assert url == package_url2 + pkg = get_package_mock(str()) + pkg.status_code = 404 + return pkg + + resolver = resolvelib.ResolveLibResolver([url1, url2]) + monkeypatch.setattr( + resolver.provider.session, "get", lambda url, **kwargs: get_multiple_index_package_mock(url) + ) + + # If a package doesn't exist on one index, we shouldn't expect an error. We should just skip it + # and only use the other index for finding candidates. + req = Requirement("flask<=0.5") + resolved_deps = dict(resolver.resolve_all(iter([req]))) + assert req in resolved_deps + assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))] + + # Now test with a requirement that will resolve to a package on the second index + req = Requirement("flask<=0.6") + resolved_deps = dict(resolver.resolve_all(iter([req]))) + assert req in resolved_deps + assert resolved_deps[req] == [ResolvedDependency("flask", Version("0.5"))]