diff --git a/piptools/repositories/pypi.py b/piptools/repositories/pypi.py
index 72081f7b8..7f6975ccc 100644
--- a/piptools/repositories/pypi.py
+++ b/piptools/repositories/pypi.py
@@ -9,7 +9,7 @@
 
 from pip._internal.cache import WheelCache
 from pip._internal.commands import create_command
-from pip._internal.models.index import PyPI
+from pip._internal.models.index import PackageIndex, PyPI
 from pip._internal.models.link import Link
 from pip._internal.models.wheel import Wheel
 from pip._internal.req import RequirementSet
@@ -18,12 +18,14 @@
 from pip._internal.utils.misc import normalize_path
 from pip._internal.utils.temp_dir import TempDirectory, global_tempdir_manager
 from pip._internal.utils.urls import path_to_url, url_to_path
+from pip._vendor.requests import RequestException
 
 from .._compat import PIP_VERSION, TemporaryDirectory, contextlib
 from ..click import progressbar
 from ..exceptions import NoCandidateFound
 from ..logging import log
 from ..utils import (
+    as_tuple,
     fs_str,
     is_pinned_requirement,
     is_url_requirement,
@@ -227,6 +229,47 @@ def get_dependencies(self, ireq):
 
         return self._dependencies_cache[ireq]
 
+    def _get_project(self, ireq):
+        """
+        Return a dict of project info from the PyPI JSON API for a given
+        InstallRequirement. Return None on HTTP/JSON error or if the package
+        is not found on any of the configured index servers.
+
+        API reference: https://warehouse.readthedocs.io/api-reference/json/
+        """
+        package_indexes = (
+            PackageIndex(url=index_url, file_storage_domain="")
+            for index_url in self.finder.search_scope.index_urls
+        )
+        for package_index in package_indexes:
+            url = "{url}/{name}/json".format(url=package_index.pypi_url, name=ireq.name)
+            try:
+                response = self.session.get(url)
+            except RequestException as e:
+                log.debug(
+                    "Fetch package info from PyPI failed: {url}: {e}".format(
+                        url=url, e=e
+                    )
+                )
+                continue
+
+            # Skip this index server: either it does not have the package
+            # or it might not support the JSON API
+            if response.status_code == 404:
+                continue
+
+            try:
+                data = response.json()
+            except ValueError as e:
+                log.debug(
+                    "Cannot parse JSON response from PyPI: {url}: {e}".format(
+                        url=url, e=e
+                    )
+                )
+                continue
+            return data
+        return None
+
     def get_hashes(self, ireq):
         """
         Given an InstallRequirement, return a set of hashes that represent all
@@ -257,6 +300,50 @@ def get_hashes(self, ireq):
         if not is_pinned_requirement(ireq):
             raise TypeError("Expected pinned requirement, got {}".format(ireq))
 
+        log.debug("{}".format(ireq.name))
+
+        with log.indentation():
+            hashes = self._get_hashes_from_pypi(ireq)
+            if hashes is None:
+                log.log("Couldn't get hashes from PyPI, fallback to hashing files")
+                return self._get_hashes_from_files(ireq)
+
+        return hashes
+
+    def _get_hashes_from_pypi(self, ireq):
+        """
+        Return a set of hashes from the PyPI JSON API for a given InstallRequirement.
+        Return None if fetching the data failed or a release or digest is missing.
+        """
+        project = self._get_project(ireq)
+        if project is None:
+            return None
+
+        _, version, _ = as_tuple(ireq)
+
+        try:
+            release_files = project["releases"][version]
+        except KeyError:
+            log.debug("Missing release files on PyPI")
+            return None
+
+        try:
+            hashes = {
+                "{algo}:{digest}".format(
+                    algo=FAVORITE_HASH, digest=file_["digests"][FAVORITE_HASH]
+                )
+                for file_ in release_files
+            }
+        except KeyError:
+            log.debug("Missing digests of release files on PyPI")
+            return None
+
+        return hashes
+
+    def _get_hashes_from_files(self, ireq):
+        """
+        Return a set of hashes for all release files of a given InstallRequirement.
+        """
         # We need to get all of the candidates that match our current version
         # pin, these will represent all of the files that could possibly
         # satisfy this constraint.
@@ -267,12 +354,9 @@ def get_hashes(self, ireq):
         )
         matching_candidates = candidates_by_version[matching_versions[0]]
 
-        log.debug(ireq.name)
-
-        with log.indentation():
-            return {
-                self._get_file_hash(candidate.link) for candidate in matching_candidates
-            }
+        return {
+            self._get_file_hash(candidate.link) for candidate in matching_candidates
+        }
 
     def _get_file_hash(self, link):
         log.debug("Hashing {}".format(link.url_without_fragment))
diff --git a/tests/test_repository_pypi.py b/tests/test_repository_pypi.py
index c04608f16..07054e250 100644
--- a/tests/test_repository_pypi.py
+++ b/tests/test_repository_pypi.py
@@ -4,7 +4,7 @@
 import pytest
 from pip._internal.models.link import Link
 from pip._internal.utils.urls import path_to_url
-from pip._vendor.requests import Session
+from pip._vendor.requests import HTTPError, Session
 
 from piptools._compat import PIP_VERSION
 from piptools.repositories import PyPIRepository
@@ -166,3 +166,138 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir):
     )
 
     assert not pypi_repository.options.cache_dir
+
+
+@pytest.mark.parametrize(
+    "project_data, expected_hashes",
+    (
+        pytest.param(
+            {"releases": {"0.1": [{"digests": {"sha256": "fake-hash"}}]}},
+            {"sha256:fake-hash"},
+            id="return single hash",
+        ),
+        pytest.param(
+            {
+                "releases": {
+                    "0.1": [
+                        {"digests": {"sha256": "fake-hash-number1"}},
+                        {"digests": {"sha256": "fake-hash-number2"}},
+                    ]
+                }
+            },
+            {"sha256:fake-hash-number1", "sha256:fake-hash-number2"},
+            id="return multiple hashes",
+        ),
+        pytest.param(None, None, id="not found project data"),
+        pytest.param({}, None, id="not found releases key"),
+        pytest.param({"releases": {}}, None, id="not found version"),
+        pytest.param({"releases": {"0.1": [{}]}}, None, id="not found digests"),
+        pytest.param(
+            {"releases": {"0.1": [{"digests": {}}]}}, None, id="digests are empty"
+        ),
+        pytest.param(
+            {"releases": {"0.1": [{"digests": {"md5": "fake-hash"}}]}},
+            None,
+            id="not found sha256 algo",
+        ),
+    ),
+)
+def test_get_hashes_from_pypi(from_line, tmpdir, project_data, expected_hashes):
+    """
+    Test PyPIRepository._get_hashes_from_pypi() returns expected hashes or None.
+    """
+
+    class MockPyPIRepository(PyPIRepository):
+        def _get_project(self, ireq):
+            return project_data
+
+    pypi_repository = MockPyPIRepository(
+        ["--no-cache-dir"], cache_dir=str(tmpdir / "pypi-repo-cache")
+    )
+    ireq = from_line("fake-package==0.1")
+
+    actual_hashes = pypi_repository._get_hashes_from_pypi(ireq)
+    assert actual_hashes == expected_hashes
+
+
+def test_get_project__returns_data(from_line, tmpdir, monkeypatch, pypi_repository):
+    """
+    Test PyPIRepository._get_project() returns the expected project data.
+    """
+    expected_data = {"releases": {"0.1": [{"digests": {"sha256": "fake-hash"}}]}}
+
+    class MockResponse:
+        status_code = 200
+
+        @staticmethod
+        def json():
+            return expected_data
+
+    def mock_get(*args, **kwargs):
+        return MockResponse()
+
+    monkeypatch.setattr(pypi_repository.session, "get", mock_get)
+    ireq = from_line("fake-package==0.1")
+
+    actual_data = pypi_repository._get_project(ireq)
+    assert actual_data == expected_data
+
+
+def test_get_project__handles_http_error(
+    from_line, tmpdir, monkeypatch, pypi_repository
+):
+    """
+    Test PyPIRepository._get_project() returns None if an HTTP error is raised.
+    """
+
+    def mock_get(*args, **kwargs):
+        raise HTTPError("test http error")
+
+    monkeypatch.setattr(pypi_repository.session, "get", mock_get)
+    ireq = from_line("fake-package==0.1")
+
+    actual_data = pypi_repository._get_project(ireq)
+    assert actual_data is None
+
+
+def test_get_project__handles_json_decode_error(
+    from_line, tmpdir, monkeypatch, pypi_repository
+):
+    """
+    Test PyPIRepository._get_project() returns None if a JSON decode error is raised.
+    """
+
+    class MockResponse:
+        status_code = 200
+
+        @staticmethod
+        def json():
+            raise ValueError("test json error")
+
+    def mock_get(*args, **kwargs):
+        return MockResponse()
+
+    monkeypatch.setattr(pypi_repository.session, "get", mock_get)
+    ireq = from_line("fake-package==0.1")
+
+    actual_data = pypi_repository._get_project(ireq)
+    assert actual_data is None
+
+
+def test_get_project__handles_404(from_line, tmpdir, monkeypatch, pypi_repository):
+    """
+    Test PyPIRepository._get_project() returns None if the PyPI
+    response's status code is 404.
+    """
+
+    class MockResponse:
+        status_code = 404
+
+    def mock_get(*args, **kwargs):
+        return MockResponse()
+
+    monkeypatch.setattr(pypi_repository.session, "get", mock_get)
+    ireq = from_line("fake-package==0.1")
+
+    actual_data = pypi_repository._get_project(ireq)
+    assert actual_data is None