diff --git a/docs/uri_backends.rst b/docs/uri_backends.rst index 11a42cb..d3edd6a 100644 --- a/docs/uri_backends.rst +++ b/docs/uri_backends.rst @@ -1,6 +1,26 @@ URI Schemes and Backends ======================== +BaseURIBackend +-------------- + +``Py-EthPM`` uses the ``BaseURIBackend`` as the parent class for all of its URI backends. To write your own backend, you must implement the following methods. + +.. py:method:: BaseURIBackend.can_resolve_uri(uri) + + Returns a bool indicating whether or not this backend is capable of resolving the given URI to a manifest. + A content-addressed URI pointing to a valid manifest is said to be capable of "resolving". + +.. py:method:: BaseURIBackend.can_translate_uri(uri) + + Returns a bool indicating whether this backend class can translate the given URI to a corresponding content-addressed URI. + A registry URI is said to be capable of "translating" if it points to another content-addressed URI in its respective on-chain registry. + +.. py:method:: BaseURIBackend.fetch_uri_contents(uri) + + Fetches the contents stored at the provided uri, if an available backend is capable of resolving the URI. Validates that contents stored at uri match the content hash suffixing the uri. + + IPFS ---- @@ -8,24 +28,36 @@ IPFS - ``InfuraIPFSBackend`` (default) - `https://ipfs.infura.io` -- ``IPFSGatewayBackend`` +- ``IPFSGatewayBackend`` (temporarily deprecated) - `https://ipfs.io/ipfs/` - ``LocalIPFSBacked`` - connects to a local IPFS API gateway running on port 5001. - ``DummyIPFSBackend`` - Won't pin/fetch files to an actual IPFS node, but mocks out this behavior. -.. py:method:: BaseIPFSBackend.can_resolve_uri(uri) +.. py:method:: BaseIPFSBackend.pin_assets(file_or_directory_path) - Returns a bool indicating whether or not this backend is capable of handling the given URI. + Pins asset(s) found at the given path and returns the pinned asset data. -.. 
py:method:: BaseIPFSBackend.fetch_uri_contents(uri) - Fetches the contents stored at a URI. +HTTP +---- -.. py:method:: BaseIPFSBackend.pin_assets(file_or_directory_path) +``Py-EthPM`` offers a backend to fetch files from Github, ``GithubOverHTTPSBackend``. + +A valid Github URI *should* conform to the following scheme. + +.. code:: python + + https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash + +To generate a valid Github PM URI: + +- Go to the target manifest in your browser. +- Press ``y`` to generate the permalink in the address bar. +- Replace ``"github"`` with ``"raw.githubusercontent"``, and remove the ``"blob"`` namespace from the URI. +- Suffix the URI with ``#`` followed by the ``keccak`` hash of the bytes found at the Github URI. - Pins asset(s) found at the given path and returns the pinned asset data. Registry URIs ------------- diff --git a/ethpm/backends/base.py b/ethpm/backends/base.py index 2b03230..fb755d3 100644 --- a/ethpm/backends/base.py +++ b/ethpm/backends/base.py @@ -22,7 +22,6 @@ def can_translate_uri(self, uri: str) -> bool: """ Return a bool indicating whether this backend class can translate the given URI to a corresponding content-addressed URI. - i.e. 
a registry URI pointing to another content-addressed URI onchain """ pass diff --git a/ethpm/backends/http.py b/ethpm/backends/http.py index 7c2fdd1..6ecb377 100644 --- a/ethpm/backends/http.py +++ b/ethpm/backends/http.py @@ -1,9 +1,9 @@ import requests from ethpm.backends.base import BaseURIBackend -from ethpm.constants import GITHUB_AUTHORITY +from ethpm.constants import RAW_GITHUB_AUTHORITY from ethpm.utils.uri import is_valid_github_uri -from ethpm.validation import validate_github_uri_contents +from ethpm.validation import validate_uri_contents class GithubOverHTTPSBackend(BaseURIBackend): @@ -16,7 +16,8 @@ def can_resolve_uri(self, uri: str) -> bool: def can_translate_uri(self, uri: str) -> bool: """ - GithubOverHTTPSBackend uri's must resolve to a valid manifest. + GithubOverHTTPSBackend uri's must resolve to a valid manifest, + and cannot translate to another content-addressed URI. """ return False @@ -24,9 +25,9 @@ def fetch_uri_contents(self, uri: str) -> bytes: http_uri, validation_hash = uri.split("#") response = requests.get(http_uri) response.raise_for_status() - validate_github_uri_contents(response.content, validation_hash) + validate_uri_contents(response.content, validation_hash) return response.content @property def base_uri(self) -> str: - return GITHUB_AUTHORITY + return RAW_GITHUB_AUTHORITY diff --git a/ethpm/backends/ipfs.py b/ethpm/backends/ipfs.py index 59846d1..c800607 100644 --- a/ethpm/backends/ipfs.py +++ b/ethpm/backends/ipfs.py @@ -13,8 +13,13 @@ INFURA_GATEWAY_PREFIX, IPFS_GATEWAY_PREFIX, ) -from ethpm.exceptions import CannotHandleURI -from ethpm.utils.ipfs import dummy_ipfs_pin, extract_ipfs_path_from_uri, is_ipfs_uri +from ethpm.exceptions import CannotHandleURI, ValidationError +from ethpm.utils.ipfs import ( + dummy_ipfs_pin, + extract_ipfs_path_from_uri, + generate_file_hash, + is_ipfs_uri, +) class BaseIPFSBackend(BaseURIBackend): @@ -56,7 +61,14 @@ def __init__(self) -> None: def fetch_uri_contents(self, uri: str) -> bytes: 
ipfs_hash = extract_ipfs_path_from_uri(uri) - return self.client.cat(ipfs_hash) + contents = self.client.cat(ipfs_hash) + validation_hash = generate_file_hash(contents) + if validation_hash != ipfs_hash: + raise ValidationError( + "Hashed IPFS contents retrieved from uri: {0} " + "do not match its content hash.".format(uri) + ) + return contents @property @abstractmethod diff --git a/ethpm/constants.py b/ethpm/constants.py index fd22b06..eb49dc0 100644 --- a/ethpm/constants.py +++ b/ethpm/constants.py @@ -13,4 +13,6 @@ INFURA_GATEWAY_PREFIX = "https://ipfs.infura.io" -GITHUB_AUTHORITY = "https://raw.githubusercontent.com/" +INTERNET_SCHEMES = ["http", "https"] + +RAW_GITHUB_AUTHORITY = "raw.githubusercontent.com" diff --git a/ethpm/utils/backend.py b/ethpm/utils/backend.py index d0a7fd7..ea9f02b 100644 --- a/ethpm/utils/backend.py +++ b/ethpm/utils/backend.py @@ -3,6 +3,7 @@ from eth_utils import to_tuple from ethpm.backends.base import BaseURIBackend +from ethpm.backends.http import GithubOverHTTPSBackend from ethpm.backends.ipfs import ( DummyIPFSBackend, InfuraIPFSBackend, @@ -16,6 +17,7 @@ InfuraIPFSBackend, DummyIPFSBackend, LocalIPFSBackend, + GithubOverHTTPSBackend, RegistryURIBackend, ] diff --git a/ethpm/utils/ipfs.py b/ethpm/utils/ipfs.py index bfb515b..cc9ed2a 100644 --- a/ethpm/utils/ipfs.py +++ b/ethpm/utils/ipfs.py @@ -12,7 +12,7 @@ def dummy_ipfs_pin(path: Path) -> Dict[str, str]: Return IPFS data as if file was pinned to an actual node. 
""" ipfs_return = { - "Hash": generate_file_hash(path), + "Hash": generate_file_hash(path.read_bytes()), "Name": path.name, "Size": str(path.stat().st_size), } @@ -69,12 +69,11 @@ def multihash(value: bytes) -> bytes: return multihash_bytes -def serialize_file(file_path: Path) -> PBNode: - file_data = open(file_path, "rb").read() - file_size = len(file_data) +def serialize_bytes(file_bytes: bytes) -> PBNode: + file_size = len(file_bytes) data_protobuf = Data( - Type=Data.DataType.Value("File"), Data=file_data, filesize=file_size + Type=Data.DataType.Value("File"), Data=file_bytes, filesize=file_size ) data_protobuf_bytes = data_protobuf.SerializeToString() @@ -83,8 +82,8 @@ def serialize_file(file_path: Path) -> PBNode: return file_protobuf -def generate_file_hash(file_path: Path) -> str: - file_protobuf = serialize_file(file_path) +def generate_file_hash(content_bytes: bytes) -> str: + file_protobuf = serialize_bytes(content_bytes) file_protobuf_bytes = file_protobuf.SerializeToString() file_multihash = multihash(file_protobuf_bytes) return b58encode(file_multihash) diff --git a/ethpm/utils/uri.py b/ethpm/utils/uri.py index ff9517f..8b25bc4 100644 --- a/ethpm/utils/uri.py +++ b/ethpm/utils/uri.py @@ -1,19 +1,8 @@ -import json -from typing import Any, Dict from urllib import parse -from eth_utils import is_text, to_text +from eth_utils import is_text -from ethpm.backends.ipfs import get_ipfs_backend -from ethpm.exceptions import CannotHandleURI - -IPFS_SCHEME = "ipfs" - -INTERNET_SCHEMES = ["http", "https"] - -SWARM_SCHEMES = ["bzz", "bzz-immutable", "bzz-raw"] - -RAW_GITHUB_AUTHORITY = "raw.githubusercontent.com" +from ethpm.constants import INTERNET_SCHEMES, RAW_GITHUB_AUTHORITY def is_valid_github_uri(uri: str) -> bool: @@ -23,8 +12,8 @@ def is_valid_github_uri(uri: str) -> bool: - Have 'http' or 'https' scheme - Have 'raw.githubusercontent.com' authority - Have any path (*should* include a commit hash in path) - - Have ending fragment containing any content 
hash - i.e. 'https://raw.githubusercontent.com/any/path#content_hash + - Have ending fragment containing the keccak hash of the uri contents + ex. 'https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash' """ if not is_text(uri): return False @@ -43,32 +32,3 @@ def is_valid_github_uri(uri: str) -> bool: if authority != RAW_GITHUB_AUTHORITY: return False return True - - -def get_manifest_from_content_addressed_uri(uri: str) -> Dict[str, Any]: - """ - Return manifest data stored at a content addressed URI. - """ - parse_result = parse.urlparse(uri) - scheme = parse_result.scheme - - if scheme == IPFS_SCHEME: - ipfs_backend = get_ipfs_backend() - if ipfs_backend.can_resolve_uri(uri): - raw_manifest_data = ipfs_backend.fetch_uri_contents(uri) - manifest_data = to_text(raw_manifest_data) - return json.loads(manifest_data) - else: - raise TypeError( - "The IPFS Backend: {0} cannot handle the given URI: {1}.".format( - type(ipfs_backend).__name__, uri - ) - ) - - if scheme in INTERNET_SCHEMES: - raise CannotHandleURI("Internet URIs are not yet supported.") - - if scheme in SWARM_SCHEMES: - raise CannotHandleURI("Swarm URIs are not yet supported.") - - raise CannotHandleURI("The URI scheme:{0} is not supported.".format(scheme)) diff --git a/ethpm/validation.py b/ethpm/validation.py index 93b1af6..76450f2 100644 --- a/ethpm/validation.py +++ b/ethpm/validation.py @@ -176,15 +176,15 @@ def validate_single_matching_uri(all_blockchain_uris: List[str], w3: Web3) -> st return matching_uris[0] -def validate_github_uri_contents(contents: bytes, validation_hash: str) -> None: +def validate_uri_contents(contents: bytes, validation_hash: str) -> None: """ - Validate that the contents match the validation_hash associated with a Github URI. + Validate that the keccak(contents) matches the validation_hash. 
""" hashed_contents = keccak(contents) decoded_validation = decode_hex(validation_hash) if hashed_contents != decoded_validation: raise ValidationError( - "Invalid Github content-addressed URI. " + "Invalid content-addressed URI. " "Validation hash:{0} does not match the hash of URI contents: {1}.".format( decoded_validation, hashed_contents ) diff --git a/setup.py b/setup.py index ee2982e..7c33a63 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ 'jsonschema>=2.6.0,<3', 'protobuf>=3.0.0,<4', 'py-solc>=2.1.0,<3', - 'pytest-ethereum==0.1.2a.6', + 'pytest-ethereum==0.1.2a.7', 'rlp>=1.0.1,<2', 'web3[tester]>=4.7,<5', ], diff --git a/tests/conftest.py b/tests/conftest.py index 56f9a51..a50cc78 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -106,7 +106,7 @@ def _get_factory(package, factory_name): @pytest.fixture def owned_contract(): - with open(str(V2_PACKAGES_DIR / "owned" / "contracts" / "Owned.sol")) as file_obj: + with open(V2_PACKAGES_DIR / "owned" / "contracts" / "Owned.sol") as file_obj: return file_obj.read() diff --git a/tests/ethpm/backends/test_http_backends.py b/tests/ethpm/backends/test_http_backends.py index f2fe30b..9b6fab4 100644 --- a/tests/ethpm/backends/test_http_backends.py +++ b/tests/ethpm/backends/test_http_backends.py @@ -1,20 +1,27 @@ import pytest -import requests_mock +from ethpm import Package from ethpm.backends.http import GithubOverHTTPSBackend -from ethpm.constants import GITHUB_AUTHORITY +from ethpm.constants import RAW_GITHUB_AUTHORITY +from ethpm.exceptions import ValidationError @pytest.mark.parametrize( "uri", ( - "https://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol#bfdea1fa5f33c30fee8443c5ffa1020027f8813e0007bb6f82aaa2843a7fdd60", # noqa: E501 + "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501 
+ "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#0x01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501 ), ) -def test_github_over_https_backend_fetch_uri_contents(uri, owned_contract): +def test_github_over_https_backend_fetch_uri_contents(uri, owned_contract, w3): backend = GithubOverHTTPSBackend() - assert backend.base_uri == GITHUB_AUTHORITY - with requests_mock.Mocker() as m: - m.get(requests_mock.ANY, text=owned_contract) - response = backend.fetch_uri_contents(uri) - assert response.startswith(b"pragma") + assert backend.base_uri == RAW_GITHUB_AUTHORITY + # integration with Package.from_uri + owned_package = Package.from_uri(uri, w3) + assert owned_package.name == "owned" + + +def test_github_over_https_backend_raises_error_with_invalid_content_hash(w3): + invalid_uri = "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2ca111" # noqa: E501 + with pytest.raises(ValidationError): + Package.from_uri(invalid_uri, w3) diff --git a/tests/ethpm/backends/test_ipfs_backends.py b/tests/ethpm/backends/test_ipfs_backends.py index e99c49f..c028303 100644 --- a/tests/ethpm/backends/test_ipfs_backends.py +++ b/tests/ethpm/backends/test_ipfs_backends.py @@ -3,7 +3,6 @@ from eth_utils import to_text import pytest -import requests_mock from ethpm import V2_PACKAGES_DIR from ethpm.backends.ipfs import ( @@ -38,24 +37,18 @@ def add(self, file_or_dir_path, recursive): @pytest.mark.parametrize( "base_uri,backend", ((INFURA_GATEWAY_PREFIX, InfuraIPFSBackend()),) ) -def test_ipfs_and_infura_gateway_backends_fetch_uri_contents( - base_uri, backend, safe_math_manifest -): +def test_ipfs_and_infura_gateway_backends_fetch_uri_contents(base_uri, backend): uri = "ipfs://Qme4otpS88NV8yQi8TfTP89EsQC5bko3F5N1yhRoi6cwGV" assert backend.base_uri == base_uri - with 
requests_mock.Mocker() as m: - m.get(requests_mock.ANY, text=json.dumps(safe_math_manifest)) - contents = backend.fetch_uri_contents(uri) - contents_dict = json.loads(to_text(contents)) - assert contents_dict["package_name"] == "safe-math-lib" + contents = backend.fetch_uri_contents(uri) + assert contents.startswith(b"pragma solidity") -def test_local_ipfs_backend(monkeypatch, fake_client): +def test_local_ipfs_backend(monkeypatch): uri = "ipfs://Qme4otpS88NV8yQi8TfTP89EsQC5bko3F5N1yhRoi6cwGV" backend = LocalIPFSBackend() - backend.client = fake_client contents = backend.fetch_uri_contents(uri) - assert contents.startswith("Qm") + assert contents.startswith(b"pragma") @pytest.mark.parametrize( diff --git a/tests/ethpm/test_dependencies.py b/tests/ethpm/test_dependencies.py index bb62528..7bf8c88 100644 --- a/tests/ethpm/test_dependencies.py +++ b/tests/ethpm/test_dependencies.py @@ -43,7 +43,7 @@ def test_get_dependency_package(dependencies): def test_validate_build_dependencies(dummy_ipfs_backend, piper_coin_manifest): result = validate_build_dependency( - "standard-token", "ipfs://QmVu9zuza5mkJwwcFdh2SXBugm1oSgZVuEKkph9XLsbUwg" + "standard-token", "ipfs://QmVu9zuza5mkJwwcFdh2SXBugm1oSgZVuEKkph9XLsbUwg#0x123" ) assert result is None diff --git a/tests/ethpm/utils/test_ipfs_utils.py b/tests/ethpm/utils/test_ipfs_utils.py index 759901c..f901a65 100644 --- a/tests/ethpm/utils/test_ipfs_utils.py +++ b/tests/ethpm/utils/test_ipfs_utils.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from ethpm.utils.ipfs import extract_ipfs_path_from_uri, generate_file_hash, is_ipfs_uri @@ -111,5 +113,5 @@ def test_is_ipfs_uri(value, expected): def test_generate_file_hash(tmpdir, file_name, file_contents, expected): p = tmpdir.mkdir("sub").join(file_name) p.write(file_contents) - ipfs_multihash = generate_file_hash(p) + ipfs_multihash = generate_file_hash(Path(p).read_bytes()) assert ipfs_multihash == expected diff --git a/tests/ethpm/utils/test_uri_utils.py 
b/tests/ethpm/utils/test_uri_utils.py index adff893..625ac8a 100644 --- a/tests/ethpm/utils/test_uri_utils.py +++ b/tests/ethpm/utils/test_uri_utils.py @@ -1,45 +1,8 @@ import pytest -from ethpm.exceptions import CannotHandleURI, ValidationError -from ethpm.utils.uri import get_manifest_from_content_addressed_uri, is_valid_github_uri -from ethpm.validation import validate_github_uri_contents - - -@pytest.mark.parametrize( - "uri,source", (("ipfs://QmbeVyFLSuEUxiXKwSsEjef6icpdTdA4kGG9BcrJXKNKUW", "ipfs"),) -) -def test_get_manifest_from_content_addressed_uris_for_supported_schemes( - uri, source, dummy_ipfs_backend -): - manifest = get_manifest_from_content_addressed_uri(uri) - assert "version" in manifest - assert "package_name" in manifest - assert "manifest_version" in manifest - - -@pytest.mark.parametrize( - "uri", - ( - # filesystem - ("file:///path_to_erc20.json"), - # registry URI scheme - ("erc1128://packages.zeppelinos.eth/erc20/v1.0.0"), - # swarm - ("bzz://da6adeeb4589d8652bbe5679aae6b6409ec85a20e92a8823c7c99e25dba9493d"), - ( - "bzz-immutable:://da6adeeb4589d8652bbe5679aae6b6409ec85a20e92a8823c7c99e25dba9493d" - ), - ("bzz-raw://da6adeeb4589d8652bbe5679aae6b6409ec85a20e92a8823c7c99e25dba9493d"), - # internet - ("http://github.com/ethpm/ethpm-spec/examples/owned/1.0.0.json#content_hash"), - ("https://github.com/ethpm/ethpm-spec/examples/owned/1.0.0.json#content_hash"), - ), -) -def test_get_manfifest_from_content_addressed_uri_raises_exception_for_unsupported_schemes( - uri -): - with pytest.raises(CannotHandleURI): - get_manifest_from_content_addressed_uri(uri) +from ethpm.exceptions import ValidationError +from ethpm.utils.uri import is_valid_github_uri +from ethpm.validation import validate_uri_contents @pytest.mark.parametrize( @@ -84,13 +47,13 @@ def test_is_valid_github_uri(uri, expected): (b"xxx", "0xbc6bb462e38af7da48e0ae7b5cbae860141c04e5af2cf92328cd6548df111fcb"), ), ) -def test_validate_github_uri_contents(contents, hashed): - assert 
validate_github_uri_contents(contents, hashed) is None +def test_validate_uri_contents(contents, hashed): + assert validate_uri_contents(contents, hashed) is None @pytest.mark.parametrize( "contents,hashed", ((123, "1234"), (b"xxx", "1234"), (b"123", "0x1234")) ) -def test_validate_github_uri_contents_invalidates_incorrect_matches(contents, hashed): +def test_validate_uri_contents_invalidates_incorrect_matches(contents, hashed): with pytest.raises(ValidationError): - validate_github_uri_contents(contents, hashed) + validate_uri_contents(contents, hashed)