From 5c0d25ad2412d1870166702e67642dc7e4ad41f1 Mon Sep 17 00:00:00 2001
From: Nick Gheorghita
Date: Mon, 2 Jul 2018 17:01:16 -0600
Subject: [PATCH] Write github uri backend

---
Review note: this copy amends the original commit (request timeout and
docstrings in ethpm/backends/http.py; docstrings and hex-formatted error
message in ethpm/utils/uri.py), so the commit id above and the post-image
blob ids on those two files' `index` lines no longer match the content.

 ethpm/backends/http.py                     | 39 ++++++++++++++++++
 ethpm/constants.py                         |  2 +
 ethpm/exceptions.py                        |  3 +-
 ethpm/utils/uri.py                         | 48 ++++++++++++++++++++++
 tests/conftest.py                          |  7 ++++
 tests/ethpm/backends/test_http_backends.py | 44 ++++++++++++++++++++
 tests/ethpm/utils/test_uri_utils.py        | 27 ++++++++++++
 7 files changed, 168 insertions(+), 2 deletions(-)
 create mode 100644 ethpm/backends/http.py
 create mode 100644 tests/ethpm/backends/test_http_backends.py

diff --git a/ethpm/backends/http.py b/ethpm/backends/http.py
new file mode 100644
index 0000000..f59deaf
--- /dev/null
+++ b/ethpm/backends/http.py
@@ -0,0 +1,39 @@
+import requests
+
+from ethpm.backends.base import (
+    BaseURIBackend,
+)
+from ethpm.constants import (
+    GITHUB_PREFIX,
+)
+from ethpm.utils.uri import (
+    is_valid_github_uri,
+    validate_github_uri_contents,
+)
+
+# Upper bound (seconds) on connecting to / reading from Github, so that a
+# stalled connection raises instead of blocking the caller indefinitely.
+REQUEST_TIMEOUT_SECONDS = 10
+
+
+class GithubOverHTTPSBackend(BaseURIBackend):
+    """
+    Backend for fetching the contents of a content-addressed Github URI.
+    """
+    def can_handle_uri(self, uri: str) -> bool:
+        return is_valid_github_uri(uri)
+
+    def fetch_uri_contents(self, uri: str) -> bytes:
+        """
+        Return the bytes served at `uri` (minus its fragment) after checking
+        them against the validation hash carried in the fragment.
+        """
+        http_uri, validation_hash = uri.split('#')
+        response = requests.get(http_uri, timeout=REQUEST_TIMEOUT_SECONDS)
+        response.raise_for_status()
+        validate_github_uri_contents(response.content, validation_hash)
+        return response.content
+
+    @property
+    def base_uri(self) -> str:
+        return GITHUB_PREFIX
diff --git a/ethpm/constants.py b/ethpm/constants.py
index 429c888..c933895 100644
--- a/ethpm/constants.py
+++ b/ethpm/constants.py
@@ -6,3 +6,5 @@
 IPFS_GATEWAY_PREFIX = 'https://gateway.ipfs.io/ipfs/'
 
 INFURA_GATEWAY_PREFIX = 'https://ipfs.infura.io/ipfs/'
+
+GITHUB_PREFIX = 'https://raw.githubusercontent.com/'
diff --git a/ethpm/exceptions.py b/ethpm/exceptions.py
index 7a2db6e..7fc1e15 100644
--- a/ethpm/exceptions.py
+++ b/ethpm/exceptions.py
@@ -7,8 +7,7 @@ class PyEthPMError(Exception):
 
 class InsufficientAssetsError(PyEthPMError):
     """
-    Raised when a Manifest or Package does not
-    contain the required assets to do something.
+    Raised when a Manifest or Package does not contain the required assets to do something.
     """
     pass
 
diff --git a/ethpm/utils/uri.py b/ethpm/utils/uri.py
index 2099e89..723870a 100644
--- a/ethpm/utils/uri.py
+++ b/ethpm/utils/uri.py
@@ -8,6 +8,8 @@
 )
 
 from eth_utils import (
+    decode_hex,
+    keccak,
     to_text,
 )
 
@@ -16,6 +18,7 @@
 )
 from ethpm.exceptions import (
     UriNotSupportedError,
+    ValidationError,
 )
 
 IPFS_SCHEME = 'ipfs'
@@ -24,6 +27,51 @@
 
 SWARM_SCHEMES = ['bzz', 'bzz-immutable', 'bzz-raw']
 
+GITHUB_AUTHORITY = 'raw.githubusercontent.com'
+
+
+def is_valid_github_uri(uri: str) -> bool:
+    """
+    Return a bool indicating whether or not the URI is a valid Github URI.
+    Valid Github URIs *must*:
+    - Have 'http' or 'https' scheme
+    - Have 'raw.githubusercontent.com' authority
+    - Have any path
+    - Have ending fragment containing any content hash
+    i.e. 'https://raw.githubusercontent.com/any/path#content_hash'
+    """
+    parse_result = parse.urlparse(uri)
+    path = parse_result.path
+    scheme = parse_result.scheme
+    authority = parse_result.netloc
+    content_hash = parse_result.fragment
+
+    if not path or not scheme or not content_hash:
+        return False
+
+    if scheme not in INTERNET_SCHEMES:
+        return False
+
+    if authority != GITHUB_AUTHORITY:
+        return False
+    return True
+
+
+def validate_github_uri_contents(contents: bytes, validation_hash: str) -> None:
+    """
+    Raise a ValidationError if the keccak hash of `contents` does not equal
+    the hex-encoded `validation_hash`.
+    """
+    hashed_contents = keccak(contents)
+    decoded_validation = decode_hex(validation_hash)
+    if hashed_contents != decoded_validation:
+        raise ValidationError(
+            "Invalid Github content-address URI. "
+            "Validation hash: {0} does not match the hash of URI contents: {1}.".format(
+                decoded_validation.hex(), hashed_contents.hex()
+            )
+        )
+
 
 def get_manifest_from_content_addressed_uri(uri: str) -> Dict[str, Any]:
     """
diff --git a/tests/conftest.py b/tests/conftest.py
index 8c2cdc5..cb17d75 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -58,6 +58,13 @@ def owned_manifest():
     with open(str(V2_PACKAGES_DIR / 'owned' / '1.0.0.json')) as file_obj:
         return json.load(file_obj)
 
+
+@pytest.fixture
+def owned_contract():
+    with open(str(V2_PACKAGES_DIR / 'owned' / 'contracts' / 'Owned.sol')) as file_obj:
+        return file_obj.read()
+
+
 # standalone = no `build_dependencies` which aren't fully supported yet
 @pytest.fixture
 def all_standalone_manifests(all_manifests):
diff --git a/tests/ethpm/backends/test_http_backends.py b/tests/ethpm/backends/test_http_backends.py
new file mode 100644
index 0000000..f7e0e14
--- /dev/null
+++ b/tests/ethpm/backends/test_http_backends.py
@@ -0,0 +1,44 @@
+import pytest
+import requests_mock
+
+from ethpm.backends.http import (
+    GithubOverHTTPSBackend,
+)
+from ethpm.exceptions import (
+    ValidationError,
+)
+
+
+@pytest.mark.parametrize(
+    'uri',
+    (
+        'http://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol#0xbfdea1fa5f33c30fee8443c5ffa1020027f8813e0007bb6f82aaa2843a7fdd60',  # noqa: E501
+        'http://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol#bfdea1fa5f33c30fee8443c5ffa1020027f8813e0007bb6f82aaa2843a7fdd60',  # noqa: E501
+        'https://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol#bfdea1fa5f33c30fee8443c5ffa1020027f8813e0007bb6f82aaa2843a7fdd60',  # noqa: E501
+        'https://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol#0xbfdea1fa5f33c30fee8443c5ffa1020027f8813e0007bb6f82aaa2843a7fdd60',  # noqa: E501
+    )
+)
+def test_github_over_https_backend(uri, owned_contract):
+    base_uri = 'https://raw.githubusercontent.com/'
+    backend = GithubOverHTTPSBackend()
+    assert backend.base_uri == base_uri
+    assert backend.can_handle_uri(uri) is True
+    with requests_mock.Mocker() as m:
+        m.get(requests_mock.ANY, text=owned_contract)
+        response = backend.fetch_uri_contents(uri)
+        assert response.startswith(b'pragma')
+
+
+@pytest.mark.parametrize(
+    'uri',
+    (
+        'http://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol#bfdea1fa5f33c30fee8443c5ffa1020027f8813e0007bb6f82aaa2843a7fdd61',  # noqa: E501
+        'https://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol#0xbfdea1fa5f33c30fee8443c5ffa1020027f8813e0007bb6f82aaa2843a7fdd61',  # noqa: E501
+    )
+)
+def test_fetch_uri_contents_raises_exception_with_invalid_content_hash(uri, owned_contract):
+    backend = GithubOverHTTPSBackend()
+    with pytest.raises(ValidationError):
+        with requests_mock.Mocker() as m:
+            m.get(requests_mock.ANY, text=owned_contract)
+            backend.fetch_uri_contents(uri)
diff --git a/tests/ethpm/utils/test_uri_utils.py b/tests/ethpm/utils/test_uri_utils.py
index 8fd135e..5ac2667 100644
--- a/tests/ethpm/utils/test_uri_utils.py
+++ b/tests/ethpm/utils/test_uri_utils.py
@@ -7,6 +7,7 @@
 )
 from ethpm.utils.uri import (
     get_manifest_from_content_addressed_uri,
+    is_valid_github_uri,
 )
 
 
@@ -44,3 +45,29 @@ def test_get_manifest_from_content_addressed_uris_for_supported_schemes(uri, sou
 def test_get_manfifest_from_content_addressed_uri_raises_exception_for_unsupported_schemes(uri):
     with pytest.raises(UriNotSupportedError):
         get_manifest_from_content_addressed_uri(uri)
+
+
+@pytest.mark.parametrize(
+    'uri,expected',
+    (
+        # no scheme
+        ('raw.githubusercontent.com/any/path#0x123', False),
+        # invalid authority
+        ('http://github.com/any/path#0x123', False),
+        ('https://github.com/any/path#0x123', False),
+        # no path
+        ('http://raw.githubusercontent.com#0x123', False),
+        ('https://raw.githubusercontent.com#0x123', False),
+        # no content hash
+        ('http://raw.githubusercontent.com/any/path', False),
+        ('https://raw.githubusercontent.com/any/path', False),
+        ('http://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol', False),  # noqa: E501
+        # valid github urls
+        ('http://raw.githubusercontent.com/any/path#0x123', True),
+        ('https://raw.githubusercontent.com/any/path#0x123', True),
+        ('http://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol#0x123', True),  # noqa: E501
+    )
+)
+def test_is_valid_github_uri(uri, expected):
+    actual = is_valid_github_uri(uri)
+    assert actual is expected