Skip to content

Commit

Permalink
Merge pull request #61 from njgheorghita/http-backend
Browse files Browse the repository at this point in the history
Http backend
  • Loading branch information
njgheorghita authored Oct 17, 2018
2 parents 17331eb + 1f977b5 commit 19cc54a
Show file tree
Hide file tree
Showing 16 changed files with 264 additions and 36 deletions.
48 changes: 40 additions & 8 deletions docs/uri_backends.rst
Original file line number Diff line number Diff line change
@@ -1,31 +1,63 @@
URI Schemes and Backends
========================

BaseURIBackend
--------------

``Py-EthPM`` uses the ``BaseURIBackend`` as the parent class for all of its URI backends. To write your own backend, it must implement the following methods.

.. py:method:: BaseURIBackend.can_resolve_uri(uri)
Return a bool indicating whether or not this backend is capable of resolving the given URI to a manifest.
A content-addressed URI pointing to a valid manifest is said to be capable of "resolving".

.. py:method:: BaseURIBackend.can_translate_uri(uri)
Return a bool indicating whether this backend class can translate the given URI to a corresponding content-addressed URI.
A registry URI is said to be capable of "translating" if it points to another content-addressed URI in its respective on-chain registry.

.. py:method:: BaseURIBackend.fetch_uri_contents(uri)
Fetch the contents stored at the provided uri, if an available backend is capable of resolving the URI. Validates that contents stored at uri match the content hash suffixing the uri.


IPFS
----

``Py-EthPM`` has multiple backends available to fetch/pin files to IPFS. The desired backend can be set via the environment variable: ``ETHPM_IPFS_BACKEND_CLASS``.

- ``InfuraIPFSBackend`` (default)
- `https://ipfs.infura.io`
- ``IPFSGatewayBackend``
- ``IPFSGatewayBackend`` (temporarily deprecated)
- `https://ipfs.io/ipfs/`
- ``LocalIPFSBackend``
- connects to a local IPFS API gateway running on port 5001.
- Connect to a local IPFS API gateway running on port 5001.
- ``DummyIPFSBackend``
- Won't pin/fetch files to an actual IPFS node, but mocks out this behavior.

.. py:method:: BaseIPFSBackend.can_resolve_uri(uri)
.. py:method:: BaseIPFSBackend.pin_assets(file_or_directory_path)
Pin asset(s) found at the given path and return the pinned asset data.

Returns a bool indicating whether or not this backend is capable of handling the given URI.

.. py:method:: BaseIPFSBackend.fetch_uri_contents(uri)
HTTP
----

Fetches the contents stored at a URI.
``Py-EthPM`` offers a backend to fetch files from Github, ``GithubOverHTTPSBackend``.

.. py:method:: BaseIPFSBackend.pin_assets(file_or_directory_path)
A valid Github URI *should* conform to the following scheme.

.. code:: python
https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash
To generate a valid Github PM URI.

- Go to the target manifest in your browser.
- Press ``y`` to generate the permalink in the address bar.
- Replace ``"github"`` with ``"raw.githubusercontent"``, and remove the ``"blob"`` namespace from the URI.
- Suffix the URI with ``#`` followed by the ``keccak`` hash of the bytes found at the Github URI.

Pins asset(s) found at the given path and returns the pinned asset data.

Registry URIs
-------------
Expand Down
37 changes: 37 additions & 0 deletions ethpm/backends/http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from urllib import parse

import requests

from ethpm.backends.base import BaseURIBackend
from ethpm.constants import RAW_GITHUB_AUTHORITY
from ethpm.utils.uri import is_valid_github_uri
from ethpm.validation import validate_uri_contents


class GithubOverHTTPSBackend(BaseURIBackend):
    """
    Backend for content-addressed URIs served from Github's raw-content host.

    A URI handled by this backend carries the expected content hash in its
    fragment; the fetched bytes are validated against that hash before being
    returned.
    """

    # ``requests`` waits indefinitely unless a timeout is supplied, so a
    # stalled connection would otherwise hang the caller forever.
    REQUEST_TIMEOUT_SECONDS = 30

    def can_resolve_uri(self, uri: str) -> bool:
        """
        Return True if ``uri`` passes ``is_valid_github_uri``.
        """
        return is_valid_github_uri(uri)

    def can_translate_uri(self, uri: str) -> bool:
        """
        GithubOverHTTPSBackend uri's must resolve to a valid manifest,
        and cannot translate to another content-addressed URI.
        """
        return False

    def fetch_uri_contents(self, uri: str) -> bytes:
        """
        Fetch the bytes stored at ``uri`` and validate them against its fragment.

        The fragment (everything after ``#``) is treated as the validation
        hash and is stripped from the URL that is actually requested.

        Raises ``requests.HTTPError`` (via ``raise_for_status``) on a non-2xx
        response, ``requests.Timeout`` if the server does not respond within
        ``REQUEST_TIMEOUT_SECONDS``, and whatever ``validate_uri_contents``
        raises when the hash does not match.
        """
        parsed_uri = parse.urlparse(uri)
        validation_hash = parsed_uri.fragment
        # Rebuild the URL from scheme, authority and path only, so the
        # fragment is never sent as part of the request.
        http_uri = f"{parsed_uri.scheme}://{parsed_uri.netloc}{parsed_uri.path}"
        response = requests.get(http_uri, timeout=self.REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        validate_uri_contents(response.content, validation_hash)
        return response.content

    @property
    def base_uri(self) -> str:
        """
        Return the authority (host) this backend fetches from.
        """
        return RAW_GITHUB_AUTHORITY
17 changes: 14 additions & 3 deletions ethpm/backends/ipfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@
INFURA_GATEWAY_PREFIX,
IPFS_GATEWAY_PREFIX,
)
from ethpm.exceptions import CannotHandleURI
from ethpm.utils.ipfs import dummy_ipfs_pin, extract_ipfs_path_from_uri, is_ipfs_uri
from ethpm.exceptions import CannotHandleURI, ValidationError
from ethpm.utils.ipfs import (
dummy_ipfs_pin,
extract_ipfs_path_from_uri,
generate_file_hash,
is_ipfs_uri,
)


class BaseIPFSBackend(BaseURIBackend):
Expand Down Expand Up @@ -56,7 +61,13 @@ def __init__(self) -> None:

def fetch_uri_contents(self, uri: str) -> bytes:
ipfs_hash = extract_ipfs_path_from_uri(uri)
return self.client.cat(ipfs_hash)
contents = self.client.cat(ipfs_hash)
validation_hash = generate_file_hash(contents)
if validation_hash != ipfs_hash:
raise ValidationError(
f"Hashed IPFS contents retrieved from uri: {uri} do not match its content hash."
)
return contents

@property
@abstractmethod
Expand Down
4 changes: 4 additions & 0 deletions ethpm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@
INFURA_API_KEY = "4f1a358967c7474aae6f8f4a7698aefc"

INFURA_GATEWAY_PREFIX = "https://ipfs.infura.io"

INTERNET_SCHEMES = ["http", "https"]

RAW_GITHUB_AUTHORITY = "raw.githubusercontent.com"
3 changes: 1 addition & 2 deletions ethpm/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ class PyEthPMError(Exception):

class InsufficientAssetsError(PyEthPMError):
"""
Raised when a Manifest or Package does not
contain the required assets to do something.
Raised when a Manifest or Package does not contain the required assets to do something.
"""

pass
Expand Down
2 changes: 2 additions & 0 deletions ethpm/utils/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from eth_utils import to_tuple

from ethpm.backends.base import BaseURIBackend
from ethpm.backends.http import GithubOverHTTPSBackend
from ethpm.backends.ipfs import (
DummyIPFSBackend,
InfuraIPFSBackend,
Expand All @@ -16,6 +17,7 @@
InfuraIPFSBackend,
DummyIPFSBackend,
LocalIPFSBackend,
GithubOverHTTPSBackend,
RegistryURIBackend,
]

Expand Down
13 changes: 6 additions & 7 deletions ethpm/utils/ipfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def dummy_ipfs_pin(path: Path) -> Dict[str, str]:
Return IPFS data as if file was pinned to an actual node.
"""
ipfs_return = {
"Hash": generate_file_hash(path),
"Hash": generate_file_hash(path.read_bytes()),
"Name": path.name,
"Size": str(path.stat().st_size),
}
Expand Down Expand Up @@ -69,12 +69,11 @@ def multihash(value: bytes) -> bytes:
return multihash_bytes


def serialize_file(file_path: Path) -> PBNode:
file_data = open(file_path, "rb").read()
file_size = len(file_data)
def serialize_bytes(file_bytes: bytes) -> PBNode:
file_size = len(file_bytes)

data_protobuf = Data(
Type=Data.DataType.Value("File"), Data=file_data, filesize=file_size
Type=Data.DataType.Value("File"), Data=file_bytes, filesize=file_size
)
data_protobuf_bytes = data_protobuf.SerializeToString()

Expand All @@ -83,8 +82,8 @@ def serialize_file(file_path: Path) -> PBNode:
return file_protobuf


def generate_file_hash(file_path: Path) -> str:
file_protobuf = serialize_file(file_path)
def generate_file_hash(content_bytes: bytes) -> str:
file_protobuf = serialize_bytes(content_bytes)
file_protobuf_bytes = file_protobuf.SerializeToString()
file_multihash = multihash(file_protobuf_bytes)
return b58encode(file_multihash)
34 changes: 34 additions & 0 deletions ethpm/utils/uri.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from urllib import parse

from eth_utils import is_text

from ethpm.constants import INTERNET_SCHEMES, RAW_GITHUB_AUTHORITY


def is_valid_github_uri(uri: str) -> bool:
    """
    Tell whether ``uri`` is an acceptable content-addressed Github URI.

    To be accepted, a URI *must*:
    - Be text
    - Use an 'http' or 'https' scheme
    - Have 'raw.githubusercontent.com' as its authority
    - Have a non-empty path (which *should* include a commit hash)
    - End with a non-empty fragment holding the keccak hash of the uri contents
    ex. 'https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash'
    """
    if not is_text(uri):
        return False

    parts = parse.urlparse(uri)
    # Path, scheme and fragment must all be present before the scheme and
    # authority are compared against the allowed values.
    has_required_parts = bool(parts.path and parts.scheme and parts.fragment)
    return (
        has_required_parts
        and parts.scheme in INTERNET_SCHEMES
        and parts.netloc == RAW_GITHUB_AUTHORITY
    )
25 changes: 24 additions & 1 deletion ethpm/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@
from typing import Any, List
from urllib import parse

from eth_utils import is_address, is_canonical_address, is_checksum_address, is_text
from eth_utils import (
decode_hex,
is_address,
is_canonical_address,
is_checksum_address,
is_text,
keccak,
to_hex,
)
from web3 import Web3

from ethpm.constants import PACKAGE_NAME_REGEX, REGISTRY_URI_SCHEME
Expand Down Expand Up @@ -167,3 +175,18 @@ def validate_single_matching_uri(all_blockchain_uris: List[str], w3: Web3) -> st
)
)
return matching_uris[0]


def validate_uri_contents(contents: bytes, validation_hash: str) -> None:
    """
    Check that keccak(contents) equals the hex-decoded validation_hash.

    Returns None when the digests match; raises ValidationError, reporting
    both digests as hex, when they differ.
    """
    actual_digest = keccak(contents)
    expected_digest = decode_hex(validation_hash)
    if actual_digest == expected_digest:
        return

    mismatch_template = (
        "Invalid content-addressed URI. "
        "Validation hash:{0} does not match the hash of URI contents: {1}."
    )
    raise ValidationError(
        mismatch_template.format(to_hex(expected_digest), to_hex(actual_digest))
    )
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
'jsonschema>=2.6.0,<3',
'protobuf>=3.0.0,<4',
'py-solc>=2.1.0,<3',
'pytest-ethereum==0.1.2a.6',
'pytest-ethereum==0.1.2a.7',
'rlp>=1.0.1,<2',
'web3[tester]>=4.7,<5',
],
Expand Down
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ def _get_factory(package, factory_name):
return _get_factory


@pytest.fixture
def owned_contract():
    """
    Return the text of ``owned/contracts/Owned.sol`` under ``V2_PACKAGES_DIR``.
    """
    return (V2_PACKAGES_DIR / "owned" / "contracts" / "Owned.sol").read_text()


@pytest.fixture
def invalid_manifest(safe_math_manifest):
safe_math_manifest["manifest_version"] = 1
Expand Down
28 changes: 28 additions & 0 deletions tests/ethpm/backends/test_http_backends.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pytest

from ethpm import Package
from ethpm.backends.http import GithubOverHTTPSBackend
from ethpm.constants import RAW_GITHUB_AUTHORITY
from ethpm.exceptions import ValidationError


@pytest.mark.parametrize(
    "uri",
    (
        "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1",  # noqa: E501
        "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#0x01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1",  # noqa: E501
    ),
)
def test_github_over_https_backend_fetch_uri_contents(uri, owned_contract, w3):
    """
    Build a Package from a Github URI whose fragment is the content hash.

    The two parametrized URIs differ only in whether the hash fragment
    carries a ``0x`` prefix.
    """
    # these tests may occasionally fail CI as a result of their network requests
    backend = GithubOverHTTPSBackend()
    assert backend.base_uri == RAW_GITHUB_AUTHORITY
    # integration with Package.from_uri
    owned_package = Package.from_uri(uri, w3)
    assert owned_package.name == "owned"


def test_github_over_https_backend_raises_error_with_invalid_content_hash(w3):
    """
    Package.from_uri must raise ValidationError when the URI's hash fragment
    does not match the fetched contents.
    """
    # Same URI as the passing test, but with the tail of the hash altered.
    invalid_uri = "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2ca111"  # noqa: E501
    with pytest.raises(ValidationError):
        Package.from_uri(invalid_uri, w3)
17 changes: 5 additions & 12 deletions tests/ethpm/backends/test_ipfs_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from eth_utils import to_text
import pytest
import requests_mock

from ethpm import V2_PACKAGES_DIR
from ethpm.backends.ipfs import (
Expand Down Expand Up @@ -38,24 +37,18 @@ def add(self, file_or_dir_path, recursive):
@pytest.mark.parametrize(
"base_uri,backend", ((INFURA_GATEWAY_PREFIX, InfuraIPFSBackend()),)
)
def test_ipfs_and_infura_gateway_backends_fetch_uri_contents(
base_uri, backend, safe_math_manifest
):
def test_ipfs_and_infura_gateway_backends_fetch_uri_contents(base_uri, backend):
uri = "ipfs://Qme4otpS88NV8yQi8TfTP89EsQC5bko3F5N1yhRoi6cwGV"
assert backend.base_uri == base_uri
with requests_mock.Mocker() as m:
m.get(requests_mock.ANY, text=json.dumps(safe_math_manifest))
contents = backend.fetch_uri_contents(uri)
contents_dict = json.loads(to_text(contents))
assert contents_dict["package_name"] == "safe-math-lib"
contents = backend.fetch_uri_contents(uri)
assert contents.startswith(b"pragma solidity")


def test_local_ipfs_backend(monkeypatch, fake_client):
def test_local_ipfs_backend():
uri = "ipfs://Qme4otpS88NV8yQi8TfTP89EsQC5bko3F5N1yhRoi6cwGV"
backend = LocalIPFSBackend()
backend.client = fake_client
contents = backend.fetch_uri_contents(uri)
assert contents.startswith("Qm")
assert contents.startswith(b"pragma")


@pytest.mark.parametrize(
Expand Down
2 changes: 1 addition & 1 deletion tests/ethpm/test_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_get_dependency_package(dependencies):
assert result.name == "standard-token"


def test_validate_build_dependencies(dummy_ipfs_backend, piper_coin_manifest):
def test_validate_build_dependencies(dummy_ipfs_backend):
result = validate_build_dependency(
"standard-token", "ipfs://QmVu9zuza5mkJwwcFdh2SXBugm1oSgZVuEKkph9XLsbUwg"
)
Expand Down
4 changes: 3 additions & 1 deletion tests/ethpm/utils/test_ipfs_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

import pytest

from ethpm.utils.ipfs import extract_ipfs_path_from_uri, generate_file_hash, is_ipfs_uri
Expand Down Expand Up @@ -111,5 +113,5 @@ def test_is_ipfs_uri(value, expected):
def test_generate_file_hash(tmpdir, file_name, file_contents, expected):
p = tmpdir.mkdir("sub").join(file_name)
p.write(file_contents)
ipfs_multihash = generate_file_hash(p)
ipfs_multihash = generate_file_hash(Path(p).read_bytes())
assert ipfs_multihash == expected
Loading

0 comments on commit 19cc54a

Please sign in to comment.