Skip to content

Commit

Permalink
Add ipfs hash validation
Browse files Browse the repository at this point in the history
  • Loading branch information
njgheorghita committed Oct 16, 2018
1 parent a5664fa commit 5277de2
Show file tree
Hide file tree
Showing 16 changed files with 112 additions and 140 deletions.
46 changes: 39 additions & 7 deletions docs/uri_backends.rst
Original file line number Diff line number Diff line change
@@ -1,31 +1,63 @@
URI Schemes and Backends
========================

BaseURIBackend
--------------

``Py-EthPM`` uses the ``BaseURIBackend`` as the parent class for all of its URI backends. To write your own backend, it must implement the following methods.

.. py:method:: BaseURIBackend.can_resolve_uri(uri)
Returns a bool indicating whether or not this backend is capable of resolving the given URI to a manifest.
A content-addressed URI pointing to a valid manifest is said to be capable of "resolving".

.. py:method:: BaseURIBackend.can_translate_uri(uri)
Returns a bool indicating whether this backend class can translate the given URI to a corresponding content-addressed URI.
A registry URI is said to be capable of "translating" if it points to another content-addressed URI in its respective on-chain registry.

.. py:method:: BaseURIBackend.fetch_uri_contents(uri)
Fetches the contents stored at the provided uri, if an available backend is capable of resolving the URI. Validates that contents stored at uri match the content hash suffixing the uri.


IPFS
----

``Py-EthPM`` has multiple backends available to fetch/pin files to IPFS. The desired backend can be set via the environment variable: ``ETHPM_IPFS_BACKEND_CLASS``.

- ``InfuraIPFSBackend`` (default)
- `https://ipfs.infura.io`
- ``IPFSGatewayBackend``
- ``IPFSGatewayBackend`` (temporarily deprecated)
- `https://ipfs.io/ipfs/`
- ``LocalIPFSBackend``
- connects to a local IPFS API gateway running on port 5001.
- ``DummyIPFSBackend``
- Won't pin/fetch files to an actual IPFS node, but mocks out this behavior.

.. py:method:: BaseIPFSBackend.can_resolve_uri(uri)
.. py:method:: BaseIPFSBackend.pin_assets(file_or_directory_path)
Returns a bool indicating whether or not this backend is capable of handling the given URI.
Pins asset(s) found at the given path and returns the pinned asset data.

.. py:method:: BaseIPFSBackend.fetch_uri_contents(uri)

Fetches the contents stored at a URI.
HTTP
----

.. py:method:: BaseIPFSBackend.pin_assets(file_or_directory_path)
``Py-EthPM`` offers a backend to fetch files from Github, ``GithubOverHTTPSBackend``.

A valid Github URI *should* conform to the following scheme.

.. code:: python
https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash
To generate a valid Github PM URI:

- Go to the target manifest in your browser.
- Press ``y`` to generate the permalink in the address bar.
- Replace ``"github"`` with ``"raw.githubusercontent"``, and remove the ``"blob"`` namespace from the URI.
- Suffix the URI with ``#`` followed by the ``keccak`` hash of the bytes found at the Github URI.

Pins asset(s) found at the given path and returns the pinned asset data.

Registry URIs
-------------
Expand Down
1 change: 0 additions & 1 deletion ethpm/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ def can_translate_uri(self, uri: str) -> bool:
"""
Return a bool indicating whether this backend class can
translate the given URI to a corresponding content-addressed URI.
i.e. a registry URI pointing to another content-addressed URI onchain
"""
pass

Expand Down
11 changes: 6 additions & 5 deletions ethpm/backends/http.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import requests

from ethpm.backends.base import BaseURIBackend
from ethpm.constants import GITHUB_AUTHORITY
from ethpm.constants import RAW_GITHUB_AUTHORITY
from ethpm.utils.uri import is_valid_github_uri
from ethpm.validation import validate_github_uri_contents
from ethpm.validation import validate_uri_contents


class GithubOverHTTPSBackend(BaseURIBackend):
Expand All @@ -16,17 +16,18 @@ def can_resolve_uri(self, uri: str) -> bool:

def can_translate_uri(self, uri: str) -> bool:
"""
GithubOverHTTPSBackend uri's must resolve to a valid manifest.
GithubOverHTTPSBackend uri's must resolve to a valid manifest,
and cannot translate to another content-addressed URI.
"""
return False

def fetch_uri_contents(self, uri: str) -> bytes:
http_uri, validation_hash = uri.split("#")
response = requests.get(http_uri)
response.raise_for_status()
validate_github_uri_contents(response.content, validation_hash)
validate_uri_contents(response.content, validation_hash)
return response.content

@property
def base_uri(self) -> str:
return GITHUB_AUTHORITY
return RAW_GITHUB_AUTHORITY
18 changes: 15 additions & 3 deletions ethpm/backends/ipfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@
INFURA_GATEWAY_PREFIX,
IPFS_GATEWAY_PREFIX,
)
from ethpm.exceptions import CannotHandleURI
from ethpm.utils.ipfs import dummy_ipfs_pin, extract_ipfs_path_from_uri, is_ipfs_uri
from ethpm.exceptions import CannotHandleURI, ValidationError
from ethpm.utils.ipfs import (
dummy_ipfs_pin,
extract_ipfs_path_from_uri,
generate_file_hash,
is_ipfs_uri,
)


class BaseIPFSBackend(BaseURIBackend):
Expand Down Expand Up @@ -56,7 +61,14 @@ def __init__(self) -> None:

def fetch_uri_contents(self, uri: str) -> bytes:
ipfs_hash = extract_ipfs_path_from_uri(uri)
return self.client.cat(ipfs_hash)
contents = self.client.cat(ipfs_hash)
validation_hash = generate_file_hash(contents)
if validation_hash != ipfs_hash:
raise ValidationError(
"Hashed IPFS contents retrieved from uri: {0} "
"do not match its content hash.".format(uri)
)
return contents

@property
@abstractmethod
Expand Down
4 changes: 3 additions & 1 deletion ethpm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@

INFURA_GATEWAY_PREFIX = "https://ipfs.infura.io"

GITHUB_AUTHORITY = "https://raw.githubusercontent.com/"
INTERNET_SCHEMES = ["http", "https"]

RAW_GITHUB_AUTHORITY = "raw.githubusercontent.com"
2 changes: 2 additions & 0 deletions ethpm/utils/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from eth_utils import to_tuple

from ethpm.backends.base import BaseURIBackend
from ethpm.backends.http import GithubOverHTTPSBackend
from ethpm.backends.ipfs import (
DummyIPFSBackend,
InfuraIPFSBackend,
Expand All @@ -16,6 +17,7 @@
InfuraIPFSBackend,
DummyIPFSBackend,
LocalIPFSBackend,
GithubOverHTTPSBackend,
RegistryURIBackend,
]

Expand Down
13 changes: 6 additions & 7 deletions ethpm/utils/ipfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def dummy_ipfs_pin(path: Path) -> Dict[str, str]:
Return IPFS data as if file was pinned to an actual node.
"""
ipfs_return = {
"Hash": generate_file_hash(path),
"Hash": generate_file_hash(path.read_bytes()),
"Name": path.name,
"Size": str(path.stat().st_size),
}
Expand Down Expand Up @@ -69,12 +69,11 @@ def multihash(value: bytes) -> bytes:
return multihash_bytes


def serialize_file(file_path: Path) -> PBNode:
file_data = open(file_path, "rb").read()
file_size = len(file_data)
def serialize_bytes(file_bytes: bytes) -> PBNode:
file_size = len(file_bytes)

data_protobuf = Data(
Type=Data.DataType.Value("File"), Data=file_data, filesize=file_size
Type=Data.DataType.Value("File"), Data=file_bytes, filesize=file_size
)
data_protobuf_bytes = data_protobuf.SerializeToString()

Expand All @@ -83,8 +82,8 @@ def serialize_file(file_path: Path) -> PBNode:
return file_protobuf


def generate_file_hash(file_path: Path) -> str:
file_protobuf = serialize_file(file_path)
def generate_file_hash(content_bytes: bytes) -> str:
file_protobuf = serialize_bytes(content_bytes)
file_protobuf_bytes = file_protobuf.SerializeToString()
file_multihash = multihash(file_protobuf_bytes)
return b58encode(file_multihash)
48 changes: 4 additions & 44 deletions ethpm/utils/uri.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,8 @@
import json
from typing import Any, Dict
from urllib import parse

from eth_utils import is_text, to_text
from eth_utils import is_text

from ethpm.backends.ipfs import get_ipfs_backend
from ethpm.exceptions import CannotHandleURI

IPFS_SCHEME = "ipfs"

INTERNET_SCHEMES = ["http", "https"]

SWARM_SCHEMES = ["bzz", "bzz-immutable", "bzz-raw"]

RAW_GITHUB_AUTHORITY = "raw.githubusercontent.com"
from ethpm.constants import INTERNET_SCHEMES, RAW_GITHUB_AUTHORITY


def is_valid_github_uri(uri: str) -> bool:
Expand All @@ -23,8 +12,8 @@ def is_valid_github_uri(uri: str) -> bool:
- Have 'http' or 'https' scheme
- Have 'raw.githubusercontent.com' authority
- Have any path (*should* include a commit hash in path)
- Have ending fragment containing any content hash
i.e. 'https://raw.githubusercontent.com/any/path#content_hash
- Have ending fragment containing the keccak hash of the uri contents
ex. 'https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash'
"""
if not is_text(uri):
return False
Expand All @@ -43,32 +32,3 @@ def is_valid_github_uri(uri: str) -> bool:
if authority != RAW_GITHUB_AUTHORITY:
return False
return True


def get_manifest_from_content_addressed_uri(uri: str) -> Dict[str, Any]:
"""
Return manifest data stored at a content addressed URI.
"""
parse_result = parse.urlparse(uri)
scheme = parse_result.scheme

if scheme == IPFS_SCHEME:
ipfs_backend = get_ipfs_backend()
if ipfs_backend.can_resolve_uri(uri):
raw_manifest_data = ipfs_backend.fetch_uri_contents(uri)
manifest_data = to_text(raw_manifest_data)
return json.loads(manifest_data)
else:
raise TypeError(
"The IPFS Backend: {0} cannot handle the given URI: {1}.".format(
type(ipfs_backend).__name__, uri
)
)

if scheme in INTERNET_SCHEMES:
raise CannotHandleURI("Internet URIs are not yet supported.")

if scheme in SWARM_SCHEMES:
raise CannotHandleURI("Swarm URIs are not yet supported.")

raise CannotHandleURI("The URI scheme:{0} is not supported.".format(scheme))
6 changes: 3 additions & 3 deletions ethpm/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,15 +176,15 @@ def validate_single_matching_uri(all_blockchain_uris: List[str], w3: Web3) -> st
return matching_uris[0]


def validate_github_uri_contents(contents: bytes, validation_hash: str) -> None:
def validate_uri_contents(contents: bytes, validation_hash: str) -> None:
"""
Validate that the contents match the validation_hash associated with a Github URI.
Validate that the keccak(contents) matches the validation_hash.
"""
hashed_contents = keccak(contents)
decoded_validation = decode_hex(validation_hash)
if hashed_contents != decoded_validation:
raise ValidationError(
"Invalid Github content-addressed URI. "
"Invalid content-addressed URI. "
"Validation hash:{0} does not match the hash of URI contents: {1}.".format(
decoded_validation, hashed_contents
)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
'jsonschema>=2.6.0,<3',
'protobuf>=3.0.0,<4',
'py-solc>=2.1.0,<3',
'pytest-ethereum==0.1.2a.6',
'pytest-ethereum==0.1.2a.7',
'rlp>=1.0.1,<2',
'web3[tester]>=4.7,<5',
],
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def _get_factory(package, factory_name):

@pytest.fixture
def owned_contract():
with open(str(V2_PACKAGES_DIR / "owned" / "contracts" / "Owned.sol")) as file_obj:
with open(V2_PACKAGES_DIR / "owned" / "contracts" / "Owned.sol") as file_obj:
return file_obj.read()


Expand Down
25 changes: 16 additions & 9 deletions tests/ethpm/backends/test_http_backends.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,27 @@
import pytest
import requests_mock

from ethpm import Package
from ethpm.backends.http import GithubOverHTTPSBackend
from ethpm.constants import GITHUB_AUTHORITY
from ethpm.constants import RAW_GITHUB_AUTHORITY
from ethpm.exceptions import ValidationError


@pytest.mark.parametrize(
"uri",
(
"https://raw.githubusercontent.com/ethpm/ethpm-spec/481739f6138907db88602558711e9d3c1301c269/examples/owned/contracts/Owned.sol#bfdea1fa5f33c30fee8443c5ffa1020027f8813e0007bb6f82aaa2843a7fdd60", # noqa: E501
"https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501
"https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#0x01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501
),
)
def test_github_over_https_backend_fetch_uri_contents(uri, owned_contract):
def test_github_over_https_backend_fetch_uri_contents(uri, owned_contract, w3):
backend = GithubOverHTTPSBackend()
assert backend.base_uri == GITHUB_AUTHORITY
with requests_mock.Mocker() as m:
m.get(requests_mock.ANY, text=owned_contract)
response = backend.fetch_uri_contents(uri)
assert response.startswith(b"pragma")
assert backend.base_uri == RAW_GITHUB_AUTHORITY
# integration with Package.from_uri
owned_package = Package.from_uri(uri, w3)
assert owned_package.name == "owned"


def test_github_over_https_backend_raises_error_with_invalid_content_hash(w3):
invalid_uri = "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2ca111" # noqa: E501
with pytest.raises(ValidationError):
Package.from_uri(invalid_uri, w3)
17 changes: 5 additions & 12 deletions tests/ethpm/backends/test_ipfs_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from eth_utils import to_text
import pytest
import requests_mock

from ethpm import V2_PACKAGES_DIR
from ethpm.backends.ipfs import (
Expand Down Expand Up @@ -38,24 +37,18 @@ def add(self, file_or_dir_path, recursive):
@pytest.mark.parametrize(
"base_uri,backend", ((INFURA_GATEWAY_PREFIX, InfuraIPFSBackend()),)
)
def test_ipfs_and_infura_gateway_backends_fetch_uri_contents(
base_uri, backend, safe_math_manifest
):
def test_ipfs_and_infura_gateway_backends_fetch_uri_contents(base_uri, backend):
uri = "ipfs://Qme4otpS88NV8yQi8TfTP89EsQC5bko3F5N1yhRoi6cwGV"
assert backend.base_uri == base_uri
with requests_mock.Mocker() as m:
m.get(requests_mock.ANY, text=json.dumps(safe_math_manifest))
contents = backend.fetch_uri_contents(uri)
contents_dict = json.loads(to_text(contents))
assert contents_dict["package_name"] == "safe-math-lib"
contents = backend.fetch_uri_contents(uri)
assert contents.startswith(b"pragma solidity")


def test_local_ipfs_backend(monkeypatch, fake_client):
def test_local_ipfs_backend(monkeypatch):
uri = "ipfs://Qme4otpS88NV8yQi8TfTP89EsQC5bko3F5N1yhRoi6cwGV"
backend = LocalIPFSBackend()
backend.client = fake_client
contents = backend.fetch_uri_contents(uri)
assert contents.startswith("Qm")
assert contents.startswith(b"pragma")


@pytest.mark.parametrize(
Expand Down
2 changes: 1 addition & 1 deletion tests/ethpm/test_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_get_dependency_package(dependencies):

def test_validate_build_dependencies(dummy_ipfs_backend, piper_coin_manifest):
result = validate_build_dependency(
"standard-token", "ipfs://QmVu9zuza5mkJwwcFdh2SXBugm1oSgZVuEKkph9XLsbUwg"
"standard-token", "ipfs://QmVu9zuza5mkJwwcFdh2SXBugm1oSgZVuEKkph9XLsbUwg#0x123"
)
assert result is None

Expand Down
Loading

0 comments on commit 5277de2

Please sign in to comment.