Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update content addressed github uri scheme #129

Merged
merged 2 commits into from
Dec 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions docs/uri_backends.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,23 +40,35 @@ IPFS
Pin asset(s) found at the given path and returns the pinned asset data.


HTTP
----
HTTPS
-----

``Py-EthPM`` offers a backend to fetch files from Github, ``GithubOverHTTPSBackend``.

A valid Github URI *should* conform to the following scheme.
A valid content-addressed Github URI *must* conform to the following scheme, as described in `ERC1319 <https://github.com/ethereum/EIPs/issues/1319>`__, to be used with this backend.

.. code:: python

https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash
https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha


.. py:method:: create_content_addressed_github_uri(uri)

This util function will return a content-addressed URI, as defined by Github's `blob <https://developer.github.com/v3/git/blobs/>`__ scheme. To generate a content-addressed URI for any manifest stored on Github, this function accepts a Github API URI that conforms to the following scheme.

::

https://api.github.com/repos/:owner/:repo/contents/:path/:to/manifest.json

.. doctest::

>>> from ethpm.utils.uri import create_content_addressed_github_uri

To generate a valid Github PM URI.
>>> owned_github_api_uri = "https://api.github.com/repos/ethpm/py-ethpm/contents/ethpm/assets/owned/1.0.1.json"
>>> content_addressed_uri = "https://api.github.com/repos/ethpm/py-ethpm/git/blobs/a7232a93f1e9e75d606f6c1da18aa16037e03480"

- Go to the target manifest in your browser.
- Press ``y`` to generate the permalink in the address bar.
- Replace ``"github"`` with ``"raw.githubusercontent"``, and remove the ``"blob"`` namespace from the URI.
- Suffix the URI with ``#`` followed by the ``keccak`` hash of the bytes found at the Github URI.
>>> actual_blob_uri = create_content_addressed_github_uri(owned_github_api_uri)
>>> assert actual_blob_uri == content_addressed_uri


Registry URIs
Expand Down
8 changes: 5 additions & 3 deletions ethpm/backends/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from abc import ABC, abstractmethod

from ethpm.typing import URI


class BaseURIBackend(ABC):
"""
Expand All @@ -10,23 +12,23 @@ class BaseURIBackend(ABC):
"""

@abstractmethod
def can_resolve_uri(self, uri: str) -> bool:
def can_resolve_uri(self, uri: URI) -> bool:
"""
Return a bool indicating whether this backend class can
resolve the given URI to its contents.
"""
pass

@abstractmethod
def can_translate_uri(self, uri: str) -> bool:
def can_translate_uri(self, uri: URI) -> bool:
"""
Return a bool indicating whether this backend class can
translate the given URI to a corresponding content-addressed URI.
"""
pass

@abstractmethod
def fetch_uri_contents(self, uri: str) -> bytes:
def fetch_uri_contents(self, uri: URI) -> bytes:
"""
Fetch the contents stored at a URI.
"""
Expand Down
42 changes: 27 additions & 15 deletions ethpm/backends/http.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,49 @@
from urllib import parse
import base64
import json

import requests

from ethpm.backends.base import BaseURIBackend
from ethpm.constants import RAW_GITHUB_AUTHORITY
from ethpm.utils.uri import is_valid_github_uri
from ethpm.validation import validate_uri_contents
from ethpm.constants import GITHUB_API_AUTHORITY
from ethpm.exceptions import CannotHandleURI
from ethpm.typing import URI
from ethpm.utils.uri import (
is_valid_content_addressed_github_uri,
validate_blob_uri_contents,
)


class GithubOverHTTPSBackend(BaseURIBackend):
"""
Base class for all URIs pointing to a content-addressed Github URI.
"""

def can_resolve_uri(self, uri: str) -> bool:
return is_valid_github_uri(uri)
def can_resolve_uri(self, uri: URI) -> bool:
return is_valid_content_addressed_github_uri(uri)

def can_translate_uri(self, uri: str) -> bool:
def can_translate_uri(self, uri: URI) -> bool:
"""
GithubOverHTTPSBackend URIs must resolve to a valid manifest,
and cannot translate to another content-addressed URI.
"""
return False

def fetch_uri_contents(self, uri: str) -> bytes:
parsed_uri = parse.urlparse(uri)
validation_hash = parsed_uri.fragment
http_uri = f"{parsed_uri.scheme}://{parsed_uri.netloc}{parsed_uri.path}"
response = requests.get(http_uri)
def fetch_uri_contents(self, uri: URI) -> bytes:
if not self.can_resolve_uri(uri):
raise CannotHandleURI(f"GithubOverHTTPSBackend cannot resolve {uri}.")

response = requests.get(uri)
response.raise_for_status()
validate_uri_contents(response.content, validation_hash)
return response.content
contents = json.loads(response.content)
if contents["encoding"] != "base64":
raise CannotHandleURI(
"Expected contents returned from Github to be base64 encoded, "
f"instead received {contents['encoding']}."
)
decoded_contents = base64.b64decode(contents["content"])
validate_blob_uri_contents(decoded_contents, uri)
return decoded_contents

@property
def base_uri(self) -> str:
return RAW_GITHUB_AUTHORITY
return GITHUB_API_AUTHORITY
4 changes: 1 addition & 3 deletions ethpm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,4 @@

INFURA_GATEWAY_PREFIX = "https://ipfs.infura.io"

INTERNET_SCHEMES = ["http", "https"]

RAW_GITHUB_AUTHORITY = "raw.githubusercontent.com"
GITHUB_API_AUTHORITY = "api.github.com"
2 changes: 1 addition & 1 deletion ethpm/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def from_uri(cls, uri: str, w3: Web3) -> "Package":
A valid ``Web3`` instance is also required.
URI schemes supported:
- IPFS `ipfs://Qm...`
- HTTP `https://raw.githubusercontent.com/repo/path.json#hash`
- HTTP `https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha`
- Registry `ercXXX://registry.eth/greeter?version=1.0.0`

.. code:: python
Expand Down
92 changes: 75 additions & 17 deletions ethpm/utils/uri.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,92 @@
import hashlib
import json
from typing import Tuple
from urllib import parse

from eth_utils import is_text
from eth_utils import is_text, to_bytes, to_text
import requests

from ethpm.constants import INTERNET_SCHEMES, RAW_GITHUB_AUTHORITY
from ethpm.constants import GITHUB_API_AUTHORITY
from ethpm.exceptions import CannotHandleURI, ValidationError
from ethpm.typing import URI


def is_valid_github_uri(uri: str) -> bool:
def create_content_addressed_github_uri(uri: URI) -> URI:
"""
Return a bool indicating whether or not the URI is a valid Github URI.
Returns a content-addressed Github "git_url" that conforms to this scheme.
https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha

Accepts Github-defined "url" that conforms to this scheme
https://api.github.com/repos/:owner/:repo/contents/:path/:to/manifest.json
"""
if not is_valid_api_github_uri(uri):
raise CannotHandleURI(f"{uri} does not conform to Github's API 'url' scheme.")
response = requests.get(uri)
response.raise_for_status()
contents = json.loads(response.content)
if contents["type"] != "file":
raise CannotHandleURI(
f"Expected url to point to a 'file' type, instead received {contents['type']}."
)
return contents["git_url"]


def is_valid_content_addressed_github_uri(uri: URI) -> bool:
    """
    Check whether ``uri`` is shaped like a Github blob ("git_url") URI, i.e.
    https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha

    Delegates to ``is_valid_github_uri`` with the path terms a blob URI must contain.
    """
    required_path_terms = ("/repos/", "/git/", "/blobs/")
    return is_valid_github_uri(uri, required_path_terms)


def is_valid_api_github_uri(uri: URI) -> bool:
    """
    Check whether ``uri`` is shaped like a Github API "contents" URI, i.e.
    https://api.github.com/repos/:owner/:repo/contents/:path/:to/:file

    Delegates to ``is_valid_github_uri`` with the path terms a contents URI must contain.
    """
    required_path_terms = ("/repos/", "/contents/")
    return is_valid_github_uri(uri, required_path_terms)


def is_valid_github_uri(uri: URI, expected_path_terms: Tuple[str, ...]) -> bool:
"""
Return a bool indicating whether or not the URI fulfills the following specs
Valid Github URIs *must*:
- Have 'http' or 'https' scheme
- Have 'raw.githubusercontent.com' authority
- Have any path (*should* include a commit hash in path)
- Have ending fragment containing the keccak hash of the uri contents
ex. 'https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash'
- Have 'https' scheme
- Have 'api.github.com' authority
- Have a path that contains all "expected_path_terms"
"""
if not is_text(uri):
return False
parse_result = parse.urlparse(uri)
path = parse_result.path
scheme = parse_result.scheme
authority = parse_result.netloc
content_hash = parse_result.fragment

if not path or not scheme or not content_hash:
parsed = parse.urlparse(uri)
path, scheme, authority = parsed.path, parsed.scheme, parsed.netloc
if not all((path, scheme, authority)):
return False

if any(term for term in expected_path_terms if term not in path):
return False

if scheme not in INTERNET_SCHEMES:
if scheme != "https":
return False

if authority != RAW_GITHUB_AUTHORITY:
if authority != GITHUB_API_AUTHORITY:
return False
return True


def validate_blob_uri_contents(contents: bytes, blob_uri: str) -> None:
    """
    Raises an exception if the sha1 hash of the contents does not match the hash found in the
    blob_uri. Formula for how git calculates the hash found here:
    http://alblue.bandlem.com/2011/08/git-tip-of-week-objects.html

    :param contents: Raw (already base64-decoded) bytes of the blob.
    :param blob_uri: Blob URI whose final path segment is the expected sha1 hex digest, e.g.
        https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha
    :raises ValidationError: If the computed git blob hash differs from the one in ``blob_uri``.
    """
    blob_path = parse.urlparse(blob_uri).path
    blob_hash = blob_path.split("/")[-1]
    # git hashes "blob <size>\0" followed by the payload, where <size> is the payload length
    # in *bytes*. Work on the raw bytes: decoding to text first would count characters
    # (wrong for any non-ASCII content) and would fail outright on non-UTF-8 payloads.
    blob_header = b"blob " + str(len(contents)).encode("ascii") + b"\0"
    hash_object = hashlib.sha1(blob_header + contents)
    if hash_object.hexdigest() != blob_hash:
        raise ValidationError(
            f"Hash of contents fetched from {blob_uri} do not match its hash: {blob_hash}."
        )
24 changes: 1 addition & 23 deletions ethpm/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,7 @@
from typing import Any, List
from urllib import parse

from eth_utils import (
decode_hex,
is_address,
is_canonical_address,
is_checksum_address,
is_text,
keccak,
to_hex,
)
from eth_utils import is_address, is_canonical_address, is_checksum_address, is_text
from web3 import Web3

from ethpm.constants import PACKAGE_NAME_REGEX, REGISTRY_URI_SCHEME
Expand Down Expand Up @@ -166,17 +158,3 @@ def validate_single_matching_uri(all_blockchain_uris: List[str], w3: Web3) -> st
f"Package has too many ({len(matching_uris)}) matching URIs: {matching_uris}."
)
return matching_uris[0]


def validate_uri_contents(contents: bytes, validation_hash: str) -> None:
"""
Validate that the keccak(contents) matches the validation_hash.
"""
hashed_contents = keccak(contents)
decoded_validation = decode_hex(validation_hash)
if hashed_contents != decoded_validation:
raise ValidationError(
"Invalid content-addressed URI. "
f"Validation hash:{to_hex(decoded_validation)} does not match the "
f"hash of URI contents: {to_hex(hashed_contents)}."
)
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
extras_require={
'test': [
'pytest>=3.2.1,<4',
'requests-mock>=1.5.0,<2',
'tox>=1.8.0,<2',
],
'lint': [
Expand Down Expand Up @@ -55,13 +54,13 @@
include_package_data=True,
install_requires=[
'bumpversion>=0.5.3,<1',
'eth-abi>=1.2.2,<2',
'eth-abi>=1.2.2,<1.3.0',
'eth-keys>=0.2.0b3,<1',
'eth-utils>=1.2.1,<2',
'eth-utils>=1.3.0,<2',
'ipfsapi>=0.4.3,<1',
'jsonschema>=2.6.0,<3',
'protobuf>=3.0.0,<4',
'pytest-ethereum>=0.1.3a.1,<1',
'pytest-ethereum>=0.1.3a.3,<1',
'py-solc>=3.2.0,<4',
'rlp>=1.0.1,<2',
'web3[tester]>=4.7,<5',
Expand Down
14 changes: 7 additions & 7 deletions tests/ethpm/backends/test_http_backends.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
import pytest
from requests.exceptions import HTTPError

from ethpm import Package
from ethpm.backends.http import GithubOverHTTPSBackend
from ethpm.constants import RAW_GITHUB_AUTHORITY
from ethpm.exceptions import ValidationError
from ethpm.constants import GITHUB_API_AUTHORITY
from ethpm.exceptions import CannotHandleURI, ValidationError


@pytest.mark.parametrize(
"uri",
(
"https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501
"https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#0x01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501
"https://api.github.com/repos/ethpm/py-ethpm/git/blobs/a7232a93f1e9e75d606f6c1da18aa16037e03480",
),
)
def test_github_over_https_backend_fetch_uri_contents(uri, owned_contract, w3):
# these tests may occasionally fail CI as a result of their network requests
backend = GithubOverHTTPSBackend()
assert backend.base_uri == RAW_GITHUB_AUTHORITY
assert backend.base_uri == GITHUB_API_AUTHORITY
# integration with Package.from_uri
owned_package = Package.from_uri(uri, w3)
assert owned_package.name == "owned"


def test_github_over_https_backend_raises_error_with_invalid_content_hash(w3):
invalid_uri = "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2ca111" # noqa: E501
with pytest.raises(ValidationError):
invalid_uri = "https://api.github.com/repos/ethpm/py-ethpm/git/blobs/a7232a93f1e9e75d606f6c1da18aa16037e03123"
with pytest.raises(HTTPError):
Package.from_uri(invalid_uri, w3)
Loading