Skip to content

Commit

Permalink
Merge pull request #129 from njgheorghita/http-uris
Browse files Browse the repository at this point in the history
Update content addressed github uri scheme
  • Loading branch information
njgheorghita authored Dec 11, 2018
2 parents 3a9a606 + 4a90b18 commit ab67254
Show file tree
Hide file tree
Showing 10 changed files with 164 additions and 119 deletions.
30 changes: 21 additions & 9 deletions docs/uri_backends.rst
Original file line number Diff line number Diff line change
Expand Up @@ -40,23 +40,35 @@ IPFS
Pin asset(s) found at the given path and returns the pinned asset data.


HTTP
----
HTTPS
-----

``Py-EthPM`` offers a backend to fetch files from Github, ``GithubOverHTTPSBackend``.

A valid Github URI *should* conform to the following scheme.
A valid content-addressed Github URI *must* conform to the following scheme, as described in `ERC1319 <https://github.com/ethereum/EIPs/issues/1319>`__, to be used with this backend.

.. code:: python
https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash
https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha
.. py:method:: create_content_addressed_github_uri(uri)
This util function will return a content-addressed URI, as defined by Github's `blob <https://developer.github.com/v3/git/blobs/>`__ scheme. To generate a content-addressed URI for any manifest stored on Github, this function accepts a Github API URI that conforms to the following scheme.

::
https://api.github.com/repos/:owner/:repo/contents/:path/:to/manifest.json

.. doctest::

>>> from ethpm.utils.uri import create_content_addressed_github_uri

To generate a valid Github PM URI.
>>> owned_github_api_uri = "https://api.github.com/repos/ethpm/py-ethpm/contents/ethpm/assets/owned/1.0.1.json"
>>> content_addressed_uri = "https://api.github.com/repos/ethpm/py-ethpm/git/blobs/a7232a93f1e9e75d606f6c1da18aa16037e03480"

- Go to the target manifest in your browser.
- Press ``y`` to generate the permalink in the address bar.
- Replace ``"github"`` with ``"raw.githubusercontent"``, and remove the ``"blob"`` namespace from the URI.
- Suffix the URI with ``#`` followed by the ``keccak`` hash of the bytes found at the Github URI.
>>> actual_blob_uri = create_content_addressed_github_uri(owned_github_api_uri)
>>> assert actual_blob_uri == content_addressed_uri


Registry URIs
Expand Down
8 changes: 5 additions & 3 deletions ethpm/backends/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from abc import ABC, abstractmethod

from ethpm.typing import URI


class BaseURIBackend(ABC):
"""
Expand All @@ -10,23 +12,23 @@ class BaseURIBackend(ABC):
"""

@abstractmethod
def can_resolve_uri(self, uri: str) -> bool:
def can_resolve_uri(self, uri: URI) -> bool:
"""
Return a bool indicating whether this backend class can
resolve the given URI to it's contents.
"""
pass

@abstractmethod
def can_translate_uri(self, uri: str) -> bool:
def can_translate_uri(self, uri: URI) -> bool:
"""
Return a bool indicating whether this backend class can
translate the given URI to a corresponding content-addressed URI.
"""
pass

@abstractmethod
def fetch_uri_contents(self, uri: str) -> bytes:
def fetch_uri_contents(self, uri: URI) -> bytes:
"""
Fetch the contents stored at a URI.
"""
Expand Down
42 changes: 27 additions & 15 deletions ethpm/backends/http.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,49 @@
from urllib import parse
import base64
import json

import requests

from ethpm.backends.base import BaseURIBackend
from ethpm.constants import RAW_GITHUB_AUTHORITY
from ethpm.utils.uri import is_valid_github_uri
from ethpm.validation import validate_uri_contents
from ethpm.constants import GITHUB_API_AUTHORITY
from ethpm.exceptions import CannotHandleURI
from ethpm.typing import URI
from ethpm.utils.uri import (
is_valid_content_addressed_github_uri,
validate_blob_uri_contents,
)


class GithubOverHTTPSBackend(BaseURIBackend):
"""
Base class for all URIs pointing to a content-addressed Github URI.
"""

def can_resolve_uri(self, uri: str) -> bool:
return is_valid_github_uri(uri)
def can_resolve_uri(self, uri: URI) -> bool:
return is_valid_content_addressed_github_uri(uri)

def can_translate_uri(self, uri: str) -> bool:
def can_translate_uri(self, uri: URI) -> bool:
"""
GithubOverHTTPSBackend uri's must resolve to a valid manifest,
and cannot translate to another content-addressed URI.
"""
return False

def fetch_uri_contents(self, uri: str) -> bytes:
parsed_uri = parse.urlparse(uri)
validation_hash = parsed_uri.fragment
http_uri = f"{parsed_uri.scheme}://{parsed_uri.netloc}{parsed_uri.path}"
response = requests.get(http_uri)
def fetch_uri_contents(self, uri: URI) -> bytes:
if not self.can_resolve_uri(uri):
raise CannotHandleURI(f"GithubOverHTTPSBackend cannot resolve {uri}.")

response = requests.get(uri)
response.raise_for_status()
validate_uri_contents(response.content, validation_hash)
return response.content
contents = json.loads(response.content)
if contents["encoding"] != "base64":
raise CannotHandleURI(
"Expected contents returned from Github to be base64 encoded, "
f"instead received {contents['encoding']}."
)
decoded_contents = base64.b64decode(contents["content"])
validate_blob_uri_contents(decoded_contents, uri)
return decoded_contents

@property
def base_uri(self) -> str:
return RAW_GITHUB_AUTHORITY
return GITHUB_API_AUTHORITY
4 changes: 1 addition & 3 deletions ethpm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,4 @@

INFURA_GATEWAY_PREFIX = "https://ipfs.infura.io"

INTERNET_SCHEMES = ["http", "https"]

RAW_GITHUB_AUTHORITY = "raw.githubusercontent.com"
GITHUB_API_AUTHORITY = "api.github.com"
2 changes: 1 addition & 1 deletion ethpm/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def from_uri(cls, uri: str, w3: Web3) -> "Package":
A valid ``Web3`` instance is also required.
URI schemes supported:
- IPFS `ipfs://Qm...`
- HTTP `https://raw.githubusercontent.com/repo/path.json#hash`
- HTTP `https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha`
- Registry `ercXXX://registry.eth/greeter?version=1.0.0`
.. code:: python
Expand Down
92 changes: 75 additions & 17 deletions ethpm/utils/uri.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,92 @@
import hashlib
import json
from typing import Tuple
from urllib import parse

from eth_utils import is_text
from eth_utils import is_text, to_bytes, to_text
import requests

from ethpm.constants import INTERNET_SCHEMES, RAW_GITHUB_AUTHORITY
from ethpm.constants import GITHUB_API_AUTHORITY
from ethpm.exceptions import CannotHandleURI, ValidationError
from ethpm.typing import URI


def is_valid_github_uri(uri: str) -> bool:
def create_content_addressed_github_uri(uri: URI) -> URI:
    """
    Returns a content-addressed Github "git_url" that conforms to this scheme.
    https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha
    Accepts Github-defined "url" that conforms to this scheme
    https://api.github.com/repos/:owner/:repo/contents/:path/:to/manifest.json

    Performs a GET request against ``uri`` and returns the "git_url" field of the
    JSON response.

    Raises ``CannotHandleURI`` if ``uri`` does not conform to the "url" scheme, or if
    the response does not describe a single file. Any error status returned by Github
    is raised by ``response.raise_for_status()``.
    """
    if not is_valid_api_github_uri(uri):
        raise CannotHandleURI(f"{uri} does not conform to Github's API 'url' scheme.")
    response = requests.get(uri)
    response.raise_for_status()
    contents = json.loads(response.content)
    # Github answers with a JSON array (one entry per child) when the path is a
    # directory. Indexing that list with "type" would raise TypeError, so the
    # directory case is mapped onto the same CannotHandleURI as any other non-file.
    if isinstance(contents, list):
        content_type = "dir"
    else:
        content_type = contents["type"]
    if content_type != "file":
        raise CannotHandleURI(
            f"Expected url to point to a 'file' type, instead received {content_type}."
        )
    return contents["git_url"]


def is_valid_content_addressed_github_uri(uri: URI) -> bool:
    """
    Check whether the given uri conforms to the content-addressed blob scheme.
    https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha

    Delegates to ``is_valid_github_uri``, requiring the path to contain
    "/repos/", "/git/" and "/blobs/".
    """
    blob_path_terms = ("/repos/", "/git/", "/blobs/")
    return is_valid_github_uri(uri, blob_path_terms)


def is_valid_api_github_uri(uri: URI) -> bool:
    """
    Check whether the given uri conforms to Github's API "contents" scheme.
    https://api.github.com/repos/:owner/:repo/contents/:path/:to/:file

    Delegates to ``is_valid_github_uri``, requiring the path to contain
    "/repos/" and "/contents/".
    """
    contents_path_terms = ("/repos/", "/contents/")
    return is_valid_github_uri(uri, contents_path_terms)


def is_valid_github_uri(uri: URI, expected_path_terms: Tuple[str, ...]) -> bool:
"""
Return a bool indicating whether or not the URI fulfills the following specs
Valid Github URIs *must*:
- Have 'http' or 'https' scheme
- Have 'raw.githubusercontent.com' authority
- Have any path (*should* include a commit hash in path)
- Have ending fragment containing the keccak hash of the uri contents
ex. 'https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash'
- Have 'https' scheme
- Have 'api.github.com' authority
- Have a path that contains all "expected_path_terms"
"""
if not is_text(uri):
return False
parse_result = parse.urlparse(uri)
path = parse_result.path
scheme = parse_result.scheme
authority = parse_result.netloc
content_hash = parse_result.fragment

if not path or not scheme or not content_hash:
parsed = parse.urlparse(uri)
path, scheme, authority = parsed.path, parsed.scheme, parsed.netloc
if not all((path, scheme, authority)):
return False

if any(term for term in expected_path_terms if term not in path):
return False

if scheme not in INTERNET_SCHEMES:
if scheme != "https":
return False

if authority != RAW_GITHUB_AUTHORITY:
if authority != GITHUB_API_AUTHORITY:
return False
return True


def validate_blob_uri_contents(contents: bytes, blob_uri: str) -> None:
    """
    Raises an exception if the sha1 hash of the contents does not match the hash found in the
    blob_uri. Formula for how git calculates the hash found here:
    http://alblue.bandlem.com/2011/08/git-tip-of-week-objects.html

    The expected hash is the last path segment of ``blob_uri``. Returns ``None`` when the
    hashes match and raises ``ValidationError`` otherwise.
    """
    blob_path = parse.urlparse(blob_uri).path
    blob_hash = blob_path.split("/")[-1]
    # git hashes b"blob <size>\0<contents>", where <size> is the length in *bytes*.
    # Hashing the raw bytes (instead of decoding to text and counting characters)
    # keeps the digest correct for multi-byte and non-UTF-8 contents.
    header = b"blob " + str(len(contents)).encode("ascii") + b"\0"
    hash_object = hashlib.sha1(header + contents)
    if hash_object.hexdigest() != blob_hash:
        raise ValidationError(
            f"Hash of contents fetched from {blob_uri} do not match its hash: {blob_hash}."
        )
24 changes: 1 addition & 23 deletions ethpm/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,7 @@
from typing import Any, List
from urllib import parse

from eth_utils import (
decode_hex,
is_address,
is_canonical_address,
is_checksum_address,
is_text,
keccak,
to_hex,
)
from eth_utils import is_address, is_canonical_address, is_checksum_address, is_text
from web3 import Web3

from ethpm.constants import PACKAGE_NAME_REGEX, REGISTRY_URI_SCHEME
Expand Down Expand Up @@ -166,17 +158,3 @@ def validate_single_matching_uri(all_blockchain_uris: List[str], w3: Web3) -> st
f"Package has too many ({len(matching_uris)}) matching URIs: {matching_uris}."
)
return matching_uris[0]


def validate_uri_contents(contents: bytes, validation_hash: str) -> None:
"""
Validate that the keccak(contents) matches the validation_hash.
"""
hashed_contents = keccak(contents)
decoded_validation = decode_hex(validation_hash)
if hashed_contents != decoded_validation:
raise ValidationError(
"Invalid content-addressed URI. "
f"Validation hash:{to_hex(decoded_validation)} does not match the "
f"hash of URI contents: {to_hex(hashed_contents)}."
)
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
extras_require={
'test': [
'pytest>=3.2.1,<4',
'requests-mock>=1.5.0,<2',
'tox>=1.8.0,<2',
],
'lint': [
Expand Down Expand Up @@ -55,13 +54,13 @@
include_package_data=True,
install_requires=[
'bumpversion>=0.5.3,<1',
'eth-abi>=1.2.2,<2',
'eth-abi>=1.2.2,<1.3.0',
'eth-keys>=0.2.0b3,<1',
'eth-utils>=1.2.1,<2',
'eth-utils>=1.3.0,<2',
'ipfsapi>=0.4.3,<1',
'jsonschema>=2.6.0,<3',
'protobuf>=3.0.0,<4',
'pytest-ethereum>=0.1.3a.1,<1',
'pytest-ethereum>=0.1.3a.3,<1',
'py-solc>=3.2.0,<4',
'rlp>=1.0.1,<2',
'web3[tester]>=4.7,<5',
Expand Down
14 changes: 7 additions & 7 deletions tests/ethpm/backends/test_http_backends.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
import pytest
from requests.exceptions import HTTPError

from ethpm import Package
from ethpm.backends.http import GithubOverHTTPSBackend
from ethpm.constants import RAW_GITHUB_AUTHORITY
from ethpm.exceptions import ValidationError
from ethpm.constants import GITHUB_API_AUTHORITY
from ethpm.exceptions import CannotHandleURI, ValidationError


@pytest.mark.parametrize(
"uri",
(
"https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501
"https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#0x01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501
"https://api.github.com/repos/ethpm/py-ethpm/git/blobs/a7232a93f1e9e75d606f6c1da18aa16037e03480",
),
)
def test_github_over_https_backend_fetch_uri_contents(uri, owned_contract, w3):
# these tests may occasionally fail CI as a result of their network requests
backend = GithubOverHTTPSBackend()
assert backend.base_uri == RAW_GITHUB_AUTHORITY
assert backend.base_uri == GITHUB_API_AUTHORITY
# integration with Package.from_uri
owned_package = Package.from_uri(uri, w3)
assert owned_package.name == "owned"


def test_github_over_https_backend_raises_error_with_invalid_content_hash(w3):
invalid_uri = "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2ca111" # noqa: E501
with pytest.raises(ValidationError):
invalid_uri = "https://api.github.com/repos/ethpm/py-ethpm/git/blobs/a7232a93f1e9e75d606f6c1da18aa16037e03123"
with pytest.raises(HTTPError):
Package.from_uri(invalid_uri, w3)
Loading

0 comments on commit ab67254

Please sign in to comment.