-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #129 from njgheorghita/http-uris
Update content addressed github uri scheme
- Loading branch information
Showing
10 changed files
with
164 additions
and
119 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,49 @@ | ||
from urllib import parse | ||
import base64 | ||
import json | ||
|
||
import requests | ||
|
||
from ethpm.backends.base import BaseURIBackend | ||
from ethpm.constants import RAW_GITHUB_AUTHORITY | ||
from ethpm.utils.uri import is_valid_github_uri | ||
from ethpm.validation import validate_uri_contents | ||
from ethpm.constants import GITHUB_API_AUTHORITY | ||
from ethpm.exceptions import CannotHandleURI | ||
from ethpm.typing import URI | ||
from ethpm.utils.uri import ( | ||
is_valid_content_addressed_github_uri, | ||
validate_blob_uri_contents, | ||
) | ||
|
||
|
||
class GithubOverHTTPSBackend(BaseURIBackend): | ||
""" | ||
Base class for all URIs pointing to a content-addressed Github URI. | ||
""" | ||
|
||
def can_resolve_uri(self, uri: str) -> bool: | ||
return is_valid_github_uri(uri) | ||
def can_resolve_uri(self, uri: URI) -> bool: | ||
return is_valid_content_addressed_github_uri(uri) | ||
|
||
def can_translate_uri(self, uri: str) -> bool: | ||
def can_translate_uri(self, uri: URI) -> bool: | ||
""" | ||
GithubOverHTTPSBackend URIs must resolve to a valid manifest, | ||
and cannot translate to another content-addressed URI. | ||
""" | ||
return False | ||
|
||
def fetch_uri_contents(self, uri: str) -> bytes: | ||
parsed_uri = parse.urlparse(uri) | ||
validation_hash = parsed_uri.fragment | ||
http_uri = f"{parsed_uri.scheme}://{parsed_uri.netloc}{parsed_uri.path}" | ||
response = requests.get(http_uri) | ||
def fetch_uri_contents(self, uri: URI) -> bytes: | ||
if not self.can_resolve_uri(uri): | ||
raise CannotHandleURI(f"GithubOverHTTPSBackend cannot resolve {uri}.") | ||
|
||
response = requests.get(uri) | ||
response.raise_for_status() | ||
validate_uri_contents(response.content, validation_hash) | ||
return response.content | ||
contents = json.loads(response.content) | ||
if contents["encoding"] != "base64": | ||
raise CannotHandleURI( | ||
"Expected contents returned from Github to be base64 encoded, " | ||
f"instead received {contents['encoding']}." | ||
) | ||
decoded_contents = base64.b64decode(contents["content"]) | ||
validate_blob_uri_contents(decoded_contents, uri) | ||
return decoded_contents | ||
|
||
@property | ||
def base_uri(self) -> str: | ||
return RAW_GITHUB_AUTHORITY | ||
return GITHUB_API_AUTHORITY |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,92 @@ | ||
import hashlib | ||
import json | ||
from typing import Tuple | ||
from urllib import parse | ||
|
||
from eth_utils import is_text | ||
from eth_utils import is_text, to_bytes, to_text | ||
import requests | ||
|
||
from ethpm.constants import INTERNET_SCHEMES, RAW_GITHUB_AUTHORITY | ||
from ethpm.constants import GITHUB_API_AUTHORITY | ||
from ethpm.exceptions import CannotHandleURI, ValidationError | ||
from ethpm.typing import URI | ||
|
||
|
||
def is_valid_github_uri(uri: str) -> bool: | ||
def create_content_addressed_github_uri(uri: URI) -> URI: | ||
""" | ||
Return a bool indicating whether or not the URI is a valid Github URI. | ||
Returns a content-addressed Github "git_url" that conforms to this scheme. | ||
https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha | ||
Accepts Github-defined "url" that conforms to this scheme | ||
https://api.github.com/repos/:owner/:repo/contents/:path/:to/manifest.json | ||
""" | ||
if not is_valid_api_github_uri(uri): | ||
raise CannotHandleURI(f"{uri} does not conform to Github's API 'url' scheme.") | ||
response = requests.get(uri) | ||
response.raise_for_status() | ||
contents = json.loads(response.content) | ||
if contents["type"] != "file": | ||
raise CannotHandleURI( | ||
f"Expected url to point to a 'file' type, instead received {contents['type']}." | ||
) | ||
return contents["git_url"] | ||
|
||
|
||
def is_valid_content_addressed_github_uri(uri: URI) -> bool: | ||
""" | ||
Returns a bool indicating whether the given uri conforms to this scheme. | ||
https://api.github.com/repos/:owner/:repo/git/blobs/:file_sha | ||
""" | ||
return is_valid_github_uri(uri, ("/repos/", "/git/", "/blobs/")) | ||
|
||
|
||
def is_valid_api_github_uri(uri: URI) -> bool: | ||
""" | ||
Returns a bool indicating whether the given uri conforms to this scheme. | ||
https://api.github.com/repos/:owner/:repo/contents/:path/:to/:file | ||
""" | ||
return is_valid_github_uri(uri, ("/repos/", "/contents/")) | ||
|
||
|
||
def is_valid_github_uri(uri: URI, expected_path_terms: Tuple[str, ...]) -> bool: | ||
""" | ||
Return a bool indicating whether or not the URI fulfills the following specs | ||
Valid Github URIs *must*: | ||
- Have 'http' or 'https' scheme | ||
- Have 'raw.githubusercontent.com' authority | ||
- Have any path (*should* include a commit hash in path) | ||
- Have ending fragment containing the keccak hash of the uri contents | ||
ex. 'https://raw.githubusercontent.com/user/repo/commit_hash/path/to/manifest.json#content_hash' | ||
- Have 'https' scheme | ||
- Have 'api.github.com' authority | ||
- Have a path that contains all "expected_path_terms" | ||
""" | ||
if not is_text(uri): | ||
return False | ||
parse_result = parse.urlparse(uri) | ||
path = parse_result.path | ||
scheme = parse_result.scheme | ||
authority = parse_result.netloc | ||
content_hash = parse_result.fragment | ||
|
||
if not path or not scheme or not content_hash: | ||
parsed = parse.urlparse(uri) | ||
path, scheme, authority = parsed.path, parsed.scheme, parsed.netloc | ||
if not all((path, scheme, authority)): | ||
return False | ||
|
||
if any(term for term in expected_path_terms if term not in path): | ||
return False | ||
|
||
if scheme not in INTERNET_SCHEMES: | ||
if scheme != "https": | ||
return False | ||
|
||
if authority != RAW_GITHUB_AUTHORITY: | ||
if authority != GITHUB_API_AUTHORITY: | ||
return False | ||
return True | ||
|
||
|
||
def validate_blob_uri_contents(contents: bytes, blob_uri: str) -> None: | ||
""" | ||
Raises an exception if the sha1 hash of the contents does not match the hash found in the | ||
blob_uri. Formula for how git calculates the hash found here: | ||
http://alblue.bandlem.com/2011/08/git-tip-of-week-objects.html | ||
""" | ||
blob_path = parse.urlparse(blob_uri).path | ||
blob_hash = blob_path.split("/")[-1] | ||
contents_str = to_text(contents) | ||
content_length = len(contents_str) | ||
hashable_contents = "blob " + str(content_length) + "\0" + contents_str | ||
hash_object = hashlib.sha1(to_bytes(text=hashable_contents)) | ||
if hash_object.hexdigest() != blob_hash: | ||
raise ValidationError( | ||
f"Hash of contents fetched from {blob_uri} do not match its hash: {blob_hash}." | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,28 @@ | ||
import pytest | ||
from requests.exceptions import HTTPError | ||
|
||
from ethpm import Package | ||
from ethpm.backends.http import GithubOverHTTPSBackend | ||
from ethpm.constants import RAW_GITHUB_AUTHORITY | ||
from ethpm.exceptions import ValidationError | ||
from ethpm.constants import GITHUB_API_AUTHORITY | ||
from ethpm.exceptions import CannotHandleURI, ValidationError | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"uri", | ||
( | ||
"https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501 | ||
"https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#0x01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2cace1", # noqa: E501 | ||
"https://api.github.com/repos/ethpm/py-ethpm/git/blobs/a7232a93f1e9e75d606f6c1da18aa16037e03480", | ||
), | ||
) | ||
def test_github_over_https_backend_fetch_uri_contents(uri, owned_contract, w3): | ||
# these tests may occasionally fail CI as a result of their network requests | ||
backend = GithubOverHTTPSBackend() | ||
assert backend.base_uri == RAW_GITHUB_AUTHORITY | ||
assert backend.base_uri == GITHUB_API_AUTHORITY | ||
# integration with Package.from_uri | ||
owned_package = Package.from_uri(uri, w3) | ||
assert owned_package.name == "owned" | ||
|
||
|
||
def test_github_over_https_backend_raises_error_with_invalid_content_hash(w3): | ||
invalid_uri = "https://raw.githubusercontent.com/ethpm/ethpm-spec/3945c47dedb04930ee12c0281494a1b5bdd692a0/examples/owned/1.0.0.json#01cbc2a69a9f86e9d9e7b87475e2ba2619404dc8d6ee3cb3a8acf3176c2ca111" # noqa: E501 | ||
with pytest.raises(ValidationError): | ||
invalid_uri = "https://api.github.com/repos/ethpm/py-ethpm/git/blobs/a7232a93f1e9e75d606f6c1da18aa16037e03123" | ||
with pytest.raises(HTTPError): | ||
Package.from_uri(invalid_uri, w3) |
Oops, something went wrong.