Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support asdf:// URI scheme #854

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions asdf/generic_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@
from os import SEEK_SET, SEEK_CUR, SEEK_END

import http.client
from urllib import parse as urlparse
from urllib.request import url2pathname

import numpy as np

from . import util
from .extern import atomicfile
from .util import patched_urllib_parse


__all__ = ['get_file', 'get_uri', 'resolve_uri', 'relative_uri']
Expand Down Expand Up @@ -144,8 +144,8 @@ def resolve_uri(base, uri):
"""
if base is None:
base = ''
resolved = urlparse.urljoin(base, uri)
parsed = urlparse.urlparse(resolved)
resolved = patched_urllib_parse.urljoin(base, uri)
parsed = patched_urllib_parse.urlparse(resolved)
if parsed.path != '' and not parsed.path.startswith('/'):
raise ValueError(
"Resolved to relative URL")
Expand All @@ -156,8 +156,8 @@ def relative_uri(source, target):
"""
Make a relative URI from source to target.
"""
su = urlparse.urlparse(source)
tu = urlparse.urlparse(target)
su = patched_urllib_parse.urlparse(source)
tu = patched_urllib_parse.urlparse(target)
extra = list(tu[3:])
relative = None
if tu[0] == '' and tu[1] == '':
Expand All @@ -175,7 +175,7 @@ def relative_uri(source, target):
relative = os.path.relpath(tu[2], os.path.dirname(su[2]))
if relative == '.':
relative = ''
relative = urlparse.urlunparse(["", "", relative] + extra)
relative = patched_urllib_parse.urlunparse(["", "", relative] + extra)
return relative


Expand Down Expand Up @@ -1044,7 +1044,7 @@ def _make_http_connection(init, mode, uri=None):
Creates a HTTPConnection instance if the HTTP server supports
Range requests, otherwise falls back to a generic InputStream.
"""
parsed = urlparse.urlparse(init)
parsed = patched_urllib_parse.urlparse(init)
connection = http.client.HTTPConnection(parsed.netloc)
connection.connect()

Expand Down Expand Up @@ -1166,7 +1166,7 @@ def get_file(init, mode='r', uri=None, close=False):
return GenericWrapper(init)

elif isinstance(init, (str, pathlib.Path)):
parsed = urlparse.urlparse(str(init))
parsed = patched_urllib_parse.urlparse(str(init))
if parsed.scheme in ['http', 'https']:
if 'w' in mode:
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion asdf/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def resolve_refs(node, json_id):
else:
suburl_path = suburl
suburl_path = resolver(suburl_path)
if suburl_path == url:
if suburl_path == url or suburl_path == schema.get("id"):
subschema = schema
else:
subschema = load_schema(suburl_path, resolver, True)
Expand Down
16 changes: 16 additions & 0 deletions asdf/tests/test_generic_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,22 @@ def test_relative_uri():
'http://www.google.com', 'file://local') == 'file://local'


@pytest.mark.parametrize("protocol", ["http", "asdf"])
def test_resolve_uri(protocol):
"""
Confirm that the patched urllib.parse is handling
asdf:// URIs correctly.
"""
assert generic_io.resolve_uri(
'{}://somewhere.org/some-schema'.format(protocol), '#/definitions/foo'
) == '{}://somewhere.org/some-schema#/definitions/foo'.format(protocol)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use f-strings here and bump the minimum version of python to 3.6 for our 2.8 release?

I don't see a good reason to keep supporting python 3.5 when it is no longer supported by astropy and numpy.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're asking the good questions. I'll start an email discussion.


assert generic_io.resolve_uri(
'{}://somewhere.org/path/to/some-schema'.format(protocol),
'../../some/other/path/to/some-other-schema'
) == '{}://somewhere.org/some/other/path/to/some-other-schema'.format(protocol)


def test_arbitrary_file_object():
class Wrapper:
def __init__(self, init):
Expand Down
25 changes: 25 additions & 0 deletions asdf/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,31 @@ def test_load_schema_with_file_url(tmpdir):
schema.check_schema(schema_tree)


def test_load_schema_with_asdf_uri_scheme():
content = """%YAML 1.1
---
$schema: http://stsci.edu/schemas/asdf/asdf-schema-1.0.0
id: asdf://somewhere.org/schemas/foo

definitions:
bar:
type: string

type: object
properties:
id:
type: string
bar:
$ref: #/definitions/bar
...
"""
with asdf.config_context() as config:
config.add_resource_mapping({"asdf://somewhere.org/schemas/foo": content})

schema_tree = schema.load_schema("asdf://somewhere.org/schemas/foo", resolve_references=True)
schema.check_schema(schema_tree)


def test_schema_caching():
# Make sure that if we request the same URL, we get a different object
# (despite the caching internal to load_schema). Changes to a schema
Expand Down
10 changes: 10 additions & 0 deletions asdf/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,13 @@ def test_get_class_name():

def test_get_class_name_override():
assert util.get_class_name(BuiltinExtension, instance=False) == "asdf.extension.BuiltinExtension"


def test_patched_urllib_parse():
assert "asdf" in util.patched_urllib_parse.uses_relative
assert "asdf" in util.patched_urllib_parse.uses_netloc

import urllib.parse
assert urllib.parse is not util.patched_urllib_parse
assert "asdf" not in urllib.parse.uses_relative
assert "asdf" not in urllib.parse.uses_netloc
6 changes: 4 additions & 2 deletions asdf/treeutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,8 +375,10 @@ def _recurse(node, json_id=None):
with _context.pending(node):
# Take note of the "id" field, in case we're modifying
# a schema and need to know the namespace for resolving
# URIs.
if isinstance(node, dict) and "id" in node:
# URIs. Ignore an id that is not a string, since it may
# be an object defining an id property and not an id
# itself (this is common in metaschemas).
if isinstance(node, dict) and "id" in node and isinstance(node["id"], str):
json_id = node["id"]

if postorder:
Expand Down
24 changes: 19 additions & 5 deletions asdf/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,27 @@
import math
import struct
import types
import importlib.util

from urllib.parse import urljoin
from urllib.request import pathname2url
from urllib import parse as urlparse

import numpy as np

# We're importing our own copy of urllib.parse because
# we need to patch it to support asdf:// URIs, but it'd
# be irresponsible to do this for all users of a
# standard library.
urllib_parse_spec = importlib.util.find_spec('urllib.parse')
patched_urllib_parse = importlib.util.module_from_spec(urllib_parse_spec)
urllib_parse_spec.loader.exec_module(patched_urllib_parse)
del urllib_parse_spec
Comment on lines +11 to +18
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What the advantage of doing it this way rather than

import urllib.parse as patched_urllib_parse

It's still local to this module's namespace, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately while that aliases the module reference to a new name, it still points to the same module object. Import caches the module after loading it the first time so that other imports don't have to read and parse the module's code again.

# module1.py
import urllib.parse as module1_alias

# module2.py
import urllib.parse as module2_alias

# Fire up a console:
In [1]: import module1

In [2]: import module2

In [3]: module1.module1_alias is module2.module2_alias
Out[3]: True

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see. I didn't realize there was caching going on. This bypasses that. #themoreyouknow


# urllib.parse needs to know that it should treat asdf://
# URIs like http:// URIs for the purposes of joining
# a relative path to a base URI.
patched_urllib_parse.uses_relative.append('asdf')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Digging deep into the standard library. What a naughty boy. ;-) I had to go look at the library source code to see this explained.

patched_urllib_parse.uses_netloc.append('asdf')


__all__ = ['human_list', 'get_array_base', 'get_base_uri', 'filepath_to_url',
'iter_subclasses', 'calculate_padding', 'resolve_name', 'NotSet',
Expand Down Expand Up @@ -58,15 +72,15 @@ def get_base_uri(uri):
"""
For a given URI, return the part without any fragment.
"""
parts = urlparse.urlparse(uri)
return urlparse.urlunparse(list(parts[:5]) + [''])
parts = patched_urllib_parse.urlparse(uri)
return patched_urllib_parse.urlunparse(list(parts[:5]) + [''])


def filepath_to_url(path):
"""
For a given local file path, return a file:// url.
"""
return urljoin('file:', pathname2url(path))
return patched_urllib_parse.urljoin('file:', pathname2url(path))


def iter_subclasses(cls):
Expand Down