Skip to content

Commit

Permalink
Fold link validation into resolve_all() and resolve scoped identifiers.
Browse files Browse the repository at this point in the history
  • Loading branch information
Peter Amstutz committed Jun 2, 2016
1 parent da10eec commit 9fcd67d
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 51 deletions.
32 changes: 16 additions & 16 deletions schema_salad/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,14 @@ def main(argsl=None): # type: (List[str]) -> int
return 0

# Validate links in the schema document
try:
metaschema_loader.validate_links(schema_doc)
except (validate.ValidationException) as e:
_logger.error("Schema `%s` failed link checking:\n%s",
args.schema, e, exc_info=(e if args.debug else False))
_logger.debug("Index is %s", metaschema_loader.idx.keys())
_logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys())
return 1
# try:
# metaschema_loader.validate_links(schema_doc)
# except (validate.ValidationException) as e:
# _logger.error("Schema `%s` failed link checking:\n%s",
# args.schema, e, exc_info=(e if args.debug else False))
# _logger.debug("Index is %s", metaschema_loader.idx.keys())
# _logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys())
# return 1

# Validate the schema document against the metaschema
try:
Expand Down Expand Up @@ -197,14 +197,14 @@ def main(argsl=None): # type: (List[str]) -> int
return 0

# Validate links in the target document
try:
document_loader.validate_links(document)
except (validate.ValidationException) as e:
_logger.error("Document `%s` failed link checking:\n%s",
args.document, e, exc_info=(e if args.debug else False))
_logger.debug("Index is %s", json.dumps(
document_loader.idx.keys(), indent=4))
return 1
# try:
# document_loader.validate_links(document)
# except (validate.ValidationException) as e:
# _logger.error("Document `%s` failed link checking:\n%s",
# args.document, e, exc_info=(e if args.debug else False))
# _logger.debug("Index is %s", json.dumps(
# document_loader.idx.keys(), indent=4))
# return 1

# Validate the schema document against the metaschema
try:
Expand Down
76 changes: 50 additions & 26 deletions schema_salad/ref_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def __init__(self, ctx, schemagraph=None, foreign_properties=None,
self.cache = {}

self.url_fields = None # type: Set[str]
self.scoped_ref_fields = None # type: Set[str]
self.vocab_fields = None # type: Set[str]
self.identifiers = None # type: Set[str]
self.identity_links = None # type: Set[str]
Expand Down Expand Up @@ -186,6 +187,7 @@ def add_context(self, newcontext, baseuri=""):
"Refreshing context that already has stuff in it")

self.url_fields = set()
self.scoped_ref_fields = set()
self.vocab_fields = set()
self.identifiers = set()
self.identity_links = set()
Expand All @@ -206,6 +208,8 @@ def add_context(self, newcontext, baseuri=""):
self.identity_links.add(key)
elif isinstance(value, dict) and value.get("@type") == "@id":
self.url_fields.add(key)
if value.get("scopedRef", False):
self.scoped_ref_fields.add(key)
if value.get("identity", False):
self.identity_links.add(key)
elif isinstance(value, dict) and value.get("@type") == "@vocab":
Expand Down Expand Up @@ -235,7 +239,7 @@ def add_context(self, newcontext, baseuri=""):
_logger.debug("vocab_fields is %s", self.vocab_fields)
_logger.debug("vocab is %s", self.vocab)

def resolve_ref(self, ref, base_url=None):
def resolve_ref(self, ref, base_url=None, toplevel=True):
# type: (Union[Dict[str, Any], str, unicode], Union[str, unicode]) -> Tuple[Union[Dict[str, Any], str, unicode], Dict[str, Any]]
base_url = base_url or 'file://%s/' % os.path.abspath('.')

Expand Down Expand Up @@ -297,7 +301,7 @@ def resolve_ref(self, ref, base_url=None):
doc = self.fetch(doc_url)

# Recursively expand urls and resolve directives
obj, metadata = self.resolve_all(doc if doc else obj, doc_url)
obj, metadata = self.resolve_all(doc if doc else obj, doc_url, toplevel=toplevel)

# Requested reference should be in the index now, otherwise it's a bad
# reference
Expand All @@ -318,7 +322,7 @@ def resolve_ref(self, ref, base_url=None):
except TypeError:
return obj, metadata

def resolve_all(self, document, base_url, file_base=None):
def resolve_all(self, document, base_url, file_base=None, toplevel=True):
# type: (Any, Union[str, unicode], Union[str, unicode]) -> Tuple[Any, Dict[str, Any]]
loader = self
metadata = {} # type: Dict[str, Any]
Expand All @@ -328,7 +332,7 @@ def resolve_all(self, document, base_url, file_base=None):
if isinstance(document, dict):
# Handle $import and $include
if ('$import' in document or '$include' in document):
return self.resolve_ref(document, file_base)
return self.resolve_ref(document, base_url=file_base, toplevel=toplevel)
elif isinstance(document, list):
pass
else:
Expand Down Expand Up @@ -364,7 +368,7 @@ def resolve_all(self, document, base_url, file_base=None):
if "$graph" in document:
metadata = _copy_dict_without_key(document, "$graph")
document = document["$graph"]
metadata, _ = loader.resolve_all(metadata, base_url, file_base)
metadata, _ = loader.resolve_all(metadata, base_url, file_base=file_base, toplevel=False)

if isinstance(document, dict):
for idmapField in loader.idmap:
Expand Down Expand Up @@ -412,6 +416,8 @@ def resolve_all(self, document, base_url, file_base=None):
del document[d]

for d in loader.url_fields:
if d in self.scoped_ref_fields:
continue
if d in document:
if isinstance(document[d], basestring):
document[d] = loader.expand_url(
Expand All @@ -427,7 +433,7 @@ def resolve_all(self, document, base_url, file_base=None):
try:
for key, val in document.items():
document[key], _ = loader.resolve_all(
val, base_url, file_base)
val, base_url, file_base=file_base, toplevel=False)
except validate.ValidationException as v:
_logger.debug("loader is %s", id(loader))
raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % (
Expand All @@ -439,7 +445,7 @@ def resolve_all(self, document, base_url, file_base=None):
while i < len(document):
val = document[i]
if isinstance(val, dict) and "$import" in val:
l, _ = loader.resolve_ref(val, file_base)
l, _ = loader.resolve_ref(val, base_url=file_base, toplevel=False)
if isinstance(l, list):
del document[i]
for item in aslist(l):
Expand All @@ -450,7 +456,7 @@ def resolve_all(self, document, base_url, file_base=None):
i += 1
else:
document[i], _ = loader.resolve_all(
val, base_url, file_base)
val, base_url, file_base=file_base, toplevel=False)
i += 1
except validate.ValidationException as v:
raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % (
Expand All @@ -463,6 +469,9 @@ def resolve_all(self, document, base_url, file_base=None):
metadata[identifer], base_url, scoped=True)
loader.idx[metadata[identifer]] = document

if toplevel:
self.validate_links(document, "")

return document, metadata

def fetch_text(self, url):
Expand Down Expand Up @@ -522,36 +531,51 @@ def check_file(self, fn): # type: (Union[str, unicode]) -> bool
else:
return False

def validate_link(self, field, link, docid):
    # type: (str, Union[str, unicode, List[str], Dict[str, Any]], Union[str, unicode]) -> Union[str, unicode, List[str], Dict[str, Any]]
    """Check that `link` points at something known and return it.

    field: name of the document field that holds the link.
    link: a string reference, a list of references (each checked in
        turn and replaced in place by its checked value), or a dict
        (handed to validate_links()).
    docid: identifier of the enclosing object; scoped references are
        looked up relative to the fragment of this URL.

    Returns the link.  A string in a scoped-reference field that is not
    already in the index is replaced by the URL of the innermost
    enclosing scope in which it is found.

    Raises validate.ValidationException when a reference cannot be
    found, or when `link` is not a str, unicode, list or dict.
    """
    # Fields excluded from link checking are passed through untouched.
    if field in self.nolinkcheck:
        return link

    if isinstance(link, (str, unicode)):
        if field in self.vocab_fields:
            if (link not in self.vocab and link not in self.idx
                    and link not in self.rvocab):
                if not self.check_file(link):
                    raise validate.ValidationException(
                        "Field `%s` contains undefined reference to `%s`" % (field, link))
        elif link not in self.idx and link not in self.rvocab:
            if field in self.scoped_ref_fields:
                split = urlparse.urlsplit(docid)
                scope = split.fragment.split("/")
                # Walk outwards one scope level at a time, trying
                # "<enclosing scope>/<link>" at each level and finally
                # the bare "<link>" fragment.
                while scope:
                    scope.pop()
                    url = urlparse.urlunsplit(
                        (split.scheme, split.netloc, split.path,
                         split.query, "/".join(scope + [link])))
                    if url in self.idx:
                        # No stdout output here: this is library code.
                        return url
                raise validate.ValidationException(
                    "Field `%s` contains undefined reference to `%s`" % (field, link))
            elif not self.check_file(link):
                raise validate.ValidationException(
                    "Field `%s` contains undefined reference to `%s`" % (field, link))
    elif isinstance(link, list):
        # Check every entry so that all bad references are reported
        # together rather than only the first one.
        errors = []
        for n, i in enumerate(link):
            try:
                link[n] = self.validate_link(field, i, docid)
            except validate.ValidationException as v:
                errors.append(v)
        if errors:
            raise validate.ValidationException(
                "\n".join([str(e) for e in errors]))
    elif isinstance(link, dict):
        self.validate_links(link, docid)
    else:
        raise validate.ValidationException("Link must be a str, unicode, "
                                           "list, or a dict.")
    return link

def getid(self, d): # type: (Any) -> Union[basestring, None]
if isinstance(d, dict):
Expand All @@ -561,10 +585,10 @@ def getid(self, d): # type: (Any) -> Union[basestring, None]
return d[i]
return None

def validate_links(self, document): # type: (Any) -> None
def validate_links(self, document, base_url): # type: (Any) -> None
docid = self.getid(document)
if docid is None:
docid = ""
if not docid:
docid = base_url

errors = []
iterator = None # type: Any
Expand All @@ -573,26 +597,26 @@ def validate_links(self, document): # type: (Any) -> None
elif isinstance(document, dict):
try:
for d in self.url_fields:
if d not in self.identity_links and d in document:
self.validate_link(d, document[d])
if d in document and d not in self.identity_links:
document[d] = self.validate_link(d, document[d], docid)
except validate.ValidationException as v:
errors.append(v)
if hasattr(document, "iteritems"):
iterator = document.iteritems()
else:
iterator = document.items()
else:
return
return document

for key, val in iterator:
try:
self.validate_links(val)
document[key] = self.validate_links(val, docid)
except validate.ValidationException as v:
if key not in self.nolinkcheck:
docid = self.getid(val)
if docid:
docid2 = self.getid(val)
if docid2:
errors.append(validate.ValidationException(
"While checking object `%s`\n%s" % (docid, validate.indent(str(v)))))
"While checking object `%s`\n%s" % (docid2, validate.indent(str(v)))))
else:
if isinstance(key, basestring):
errors.append(validate.ValidationException(
Expand All @@ -607,7 +631,7 @@ def validate_links(self, document): # type: (Any) -> None
"\n".join([str(e) for e in errors]))
else:
raise errors[0]
return
return document


def _copy_dict_without_key(from_dict, filtered_key):
Expand Down
1 change: 0 additions & 1 deletion schema_salad/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ def load_and_validate(document_loader, avsc_names, document, strict):
else:
data, metadata = document_loader.resolve_ref(document)

document_loader.validate_links(data)
validate_doc(avsc_names, data, document_loader, strict)
return data, metadata

Expand Down
44 changes: 36 additions & 8 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,44 @@ def test_idmap(self):
}
}, "http://example2.com/")

self.assertEqual(ra["id"], "http://example2.com/#stuff")
self.assertEqual("http://example2.com/#stuff", ra["id"])
for item in ra["inputs"]:
if item["a"] == 2:
self.assertEquals(item["id"],
'http://example2.com/#stuff/zing')
self.assertEquals('http://example2.com/#stuff/zing', item["id"])
else:
self.assertEquals(item["id"],
'http://example2.com/#stuff/zip')
self.assertEquals(ra['outputs'], ['http://example2.com/#stuff/out'])
self.assertEquals(ra['other'], {'n': 9})
self.assertEquals('http://example2.com/#stuff/zip', item["id"])
self.assertEquals(['http://example2.com/#stuff/out'], ra['outputs'])
self.assertEquals({'n': 9}, ra['other'])

def test_scoped_ref(self):
    """Scoped references resolve against the enclosing identifier scopes.

    The innermost object lists the bare names "foo", "bar" and "baz" in
    its scoped-reference field; after resolve_all() each one must have
    been replaced by the full identifier of the matching enclosing
    object.
    """
    ldr = schema_salad.ref_resolver.Loader({})
    ldr.add_context({
        "ref": {
            "@type": "@id",
            "scopedRef": True,
        },
        "id": "@id"})

    ra, _ = ldr.resolve_all({
        "id": "foo",
        "blurb": {
            "id": "bar",
            "blurb": {
                "id": "baz",
                "ref": ["foo", "bar", "baz"]
            }
        }
    }, "http://example2.com/")

    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual({'id': 'http://example2.com/#foo',
                      'blurb': {
                          'id': 'http://example2.com/#foo/bar',
                          'blurb': {
                              'ref': ['http://example2.com/#foo',
                                      'http://example2.com/#foo/bar',
                                      'http://example2.com/#foo/bar/baz'],
                              'id': 'http://example2.com/#foo/bar/baz'}}},
                     ra)

def test_examples(self):
self.maxDiff = None
Expand All @@ -115,7 +143,7 @@ def test_examples(self):
"schema_salad/metaschema/%s_schema.yml" % a)
with open("schema_salad/metaschema/%s_src.yml" % a) as src_fp:
src = ldr.resolve_all(
yaml.load(src_fp, Loader=SafeLoader), "")[0]
yaml.load(src_fp, Loader=SafeLoader), "", toplevel=False)[0]
with open("schema_salad/metaschema/%s_proc.yml" % a) as src_proc:
proc = yaml.load(src_proc, Loader=SafeLoader)
self.assertEqual(proc, src)
Expand Down

0 comments on commit 9fcd67d

Please sign in to comment.