From 9fcd67d23ca2625a8203d3e4008eae128955b493 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 31 May 2016 17:54:35 -0400 Subject: [PATCH] Fold link validation into resolve_all() and resolve scoped identifiers. --- schema_salad/main.py | 32 +++++++-------- schema_salad/ref_resolver.py | 76 ++++++++++++++++++++++++------------ schema_salad/schema.py | 1 - tests/test_examples.py | 44 +++++++++++++++++---- 4 files changed, 102 insertions(+), 51 deletions(-) diff --git a/schema_salad/main.py b/schema_salad/main.py index f86667f21..4692d5d0b 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -111,14 +111,14 @@ def main(argsl=None): # type: (List[str]) -> int return 0 # Validate links in the schema document - try: - metaschema_loader.validate_links(schema_doc) - except (validate.ValidationException) as e: - _logger.error("Schema `%s` failed link checking:\n%s", - args.schema, e, exc_info=(e if args.debug else False)) - _logger.debug("Index is %s", metaschema_loader.idx.keys()) - _logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys()) - return 1 + # try: + # metaschema_loader.validate_links(schema_doc) + # except (validate.ValidationException) as e: + # _logger.error("Schema `%s` failed link checking:\n%s", + # args.schema, e, exc_info=(e if args.debug else False)) + # _logger.debug("Index is %s", metaschema_loader.idx.keys()) + # _logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys()) + # return 1 # Validate the schema document against the metaschema try: @@ -197,14 +197,14 @@ def main(argsl=None): # type: (List[str]) -> int return 0 # Validate links in the target document - try: - document_loader.validate_links(document) - except (validate.ValidationException) as e: - _logger.error("Document `%s` failed link checking:\n%s", - args.document, e, exc_info=(e if args.debug else False)) - _logger.debug("Index is %s", json.dumps( - document_loader.idx.keys(), indent=4)) - return 1 + # try: + # document_loader.validate_links(document) + # except (validate.ValidationException) as e: + # _logger.error("Document `%s` failed link checking:\n%s", + # args.document, e, exc_info=(e if args.debug else False)) + # _logger.debug("Index is %s", json.dumps( + # document_loader.idx.keys(), indent=4)) + # return 1 # Validate the schema document against the metaschema try: diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 2adff9b91..41bc7584b 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -94,6 +94,7 @@ def __init__(self, ctx, schemagraph=None, foreign_properties=None, self.cache = {} self.url_fields = None # type: Set[str] + self.scoped_ref_fields = None # type: Set[str] self.vocab_fields = None # type: Set[str] self.identifiers = None # type: Set[str] self.identity_links = None # type: Set[str] @@ -186,6 +187,7 @@ def add_context(self, newcontext, baseuri=""): "Refreshing context that already has stuff in it") self.url_fields = set() + self.scoped_ref_fields = set() self.vocab_fields = set() self.identifiers = set() self.identity_links = set() @@ -206,6 +208,8 @@ def add_context(self, newcontext, baseuri=""): self.identity_links.add(key) elif isinstance(value, dict) and value.get("@type") == "@id": self.url_fields.add(key) + if value.get("scopedRef", False): + self.scoped_ref_fields.add(key) if value.get("identity", False): self.identity_links.add(key) elif isinstance(value, dict) and value.get("@type") == "@vocab": @@ -235,7 +239,7 @@ def add_context(self, newcontext, baseuri=""): _logger.debug("vocab_fields is %s", self.vocab_fields) _logger.debug("vocab is %s", self.vocab) - def resolve_ref(self, ref, base_url=None): + def resolve_ref(self, ref, base_url=None, toplevel=True): # type: (Union[Dict[str, Any], str, unicode], Union[str, unicode]) -> Tuple[Union[Dict[str, Any], str, unicode], Dict[str, Any]] base_url = base_url or 'file://%s/' % os.path.abspath('.') @@ -297,7 +301,7 @@ def resolve_ref(self, ref, base_url=None): doc = self.fetch(doc_url) # Recursively expand urls and resolve directives - obj, metadata = self.resolve_all(doc if doc else obj, doc_url) + obj, metadata = self.resolve_all(doc if doc else obj, doc_url, toplevel=toplevel) # Requested reference should be in the index now, otherwise it's a bad # reference @@ -318,7 +322,7 @@ def resolve_ref(self, ref, base_url=None): except TypeError: return obj, metadata - def resolve_all(self, document, base_url, file_base=None): + def resolve_all(self, document, base_url, file_base=None, toplevel=True): # type: (Any, Union[str, unicode], Union[str, unicode]) -> Tuple[Any, Dict[str, Any]] loader = self metadata = {} # type: Dict[str, Any] @@ -328,7 +332,7 @@ def resolve_all(self, document, base_url, file_base=None): if isinstance(document, dict): # Handle $import and $include if ('$import' in document or '$include' in document): - return self.resolve_ref(document, file_base) + return self.resolve_ref(document, base_url=file_base, toplevel=toplevel) elif isinstance(document, list): pass else: @@ -364,7 +368,7 @@ def resolve_all(self, document, base_url, file_base=None): if "$graph" in document: metadata = _copy_dict_without_key(document, "$graph") document = document["$graph"] - metadata, _ = loader.resolve_all(metadata, base_url, file_base) + metadata, _ = loader.resolve_all(metadata, base_url, file_base=file_base, toplevel=False) if isinstance(document, dict): for idmapField in loader.idmap: @@ -412,6 +416,8 @@ def resolve_all(self, document, base_url, file_base=None): del document[d] for d in loader.url_fields: + if d in self.scoped_ref_fields: + continue if d in document: if isinstance(document[d], basestring): document[d] = loader.expand_url( @@ -427,7 +433,7 @@ def resolve_all(self, document, base_url, file_base=None): try: for key, val in document.items(): document[key], _ = loader.resolve_all( - val, base_url, file_base) + val, base_url, file_base=file_base, toplevel=False) except validate.ValidationException as v: _logger.debug("loader is %s", id(loader)) raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % ( @@ -439,7 +445,7 @@ def resolve_all(self, document, base_url, file_base=None): while i < len(document): val = document[i] if isinstance(val, dict) and "$import" in val: - l, _ = loader.resolve_ref(val, file_base) + l, _ = loader.resolve_ref(val, base_url=file_base, toplevel=False) if isinstance(l, list): del document[i] for item in aslist(l): @@ -450,7 +456,7 @@ def resolve_all(self, document, base_url, file_base=None): i += 1 else: document[i], _ = loader.resolve_all( - val, base_url, file_base) + val, base_url, file_base=file_base, toplevel=False) i += 1 except validate.ValidationException as v: raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % ( @@ -463,6 +469,9 @@ def resolve_all(self, document, base_url, file_base=None): metadata[identifer], base_url, scoped=True) loader.idx[metadata[identifer]] = document + if toplevel: + self.validate_links(document, "") + return document, metadata def fetch_text(self, url): @@ -522,10 +531,10 @@ def check_file(self, fn): # type: (Union[str, unicode]) -> bool else: return False - def validate_link(self, field, link): + def validate_link(self, field, link, docid): # type: (str, Union[str, unicode, List[str], Dict[str, Any]]) -> bool if field in self.nolinkcheck: - return True + return link if isinstance(link, (str, unicode)): if field in self.vocab_fields: if link not in self.vocab and link not in self.idx and link not in self.rvocab: @@ -533,25 +542,40 @@ def validate_link(self, field, link): raise validate.ValidationException( "Field `%s` contains undefined reference to `%s`" % (field, link)) elif link not in self.idx and link not in self.rvocab: - if not self.check_file(link): + if field in self.scoped_ref_fields: + split = urlparse.urlsplit(docid) + sp = split.fragment.split("/") + while len(sp) > 0: + sp.pop() + sp.append(link) + url = urlparse.urlunsplit( + (split.scheme, split.netloc, split.path, split.query, "/".join(sp))) + if url in self.idx: + print link, "is", url + return url + else: + sp.pop() + raise validate.ValidationException( + "Field `%s` contains undefined reference to `%s`" % (field, link)) + elif not self.check_file(link): raise validate.ValidationException( "Field `%s` contains undefined reference to `%s`" % (field, link)) elif isinstance(link, list): errors = [] - for i in link: + for n, i in enumerate(link): try: - self.validate_link(field, i) + link[n] = self.validate_link(field, i, docid) except validate.ValidationException as v: errors.append(v) if errors: raise validate.ValidationException( "\n".join([str(e) for e in errors])) elif isinstance(link, dict): - self.validate_links(link) + self.validate_links(link, docid) else: raise validate.ValidationException("Link must be a str, unicode, " "list, or a dict.") - return True + return link def getid(self, d): # type: (Any) -> Union[basestring, None] if isinstance(d, dict): @@ -561,10 +585,10 @@ def getid(self, d): # type: (Any) -> Union[basestring, None] return d[i] return None - def validate_links(self, document): # type: (Any) -> None + def validate_links(self, document, base_url): # type: (Any) -> None docid = self.getid(document) - if docid is None: - docid = "" + if not docid: + docid = base_url errors = [] iterator = None # type: Any @@ -573,8 +597,8 @@ def validate_links(self, document): # type: (Any) -> None elif isinstance(document, dict): try: for d in self.url_fields: - if d not in self.identity_links and d in document: - self.validate_link(d, document[d]) + if d in document and d not in self.identity_links: + document[d] = self.validate_link(d, document[d], docid) except validate.ValidationException as v: errors.append(v) if hasattr(document, "iteritems"): @@ -582,17 +606,17 @@ def validate_links(self, document): # type: (Any) -> None else: iterator = document.items() else: - return + return document for key, val in iterator: try: - self.validate_links(val) + document[key] = self.validate_links(val, docid) except validate.ValidationException as v: if key not in self.nolinkcheck: - docid = self.getid(val) - if docid: + docid2 = self.getid(val) + if docid2: errors.append(validate.ValidationException( - "While checking object `%s`\n%s" % (docid, validate.indent(str(v))))) + "While checking object `%s`\n%s" % (docid2, validate.indent(str(v))))) else: if isinstance(key, basestring): errors.append(validate.ValidationException( @@ -607,7 +631,7 @@ def validate_links(self, document): # type: (Any) -> None "\n".join([str(e) for e in errors])) else: raise errors[0] - return + return document def _copy_dict_without_key(from_dict, filtered_key): diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 739826862..a3517bb69 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -190,7 +190,6 @@ def load_and_validate(document_loader, avsc_names, document, strict): else: data, metadata = document_loader.resolve_ref(document) - document_loader.validate_links(data) validate_doc(avsc_names, data, document_loader, strict) return data, metadata diff --git a/tests/test_examples.py b/tests/test_examples.py index 404314060..4e8133766 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -97,16 +97,44 @@ def test_idmap(self): } }, "http://example2.com/") - self.assertEqual(ra["id"], "http://example2.com/#stuff") + self.assertEqual("http://example2.com/#stuff", ra["id"]) for item in ra["inputs"]: if item["a"] == 2: - self.assertEquals(item["id"], - 'http://example2.com/#stuff/zing') + self.assertEquals('http://example2.com/#stuff/zing', item["id"]) else: - self.assertEquals(item["id"], - 'http://example2.com/#stuff/zip') - self.assertEquals(ra['outputs'], ['http://example2.com/#stuff/out']) - self.assertEquals(ra['other'], {'n': 9}) + self.assertEquals('http://example2.com/#stuff/zip', item["id"]) + self.assertEquals(['http://example2.com/#stuff/out'], ra['outputs']) + self.assertEquals({'n': 9}, ra['other']) + + def test_scoped_ref(self): + ldr = schema_salad.ref_resolver.Loader({}) + ldr.add_context({ + "ref": { + "@type": "@id", + "scopedRef": True, + }, + "id": "@id"}) + + ra, _ = ldr.resolve_all({ + "id": "foo", + "blurb": { + "id": "bar", + "blurb": { + "id": "baz", + "ref": ["foo", "bar", "baz"] + } + } + }, "http://example2.com/") + + self.assertEquals({'id': 'http://example2.com/#foo', + 'blurb': { + 'id': 'http://example2.com/#foo/bar', + 'blurb': { + 'ref': ['http://example2.com/#foo', + 'http://example2.com/#foo/bar', + 'http://example2.com/#foo/bar/baz'], + 'id': 'http://example2.com/#foo/bar/baz'}}}, + ra) def test_examples(self): self.maxDiff = None @@ -115,7 +143,7 @@ def test_examples(self): "schema_salad/metaschema/%s_schema.yml" % a) with open("schema_salad/metaschema/%s_src.yml" % a) as src_fp: src = ldr.resolve_all( - yaml.load(src_fp, Loader=SafeLoader), "")[0] + yaml.load(src_fp, Loader=SafeLoader), "", toplevel=False)[0] with open("schema_salad/metaschema/%s_proc.yml" % a) as src_proc: proc = yaml.load(src_proc, Loader=SafeLoader) self.assertEqual(proc, src)