From 463360c7b76f221d693a0041bd32793d7ba52656 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 6 Oct 2020 19:47:06 -0400 Subject: [PATCH 1/8] Use for-loop instead of slower any() with generator expression --- libcove/lib/common.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index aea125a..dcd68b2 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -17,17 +17,30 @@ from flattentool import unflatten from flattentool.schema import get_property_type_set from jsonschema import FormatChecker, RefResolver -from jsonschema._utils import uniq +from jsonschema._utils import ensure_list, types_msg, uniq from jsonschema.compat import urlopen, urlsplit from jsonschema.exceptions import ValidationError from .exceptions import cove_spreadsheet_conversion_error from .tools import decimal_default, get_request + +def type_validator(validator, types, instance, schema): + types = ensure_list(types) + + for type in types: + if validator.is_type(instance, type): + break + else: + yield ValidationError(types_msg(instance, types)) + + # Because we will be changing items on this validator, it's important we take a copy! # Otherwise we could cause conflicts with other software in the same process. validator = jsonschema.validators.extend( - jsonschema.validators.Draft4Validator, validators={} + validators={ + "type": type_validator, + }, ) uniqueItemsValidator = validator.VALIDATORS.pop("uniqueItems") From 3a67f14420a4ce15aaade851372fcf4885b6197c Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 7 Oct 2020 00:32:04 -0400 Subject: [PATCH 2/8] Avoid calling ensure_list() on every node in the JSON data --- libcove/lib/common.py | 65 +++++++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index dcd68b2..0788190 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -26,8 +26,6 @@ def type_validator(validator, types, instance, schema): - types = ensure_list(types) - for type in types: if validator.is_type(instance, type): break @@ -189,26 +187,26 @@ def oneOf_draft4(validator, oneOf, instance, schema): class SchemaJsonMixin: @cached_property def schema_str(self): - response = get_request( + schema = get_request( self.schema_url, config=getattr(self, "config", None), force_cache=getattr(self, "cache_schema", False), - ) - return response.text + ).json() + return json.dumps(_ensure_type_lists(schema)) @cached_property def pkg_schema_str(self): uri_scheme = urlparse(self.pkg_schema_url).scheme if uri_scheme == "http" or uri_scheme == "https": - response = get_request( + schema = get_request( self.pkg_schema_url, config=getattr(self, "config", None), force_cache=getattr(self, "cache_schema", False), - ) - return response.text + ).json() else: with open(self.pkg_schema_url) as fp: - return fp.read() + schema = json.load(fp) + return json.dumps(_ensure_type_lists(schema)) @property def _schema_obj(self): @@ -358,10 +356,11 @@ def get_remote_json(self, uri, **kwargs): uri = urljoin(self.schema_url, os.path.basename(uri_info.path)) if "http" in uri_info.scheme: - return get_request(uri, config=self.config).json(**kwargs) + schema = get_request(uri, config=self.config).json(**kwargs) else: with open(uri) as schema_file: - return json.load(schema_file, **kwargs) + schema = json.load(schema_file, **kwargs) + return _ensure_type_lists(schema) def common_checks_context( @@ -1021,15 +1020,12 @@ def resolve_remote(self, uri): # Otherwise, pass off to urllib and assume utf-8 with urlopen(uri) as url: result = json.loads(url.read().decode("utf-8")) - - if self.cache_remote: - self.store[uri] = result - return result else: with open(uri) as schema_file: result = json.load(schema_file) add_is_codelist(result) + result = _ensure_type_lists(result) self.store[uri] = result return result @@ -1253,3 +1249,42 @@ def get_orgids_prefixes(orgids_url=None): pass # Update fails return [org_list["code"] for org_list in org_ids["lists"]] + + +def _ensure_type_lists(schema): + """ + Loads the JSON data and change the values of "type" properties to arrays. + """ + # https://tools.ietf.org/html/draft-fge-json-schema-validation-00 + # 11 validatation properties "MUST be a valid JSON Schema". + schemas = {"additionalItems", "additionalProperties", "items", "not"} + list_of_schemas = {"allOf", "anyOf", "oneOf", "items"} + dict_of_schemas = { + "definitions", + "dependencies", + "patternProperties", + "properties", + } + + def _recurse(schema, pointer=()): + if isinstance(schema, dict): + if "type" in schema and ( + not pointer + or pointer[-1] in schemas + or len(pointer) > 1 + and ( + pointer[-2] in list_of_schemas + and isinstance(pointer[-1], int) + or pointer[-2] in dict_of_schemas + and isinstance(pointer[-1], str) + ) + ): + schema["type"] = ensure_list(schema["type"]) + for key, value in schema.items(): + _recurse(value, pointer + (key,)) + elif isinstance(schema, list): + for i, item in enumerate(schema): + _recurse(item, pointer + (i,)) + + _recurse(schema) + return schema From aa4e2cf33d3f9298b2f8659c5e0da1edcec973b5 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 7 Oct 2020 01:08:39 -0400 Subject: [PATCH 3/8] Fix accidental code deletion --- libcove/lib/common.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index 0788190..4f3b2b7 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -36,6 +36,7 @@ def type_validator(validator, types, instance, schema): # Because we will be changing items on this validator, it's important we take a copy! # Otherwise we could cause conflicts with other software in the same process. validator = jsonschema.validators.extend( + jsonschema.validators.Draft4Validator, validators={ "type": type_validator, }, From f91c56456a9baa1552a47bfdedb2ed99e009766e Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 7 Oct 2020 01:34:19 -0400 Subject: [PATCH 4/8] Fix DummyReleaseSchemaObj and DummyRecordSchemaObj in tests --- tests/lib/test_common.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/lib/test_common.py b/tests/lib/test_common.py index c173c61..fae0cb7 100644 --- a/tests/lib/test_common.py +++ b/tests/lib/test_common.py @@ -6,6 +6,7 @@ from libcove.lib.common import ( SchemaJsonMixin, + _ensure_type_lists, _get_schema_deprecated_paths, fields_present_generator, get_additional_fields_info, @@ -400,7 +401,7 @@ def __init__(self, schema_host): def get_pkg_schema_obj(self): with open(os.path.join(self.schema_host, "release-package-schema.json")) as fp: - schema_json = json.load(fp) + schema_json = _ensure_type_lists(json.load(fp)) return schema_json @@ -411,7 +412,7 @@ def __init__(self, schema_host): def get_pkg_schema_obj(self): with open(os.path.join(self.schema_host, "record-package-schema.json")) as fp: - schema_json = json.load(fp) + schema_json = _ensure_type_lists(json.load(fp)) return schema_json From 89e8d262bd9770908b6f456024b1f9ff8dcdc835 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 7 Oct 2020 01:36:36 -0400 Subject: [PATCH 5/8] Fix "type" comparison in _get_schema_deprecated_paths --- libcove/lib/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index 4f3b2b7..b44dbb9 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -1079,9 +1079,9 @@ def _get_schema_deprecated_paths( ) ) - if value.get("type") == "object": + if value.get("type") == ["object"]: _get_schema_deprecated_paths(None, value, path, deprecated_paths) - elif value.get("type") == "array" and value.get("items", {}).get("properties"): + elif value.get("type") == ["array"] and value.get("items", {}).get("properties"): _get_schema_deprecated_paths(None, value["items"], path, deprecated_paths) return deprecated_paths From e9c9d5afb0e61132ef05e64cf3f365d8ab14e4ef Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Wed, 7 Oct 2020 01:40:34 -0400 Subject: [PATCH 6/8] black --- libcove/lib/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index b44dbb9..9e85ce9 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -1081,7 +1081,9 @@ def _get_schema_deprecated_paths( if value.get("type") == ["object"]: _get_schema_deprecated_paths(None, value, path, deprecated_paths) - elif value.get("type") == ["array"] and value.get("items", {}).get("properties"): + elif value.get("type") == ["array"] and value.get("items", {}).get( + "properties" + ): _get_schema_deprecated_paths(None, value["items"], path, deprecated_paths) return deprecated_paths From afee2ae8201fe85ae7faa635b5432a9da8998ed0 Mon Sep 17 00:00:00 2001 From: Ben Webb Date: Thu, 9 Sep 2021 13:26:47 +0000 Subject: [PATCH 7/8] Wrap a list around some missed instances of types --- libcove/lib/common.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index ae2ec2a..94b91c1 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -430,10 +430,10 @@ def get_schema_codelist_paths( if "codelist" in value and path not in codelist_paths: codelist_paths[path] = (value["codelist"], value.get("openCodelist", False)) - if value.get("type") == "object": + if value.get("type") == ["object"]: get_schema_codelist_paths(None, value, path, codelist_paths) elif ( - value.get("type") == "array" + value.get("type") == ["array"] and isinstance(value.get("items"), dict) and value.get("items").get("properties") ): @@ -1250,10 +1250,10 @@ def _get_schema_non_required_ids( if prop == "id" and no_required_id and array_parent and not list_merge: id_paths.append(path) - if value.get("type") == "object": + if value.get("type") == ["object"]: _get_schema_non_required_ids(None, value, path, id_paths) elif ( - value.get("type") == "array" + value.get("type") == ["array"] and isinstance(value.get("items"), dict) and value.get("items").get("properties") ): @@ -1303,10 +1303,10 @@ def add_is_codelist(obj): else: value["isCodelist"] = True - if value.get("type") == "object": + if value.get("type") == ["object"]: add_is_codelist(value) elif ( - value.get("type") == "array" + value.get("type") == ["array"] and isinstance(value.get("items"), dict) and value.get("items").get("properties") ): From 011a2bdb0d3b1cbc8d359d5a1a80772e447db2e5 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 14 Sep 2021 19:01:15 -0400 Subject: [PATCH 8/8] Revert "Avoid calling ensure_list() on every node in the JSON data" --- libcove/lib/common.py | 81 ++++++++++++---------------------------- tests/lib/test_common.py | 5 +-- 2 files changed, 25 insertions(+), 61 deletions(-) diff --git a/libcove/lib/common.py b/libcove/lib/common.py index 3297cf0..342cb8d 100644 --- a/libcove/lib/common.py +++ b/libcove/lib/common.py @@ -32,6 +32,8 @@ def type_validator(validator, types, instance, schema): + types = ensure_list(types) + for type in types: if validator.is_type(instance, type): break @@ -323,26 +325,26 @@ def dependencies_extra_data(validator, dependencies, instance, schema): class SchemaJsonMixin: @cached_property def schema_str(self): - schema = get_request( + response = get_request( self.schema_url, config=getattr(self, "config", None), force_cache=getattr(self, "cache_schema", False), - ).json() - return json.dumps(_ensure_type_lists(schema)) + ) + return response.text @cached_property def pkg_schema_str(self): uri_scheme = urlparse(self.pkg_schema_url).scheme if uri_scheme == "http" or uri_scheme == "https": - schema = get_request( + response = get_request( self.pkg_schema_url, config=getattr(self, "config", None), force_cache=getattr(self, "cache_schema", False), - ).json() + ) + return response.text else: with open(self.pkg_schema_url) as fp: - schema = json.load(fp) - return json.dumps(_ensure_type_lists(schema)) + return fp.read() @property def _schema_obj(self): @@ -429,10 +431,10 @@ def get_schema_codelist_paths( if "codelist" in value and path not in codelist_paths: codelist_paths[path] = (value["codelist"], value.get("openCodelist", False)) - if value.get("type") == ["object"]: + if value.get("type") == "object": get_schema_codelist_paths(None, value, path, codelist_paths) elif ( - value.get("type") == ["array"] + value.get("type") == "array" and isinstance(value.get("items"), dict) and value.get("items").get("properties") ): @@ -494,11 +496,10 @@ def get_remote_json(self, uri, **kwargs): uri = urljoin(self.schema_url, os.path.basename(uri_info.path)) if "http" in uri_info.scheme: - schema = get_request(uri, config=self.config).json(**kwargs) + return get_request(uri, config=self.config).json(**kwargs) else: with open(uri) as schema_file: - schema = json.load(schema_file, **kwargs) - return _ensure_type_lists(schema) + return json.load(schema_file, **kwargs) def common_checks_context( @@ -1139,12 +1140,15 @@ def resolve_remote(self, uri): # Otherwise, pass off to urllib and assume utf-8 with urlopen(uri) as url: result = json.loads(url.read().decode("utf-8")) + + if self.cache_remote: + self.store[uri] = result + return result else: with open(uri) as schema_file: result = json.load(schema_file) add_is_codelist(result) - result = _ensure_type_lists(result) self.store[uri] = result return result @@ -1197,10 +1201,10 @@ def _get_schema_deprecated_paths( ) ) - if value.get("type") == ["object"]: + if value.get("type") == "object": _get_schema_deprecated_paths(None, value, path, deprecated_paths) elif ( - value.get("type") == ["array"] + value.get("type") == "array" and isinstance(value.get("items"), dict) and value.get("items").get("properties") ): @@ -1242,10 +1246,10 @@ def _get_schema_non_required_ids( if prop == "id" and no_required_id and array_parent and not list_merge: id_paths.append(path) - if value.get("type") == ["object"]: + if value.get("type") == "object": _get_schema_non_required_ids(None, value, path, id_paths) elif ( - value.get("type") == ["array"] + value.get("type") == "array" and isinstance(value.get("items"), dict) and value.get("items").get("properties") ): @@ -1295,10 +1299,10 @@ def add_is_codelist(obj): else: value["isCodelist"] = True - if value.get("type") == ["object"]: + if value.get("type") == "object": add_is_codelist(value) elif ( - value.get("type") == ["array"] + value.get("type") == "array" and isinstance(value.get("items"), dict) and value.get("items").get("properties") ): @@ -1485,42 +1489,3 @@ def get_field_coverage(schema_obj, json_data_list): add_field_coverage(schema_dict, json_data_item) add_field_coverage_percentages(schema_dict) return schema_dict - - -def _ensure_type_lists(schema): - """ - Loads the JSON data and change the values of "type" properties to arrays. - """ - # https://tools.ietf.org/html/draft-fge-json-schema-validation-00 - # 11 validatation properties "MUST be a valid JSON Schema". - schemas = {"additionalItems", "additionalProperties", "items", "not"} - list_of_schemas = {"allOf", "anyOf", "oneOf", "items"} - dict_of_schemas = { - "definitions", - "dependencies", - "patternProperties", - "properties", - } - - def _recurse(schema, pointer=()): - if isinstance(schema, dict): - if "type" in schema and ( - not pointer - or pointer[-1] in schemas - or len(pointer) > 1 - and ( - pointer[-2] in list_of_schemas - and isinstance(pointer[-1], int) - or pointer[-2] in dict_of_schemas - and isinstance(pointer[-1], str) - ) - ): - schema["type"] = ensure_list(schema["type"]) - for key, value in schema.items(): - _recurse(value, pointer + (key,)) - elif isinstance(schema, list): - for i, item in enumerate(schema): - _recurse(item, pointer + (i,)) - - _recurse(schema) - return schema diff --git a/tests/lib/test_common.py b/tests/lib/test_common.py index d2d1aed..0c19c52 100644 --- a/tests/lib/test_common.py +++ b/tests/lib/test_common.py @@ -10,7 +10,6 @@ from libcove.lib.common import ( SchemaJsonMixin, - _ensure_type_lists, _get_schema_deprecated_paths, add_field_coverage, add_field_coverage_percentages, @@ -566,7 +565,7 @@ def __init__(self, schema_host): def get_pkg_schema_obj(self): with open(os.path.join(self.schema_host, "release-package-schema.json")) as fp: - schema_json = _ensure_type_lists(json.load(fp)) + schema_json = json.load(fp) return schema_json @@ -577,7 +576,7 @@ def __init__(self, schema_host): def get_pkg_schema_obj(self): with open(os.path.join(self.schema_host, "record-package-schema.json")) as fp: - schema_json = _ensure_type_lists(json.load(fp)) + schema_json = json.load(fp) return schema_json