Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use for-loop instead of slower any() with generator expression #66

Merged
merged 10 commits into from
Sep 15, 2021
87 changes: 70 additions & 17 deletions libcove/lib/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,27 @@
from cached_property import cached_property
from flattentool import unflatten
from jsonschema import FormatChecker, RefResolver
from jsonschema._utils import extras_msg, find_additional_properties, uniq
from jsonschema._utils import (
ensure_list,
extras_msg,
find_additional_properties,
types_msg,
uniq,
)
from jsonschema.exceptions import UndefinedTypeCheck, ValidationError

from .exceptions import cove_spreadsheet_conversion_error
from .tools import decimal_default, get_request


def type_validator(validator, types, instance, schema):
    """
    Validate ``instance`` against the "type" keyword of a JSON Schema.

    Yields one ValidationError if ``instance`` matches none of ``types``, and
    yields nothing as soon as one of them matches.

    ``types`` is iterated as given, so it must already be a list of type
    names; a bare string would be iterated character by character. Schemas are
    expected to have been normalized with ``_ensure_type_lists`` first.

    A plain loop with an early exit is used on purpose instead of ``any()``
    with a generator expression, which is slower.
    """
    for type_name in types:
        if validator.is_type(instance, type_name):
            return
    yield ValidationError(types_msg(instance, types))


class TypeChecker:
def is_type(self, instance, type):
if type == "string":
Expand Down Expand Up @@ -53,7 +67,9 @@ def is_type(self, instance, type):
# Otherwise we could cause conflicts with other software in the same process.
validator = jsonschema.validators.extend(
jsonschema.validators.Draft4Validator,
validators={},
validators={
"type": type_validator,
},
type_checker=TypeChecker(),
)

Expand Down Expand Up @@ -308,26 +324,26 @@ def dependencies_extra_data(validator, dependencies, instance, schema):
class SchemaJsonMixin:
@cached_property
def schema_str(self):
response = get_request(
schema = get_request(
self.schema_url,
config=getattr(self, "config", None),
force_cache=getattr(self, "cache_schema", False),
)
return response.text
).json()
return json.dumps(_ensure_type_lists(schema))

@cached_property
def pkg_schema_str(self):
uri_scheme = urlparse(self.pkg_schema_url).scheme
if uri_scheme == "http" or uri_scheme == "https":
response = get_request(
schema = get_request(
self.pkg_schema_url,
config=getattr(self, "config", None),
force_cache=getattr(self, "cache_schema", False),
)
return response.text
).json()
else:
with open(self.pkg_schema_url) as fp:
return fp.read()
schema = json.load(fp)
return json.dumps(_ensure_type_lists(schema))

@property
def _schema_obj(self):
Expand Down Expand Up @@ -479,10 +495,11 @@ def get_remote_json(self, uri, **kwargs):
uri = urljoin(self.schema_url, os.path.basename(uri_info.path))

if "http" in uri_info.scheme:
return get_request(uri, config=self.config).json(**kwargs)
schema = get_request(uri, config=self.config).json(**kwargs)
else:
with open(uri) as schema_file:
return json.load(schema_file, **kwargs)
schema = json.load(schema_file, **kwargs)
return _ensure_type_lists(schema)


def common_checks_context(
Expand Down Expand Up @@ -1130,15 +1147,12 @@ def resolve_remote(self, uri):
# Otherwise, pass off to urllib and assume utf-8
with urlopen(uri) as url:
result = json.loads(url.read().decode("utf-8"))

if self.cache_remote:
self.store[uri] = result
return result
else:
with open(uri) as schema_file:
result = json.load(schema_file)

add_is_codelist(result)
result = _ensure_type_lists(result)
self.store[uri] = result
return result

Expand Down Expand Up @@ -1191,10 +1205,10 @@ def _get_schema_deprecated_paths(
)
)

if value.get("type") == "object":
if value.get("type") == ["object"]:
_get_schema_deprecated_paths(None, value, path, deprecated_paths)
elif (
value.get("type") == "array"
value.get("type") == ["array"]
and isinstance(value.get("items"), dict)
and value.get("items").get("properties")
):
Expand Down Expand Up @@ -1479,3 +1493,42 @@ def get_field_coverage(schema_obj, json_data_list):
add_field_coverage(schema_dict, json_data_item)
add_field_coverage_percentages(schema_dict)
return schema_dict


def _ensure_type_lists(schema):
"""
Loads the JSON data and change the values of "type" properties to arrays.
"""
# https://tools.ietf.org/html/draft-fge-json-schema-validation-00
# 11 validatation properties "MUST be a valid JSON Schema".
schemas = {"additionalItems", "additionalProperties", "items", "not"}
list_of_schemas = {"allOf", "anyOf", "oneOf", "items"}
dict_of_schemas = {
"definitions",
"dependencies",
"patternProperties",
"properties",
}

def _recurse(schema, pointer=()):
if isinstance(schema, dict):
if "type" in schema and (
not pointer
or pointer[-1] in schemas
or len(pointer) > 1
and (
pointer[-2] in list_of_schemas
and isinstance(pointer[-1], int)
or pointer[-2] in dict_of_schemas
and isinstance(pointer[-1], str)
)
):
schema["type"] = ensure_list(schema["type"])
for key, value in schema.items():
_recurse(value, pointer + (key,))
elif isinstance(schema, list):
for i, item in enumerate(schema):
_recurse(item, pointer + (i,))

_recurse(schema)
return schema
7 changes: 4 additions & 3 deletions tests/lib/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from libcove.lib.common import (
SchemaJsonMixin,
_ensure_type_lists,
_get_schema_deprecated_paths,
add_field_coverage,
add_field_coverage_percentages,
Expand Down Expand Up @@ -572,7 +573,7 @@ def __init__(self, schema_host):

def get_pkg_schema_obj(self):
with open(os.path.join(self.schema_host, "release-package-schema.json")) as fp:
schema_json = json.load(fp)
schema_json = _ensure_type_lists(json.load(fp))
return schema_json


Expand All @@ -583,7 +584,7 @@ def __init__(self, schema_host):

def get_pkg_schema_obj(self):
with open(os.path.join(self.schema_host, "record-package-schema.json")) as fp:
schema_json = json.load(fp)
schema_json = _ensure_type_lists(json.load(fp))
return schema_json


Expand Down Expand Up @@ -772,7 +773,7 @@ class DummySchemaObj:

def get_pkg_schema_obj(self):
return {
"type": "array",
"type": ["array"],
"minItems": 2,
}

Expand Down