From 23272c19353bbcedce034d96687e598cd024aa09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gonzalo=20Pe=C3=B1a-Castellanos?= Date: Tue, 6 Oct 2020 15:21:26 -0500 Subject: [PATCH] PR: Use fastjsonschema if installed and add tests (#191) * Use fastjsonschema if installed and tests. * Refactor code into a common validator * Split classes and add reading the current validator from environment variable * Add review comments. * Update docs, readme and changelog and CI for tests * Fix extras * Add fastjsonshcema to test deps and update CI --- .github/workflows/tests.yml | 2 +- README.md | 13 +- docs/api.rst | 12 ++ docs/changelog.rst | 4 + nbformat/json_compat.py | 82 +++++++++ nbformat/tests/base.py | 9 +- nbformat/tests/many_tracebacks.ipynb | 46 +++++ nbformat/tests/test_validator.py | 265 +++++++++++++++++---------- nbformat/validator.py | 23 +-- setup.py | 3 +- 10 files changed, 340 insertions(+), 119 deletions(-) create mode 100644 nbformat/json_compat.py create mode 100644 nbformat/tests/many_tracebacks.ipynb diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f8377d92..0105c34f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -22,7 +22,7 @@ jobs: - name: Install test dependencies run: | pip install --upgrade pip setuptools - pip install nbformat[test] + pip install .[test] pip install codecov - name: Install nbformat run: | diff --git a/README.md b/README.md index 0322a5e3..f1ddadd7 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,7 @@ [![codecov.io](https://codecov.io/github/jupyter/nbformat/coverage.svg?branch=master)](https://codecov.io/github/jupyter/nbformat?branch=master) [![Code Health](https://landscape.io/github/jupyter/nbformat/master/landscape.svg?style=flat)](https://landscape.io/github/jupyter/nbformat/master) - - +![CI Tests](https://github.com/jupyter/nbformat/workflows/Run%20tests/badge.svg) `nbformat` contains the reference implementation of the [Jupyter Notebook format][], and Python APIs for working with notebooks. @@ -20,6 +19,16 @@ From the command line: pip install nbformat ``` +## Using a different json schema validator + +You can install and use [fastjsonschema](https://horejsek.github.io/python-fastjsonschema/) by running: + +``` {.sourceCode .bash} +pip install nbformat[fast] +``` + +To enable fast validation with `fastjsonschema`, set the environment variable `NBFORMAT_VALIDATOR` to the value `fastjsonschema`. + ## Python Version Support This library supported python 2.7 and python 3.5+ for `4.x.x` releases. With python 2's end-of-life nbformat `5.x.x` is now python 3.5+ only. Support for 3.5 will be dropped when it's officially sunset by the python organization. diff --git a/docs/api.rst b/docs/api.rst index 2e5493ec..dddde60b 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -120,3 +120,15 @@ bindings are available, and an in-memory store otherwise. .. autoclass:: SQLiteSignatureStore .. autoclass:: MemorySignatureStore + +Optional fast validation +------------------------ + +You can install and use `fastjsonschema `_ by running:: + + pip install nbformat[fast] + + +To enable fast validation with `fastjsonschema`, set the environment variable:: + + NBFORMAT_VALIDATOR="fastjsonschema" diff --git a/docs/changelog.rst b/docs/changelog.rst index b6b3b89e..8367576f 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -7,6 +7,10 @@ Changes in nbformat In Development ============== +- Add optional support for using `fastjsonschema` as the JSON validation library. + To enable fast validation, install `fastjsonschema` and set the environment + variable `NBFORMAT_VALIDATOR` to the value `fastjsonschema`. + 5.0.7 ===== diff --git a/nbformat/json_compat.py b/nbformat/json_compat.py new file mode 100644 index 00000000..936d3c87 --- /dev/null +++ b/nbformat/json_compat.py @@ -0,0 +1,82 @@ +# Copyright (c) Jupyter Development Team. +# Distributed under the terms of the Modified BSD License. +""" +Common validator wrapper to provide a uniform usage of other schema validation +libraries. +""" + +import os + +import jsonschema +from jsonschema import Draft4Validator as _JsonSchemaValidator +from jsonschema import ValidationError + +try: + import fastjsonschema + from fastjsonschema import JsonSchemaException as _JsonSchemaException +except ImportError: + fastjsonschema = None + _JsonSchemaException = ValidationError + + +class JsonSchemaValidator: + name = "jsonschema" + + def __init__(self, schema): + self._schema = schema + self._default_validator = _JsonSchemaValidator(schema) # Default + self._validator = self._default_validator + + def validate(self, data): + self._default_validator.validate(data) + + def iter_errors(self, data, schema=None): + return self._default_validator.iter_errors(data, schema) + + +class FastJsonSchemaValidator(JsonSchemaValidator): + name = "fastjsonschema" + + def __init__(self, schema): + self._validator = fastjsonschema.compile(schema) + + def validate(self, data): + try: + self._validator(data) + except _JsonSchemaException as error: + raise ValidationError(error.message, schema_path=error.path) + + def iter_errors(self, data, schema=None): + errors = [] + validate_func = self._validator if schema is None else fastjsonschema.compile(schema) + try: + validate_func(data) + except _JsonSchemaException as error: + errors = [ValidationError(error.message, schema_path=error.path)] + + return errors + + +_VALIDATOR_MAP = [ + ("fastjsonschema", fastjsonschema, FastJsonSchemaValidator), + ("jsonschema", jsonschema, JsonSchemaValidator), +] +VALIDATORS = [item[0] for item in _VALIDATOR_MAP] + + +def _validator_for_name(validator_name): + if validator_name not in VALIDATORS: + raise ValueError("Invalid validator '{0}' value!\nValid values are: {1}".format( + validator_name, VALIDATORS)) + + for (name, module, validator_cls) in _VALIDATOR_MAP: + if module and validator_name == name: + return validator_cls + + +def get_current_validator(): + """ + Return the default validator based on the value of an environment variable. + """ + validator_name = os.environ.get("NBFORMAT_VALIDATOR", "jsonschema") + return _validator_for_name(validator_name) diff --git a/nbformat/tests/base.py b/nbformat/tests/base.py index 303d2f73..312c22bb 100644 --- a/nbformat/tests/base.py +++ b/nbformat/tests/base.py @@ -12,9 +12,10 @@ class TestsBase(unittest.TestCase): """Base tests class.""" - def fopen(self, f, mode=u'r',encoding='utf-8'): - return io.open(os.path.join(self._get_files_path(), f), mode, encoding=encoding) + @classmethod + def fopen(cls, f, mode=u'r',encoding='utf-8'): + return io.open(os.path.join(cls._get_files_path(), f), mode, encoding=encoding) - - def _get_files_path(self): + @classmethod + def _get_files_path(cls): return os.path.dirname(__file__) diff --git a/nbformat/tests/many_tracebacks.ipynb b/nbformat/tests/many_tracebacks.ipynb new file mode 100644 index 00000000..a37eda86 --- /dev/null +++ b/nbformat/tests/many_tracebacks.ipynb @@ -0,0 +1,46 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'iAmNotDefined' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0miAmNotDefined\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'iAmNotDefined' is not defined" + ] + } + ], + "source": [ + "# Imagine this cell called a function which runs things on a cluster and you have an error" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/nbformat/tests/test_validator.py b/nbformat/tests/test_validator.py index 308d98a2..7723dc91 100644 --- a/nbformat/tests/test_validator.py +++ b/nbformat/tests/test_validator.py @@ -4,117 +4,196 @@ # Distributed under the terms of the Modified BSD License. import os +import re from .base import TestsBase from jsonschema import ValidationError from nbformat import read from ..validator import isvalid, validate, iter_validate +from ..json_compat import VALIDATORS + +import pytest + + +# Fixtures +@pytest.fixture(autouse=True) +def clean_env_before_and_after_tests(): + """Fixture to clean up env variables before and after tests.""" + os.environ.pop("NBFORMAT_VALIDATOR", None) + yield + os.environ.pop("NBFORMAT_VALIDATOR", None) + + +# Helpers +def set_validator(validator_name): + os.environ["NBFORMAT_VALIDATOR"] = validator_name + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb2(validator_name): + """Test that a v2 notebook converted to current passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test2.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb3(validator_name): + """Test that a v3 notebook passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test3.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4(validator_name): + """Test that a v4 notebook passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) == True + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4_document_info(validator_name): + """Test that a notebook with document_info passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4docinfo.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4custom(validator_name): + """Test that a notebook with a custom JSON mimetype passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4custom.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4jupyter_metadata(validator_name): + """Test that a notebook with a jupyter metadata passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4jupyter_metadata.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_nb4jupyter_metadata_timings(validator_name): + """Tests that a notebook with "timing" in metadata passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4jupyter_metadata_timings.ipynb', u'r') as f: + nb = read(f, as_version=4) + validate(nb) + assert isvalid(nb) + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_invalid(validator_name): + """Test than an invalid notebook does not pass validation""" + set_validator(validator_name) + # this notebook has a few different errors: + # - one cell is missing its source + # - invalid cell type + # - invalid output_type + with TestsBase.fopen(u'invalid.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError): + validate(nb) + assert not isvalid(nb) -class TestValidator(TestsBase): +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_validate_empty(validator_name): + """Test that an empty notebook (invalid) fails validation""" + set_validator(validator_name) + with pytest.raises(ValidationError) as e: + validate({}) - def test_nb2(self): - """Test that a v2 notebook converted to current passes validation""" - with self.fopen(u'test2.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) - def test_nb3(self): - """Test that a v3 notebook passes validation""" - with self.fopen(u'test3.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_future(validator_name): + """Test that a notebook from the future with extra keys passes validation""" + set_validator(validator_name) + with TestsBase.fopen(u'test4plus.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError): + validate(nb, version=4, version_minor=3) - def test_nb4(self): - """Test that a v4 notebook passes validation""" - with self.fopen(u'test4.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) + assert not isvalid(nb, version=4, version_minor=3) + assert isvalid(nb) - def test_nb4_document_info(self): - """Test that a notebook with document_info passes validation""" - with self.fopen(u'test4docinfo.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) - def test_nb4custom(self): - """Test that a notebook with a custom JSON mimetype passes validation""" - with self.fopen(u'test4custom.ipynb', u'r') as f: - nb = read(f, as_version=4) +# This is only a valid test for the default validator, jsonschema +@pytest.mark.parametrize("validator_name", ["jsonschema"]) +def test_validation_error(validator_name): + set_validator(validator_name) + with TestsBase.fopen(u'invalid.ipynb', u'r') as f: + nb = read(f, as_version=4) + with pytest.raises(ValidationError) as exception_info: validate(nb) - self.assertEqual(isvalid(nb), True) - def test_nb4jupyter_metadata(self): - """Test that a notebook with a jupyter metadata passes validation""" - with self.fopen(u'test4jupyter_metadata.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertEqual(isvalid(nb), True) + s = str(exception_info.value) + assert re.compile(r"validating .required. in markdown_cell").search(s) + assert re.compile(r"source.* is a required property").search(s) + assert re.compile(r"On instance\[u?['\"].*cells['\"]\]\[0\]").search(s) + assert len(s.splitlines()) < 10 - def test_nb4jupyter_metadata_timings(self): - """Tests that a notebook with "timing" in metadata passes validation""" - with self.fopen(u'test4jupyter_metadata_timings.ipynb', u'r') as f: - nb = read(f, as_version=4) - validate(nb) - self.assertTrue(isvalid(nb)) - - def test_invalid(self): - """Test than an invalid notebook does not pass validation""" - # this notebook has a few different errors: - # - one cell is missing its source - # - invalid cell type - # - invalid output_type - with self.fopen(u'invalid.ipynb', u'r') as f: - nb = read(f, as_version=4) - with self.assertRaises(ValidationError): - validate(nb) - self.assertEqual(isvalid(nb), False) - - def test_validate_empty(self): - """Test that an empty notebook (invalid) fails validation""" - with self.assertRaises(ValidationError) as e: - validate({}) - - def test_future(self): - """Test that a notebook from the future with extra keys passes validation""" - with self.fopen(u'test4plus.ipynb', u'r') as f: - nb = read(f, as_version=4) - with self.assertRaises(ValidationError): - validate(nb, version=4, version_minor=3) - self.assertEqual(isvalid(nb, version=4, version_minor=3), False) - self.assertEqual(isvalid(nb), True) +# This is only a valid test for the default validator, jsonschema +@pytest.mark.parametrize("validator_name", ["jsonschema"]) +def test_iter_validation_error(validator_name): + set_validator(validator_name) + with TestsBase.fopen(u'invalid.ipynb', u'r') as f: + nb = read(f, as_version=4) - def test_validation_error(self): - with self.fopen(u'invalid.ipynb', u'r') as f: - nb = read(f, as_version=4) - with self.assertRaises(ValidationError) as e: - validate(nb) - s = str(e.exception) - self.assertRegex(s, "validating.*required.* in markdown_cell") - self.assertRegex(s, "source.* is a required property") - self.assertRegex(s, r"On instance\[u?['\"].*cells['\"]\]\[0\]") - self.assertLess(len(s.splitlines()), 10) - - def test_iter_validation_error(self): - with self.fopen(u'invalid.ipynb', u'r') as f: + errors = list(iter_validate(nb)) + assert len(errors) == 3 + assert {e.ref for e in errors} == {'markdown_cell', 'heading_cell', 'bad stream'} + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_iter_validation_empty(validator_name): + """Test that an empty notebook (invalid) fails validation via iter_validate""" + set_validator(validator_name) + errors = list(iter_validate({})) + assert len(errors) == 1 + assert type(errors[0]) == ValidationError + + +@pytest.mark.parametrize("validator_name", VALIDATORS) +def test_validation_no_version(validator_name): + """Test that an invalid notebook with no version fails validation""" + set_validator(validator_name) + with pytest.raises(ValidationError) as e: + validate({'invalid': 'notebook'}) + + +def test_invalid_validator_raises_value_error(): + """Test that an invalid notebook with no version fails validation""" + set_validator("foobar") + with pytest.raises(ValueError): + with TestsBase.fopen(u'test2.ipynb', u'r') as f: nb = read(f, as_version=4) - errors = list(iter_validate(nb)) - assert len(errors) == 3 - assert {e.ref for e in errors} == {'markdown_cell', 'heading_cell', 'bad stream'} - def test_iter_validation_empty(self): - """Test that an empty notebook (invalid) fails validation via iter_validate""" - errors = list(iter_validate({})) - assert len(errors) == 1 - assert type(errors[0]) == ValidationError +def test_invalid_validator_raises_value_error_after_read(): + """Test that an invalid notebook with no version fails validation""" + set_validator("jsonschema") + with TestsBase.fopen(u'test2.ipynb', u'r') as f: + nb = read(f, as_version=4) - def test_validation_no_version(self): - """Test that an invalid notebook with no version fails validation""" - with self.assertRaises(ValidationError) as e: - validate({'invalid': 'notebook'}) + set_validator("foobar") + with pytest.raises(ValueError): + validate(nb) diff --git a/nbformat/validator.py b/nbformat/validator.py index 1be77072..bd44729c 100644 --- a/nbformat/validator.py +++ b/nbformat/validator.py @@ -8,23 +8,10 @@ import sys import warnings -try: - from jsonschema import ValidationError - from jsonschema import Draft4Validator as Validator -except ImportError as e: - verbose_msg = """ - Jupyter notebook format depends on the jsonschema package: - - https://pypi.python.org/pypi/jsonschema - - Please install it first. - """ - raise ImportError(verbose_msg) from e - from ipython_genutils.importstring import import_item +from .json_compat import get_current_validator, ValidationError from .reader import get_version, reads - validators = {} def _relax_additional_properties(obj): @@ -61,7 +48,8 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): if version_minor is None: version_minor = current_minor - version_tuple = (version, version_minor) + current_validator = get_current_validator() + version_tuple = (current_validator.name, version, version_minor) if version_tuple not in validators: try: @@ -75,7 +63,7 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): # and allow undefined cell types and outputs schema_json = _allow_undefined(schema_json) - validators[version_tuple] = Validator(schema_json) + validators[version_tuple] = current_validator(schema_json) if relax_add_props: try: @@ -86,8 +74,8 @@ def get_validator(version=None, version_minor=None, relax_add_props=False): # this allows properties to be added for intermediate # representations while validating for all other kinds of errors schema_json = _relax_additional_properties(schema_json) + validators[version_tuple] = current_validator(schema_json) - validators[version_tuple] = Validator(schema_json) return validators[version_tuple] @@ -257,7 +245,6 @@ def validate(nbdict=None, ref=None, version=None, version_minor=None, else: raise TypeError("validate() missing 1 required argument: 'nbdict'") - if ref is None: # if ref is not specified, we have a whole notebook, so we can get the version nbdict_version, nbdict_version_minor = get_version(nbdict) diff --git a/setup.py b/setup.py index ea4c93af..5fb46c8d 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,8 @@ ] extras_require = setuptools_args['extras_require'] = { - 'test': ['testpath', 'pytest', 'pytest-cov'], + 'fast': ['fastjsonschema'], + 'test': ['fastjsonschema', 'testpath', 'pytest', 'pytest-cov'], } if 'setuptools' in sys.modules: