Skip to content

Commit

Permalink
Implement test failure severity levels
Browse files Browse the repository at this point in the history
A small refactor to make test parsing easier to modify
add concept of test modifier kwargs, pass them through to config
plug the severity setting into test result handling
Update existing tests
Add integration tests
severity settings for data tests, too
  • Loading branch information
Jacob Beck committed Apr 30, 2019
1 parent aa4f771 commit abcbaca
Show file tree
Hide file tree
Showing 11 changed files with 322 additions and 181 deletions.
5 changes: 4 additions & 1 deletion core/dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,14 @@
}
]
},
'severity': {
'enum': ['ERROR', 'WARN'],
},
},
'required': [
'enabled', 'materialized', 'post-hook', 'pre-hook', 'vars',
'quoting', 'column_types', 'tags'
]
],
}


Expand Down
10 changes: 10 additions & 0 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,6 +661,16 @@ def warn_or_error(msg, node=None, log_fmt=None):
logger.warning(msg)


def warn_or_raise(exc, log_fmt=None):
    """Raise ``exc`` if ``dbt.flags.WARN_ERROR`` is set, else log a warning.

    :param exc: The exception to raise, or whose text is logged.
    :param log_fmt: Optional format string; when given, the exception text
        is passed through ``log_fmt.format`` before being logged.
    """
    if dbt.flags.WARN_ERROR:
        raise exc

    # Not raising: downgrade the exception to a warning-level log line.
    text = str(exc)
    logger.warning(text if log_fmt is None else log_fmt.format(text))


# Update this when a new function should be added to the
# dbt context's `exceptions` key!
CONTEXT_EXPORTS = {
Expand Down
264 changes: 140 additions & 124 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,109 +67,144 @@ def as_kwarg(key, value):
return "{key}={value}".format(key=key, value=formatted_value)


def build_test_raw_sql(test_namespace, model, test_type, test_args):
    """Render the jinja macro call used as the raw SQL of a model test.

    :param test_namespace: The test's namespace, or None if it has none
    :param model: Mapping for the model under test; only ``'name'`` is read
    :param test_type: The type of the test; the macro is ``test_<test_type>``
    :param test_args: Mapping of argument name to value for the macro call
    :return: A string of raw sql for the test node.
    """
    macro_name = (
        "test_{}".format(test_type) if test_namespace is None
        else "{}.test_{}".format(test_namespace, test_type)
    )

    # visit the keys in sorted order so the rendering is deterministic
    rendered_args = ", ".join(
        [as_kwarg(name, test_args[name]) for name in sorted(test_args)]
    )

    return "{{{{ {macro}(model=ref('{model}'), {kwargs}) }}}}".format(
        macro=macro_name,
        model=model['name'],
        kwargs=rendered_args,
    )
class TestBuilder(object):
"""An object to hold assorted test settings and perform basic parsing
Test names have the following pattern:
- the test name itself may be namespaced (package.test)
- or it may not be namespaced (test)
- the test may have arguments embedded in the name (, severity=WARN)
- or it may not have arguments.
def build_source_test_raw_sql(test_namespace, source, table, test_type,
test_args):
"""Build the raw SQL from a source test definition.
:param test_namespace: The test's namespace, if one exists
:param source: The source under test.
:param table: The table under test
:param test_type: The type of the test (unique_id, etc)
:param test_args: The arguments passed to the test as a list of `key=value`
strings
:return: A string of raw sql for the test node.
"""
# sort the dict so the keys are rendered deterministically (for tests)
kwargs = [as_kwarg(key, test_args[key]) for key in sorted(test_args)]

if test_namespace is None:
macro_name = "test_{}".format(test_type)
else:
macro_name = "{}.test_{}".format(test_namespace, test_type)

raw_sql = (
"{{{{ {macro}(model=source('{source}', '{table}'), {kwargs}) }}}}"
.format(
source=source['name'],
table=table['name'],
macro=macro_name,
kwargs=", ".join(kwargs))
TEST_NAME_PATTERN = re.compile(
r'((?P<test_namespace>([a-zA-Z_][0-9a-zA-Z_]*))\.)?'
r'(?P<test_name>([a-zA-Z_][0-9a-zA-Z_]*))'
)
return raw_sql
# map magic keys to default values
MODIFIER_ARGS = {'severity': 'ERROR'}

def __init__(self, test, target, column_name, package_name):
test_name, test_args = self.extract_test_args(test, column_name)
self.args = test_args
self.package_name = package_name
self.target = target

match = self.TEST_NAME_PATTERN.match(test_name)
if match is None:
dbt.exceptions.raise_compiler_error(
'Test name string did not match expected pattern: {}'
.format(test_name)
)

def calculate_test_namespace(test_type, package_name):
test_namespace = None
split = test_type.split('.')
if len(split) > 1:
test_type = split[1]
package_name = split[0]
test_namespace = package_name
groups = match.groupdict()
self.name = groups['test_name']
self.namespace = groups['test_namespace']
self.modifiers = {}
for key, default in self.MODIFIER_ARGS.items():
self.modifiers[key] = self.args.pop(key, default)

return test_namespace, test_type, package_name
if self.namespace is not None:
self.package_name = self.namespace

@staticmethod
def extract_test_args(test, name=None):
if not isinstance(test, dict):
dbt.exceptions.raise_compiler_error(
'test must be dict or str, got {} (value {})'.format(
type(test), test
)
)

def _build_test_args(test, name):
if isinstance(test, basestring):
test_name = test
test_args = {}
elif isinstance(test, dict):
test = list(test.items())
if len(test) != 1:
dbt.exceptions.raise_compiler_error(
'test definition dictionary must have exactly one key, got'
' {} instead ({} keys)'.format(test, len(test))
)
test_name, test_args = test[0]
else:
dbt.exceptions.raise_compiler_error(
'test must be dict or str, got {} (value {})'.format(
type(test), test

if not isinstance(test_args, dict):
dbt.exceptions.raise_compiler_error(
'test arguments must be dict, got {} (value {})'.format(
type(test_args), test_args
)
)
)
if not isinstance(test_args, dict):
dbt.exceptions.raise_compiler_error(
'test arguments must be dict, got {} (value {})'.format(
type(test_args), test_args
if not isinstance(test_name, basestring):
dbt.exceptions.raise_compiler_error(
'test name must be a str, got {} (value {})'.format(
type(test_name), test_name
)
)
if name is not None:
test_args['column_name'] = name
return test_name, test_args

def severity(self):
return self.modifiers.get('severity', 'ERROR').upper()

def test_kwargs_str(self):
    """Render ``self.args`` as a single comma-separated string."""
    # visit the keys in sorted order so the rendering is deterministic
    rendered = [as_kwarg(name, self.args[name]) for name in sorted(self.args)]
    return ', '.join(rendered)

def macro_name(self):
macro_name = 'test_{}'.format(self.name)
if self.namespace is not None:
macro_name = "{}.{}".format(self.namespace, macro_name)
return macro_name

def build_model_str(self):
raise NotImplementedError('build_model_str not implemented!')

def get_test_name(self):
raise NotImplementedError('get_test_name not implemented!')

def build_raw_sql(self):
return (
"{{{{ config(severity='{severity}') }}}}"
"{{{{ {macro}(model={model}, {kwargs}) }}}}"
).format(
model=self.build_model_str(),
macro=self.macro_name(),
kwargs=self.test_kwargs_str(),
severity=self.severity()
)
if not isinstance(test_name, basestring):
dbt.exceptions.raise_compiler_error(
'test name must be a str, got {} (value {})'.format(
type(test_name), test_name
)


class RefTestBuilder(TestBuilder):
    """Test builder for tests whose target is referenced via ``ref``.

    ``self.target`` is used as a mapping; only its ``'name'`` entry is read.
    """

    def build_model_str(self):
        """Return the ``ref('<target name>')`` expression for the target."""
        return "ref('{}')".format(self.target['name'])

    def get_test_name(self):
        """Return ``get_nice_schema_test_name`` applied to this test."""
        return get_nice_schema_test_name(self.name,
                                         self.target['name'],
                                         self.args)

    def describe_test_target(self):
        """Return a short description of the target, e.g. ``model "x"``."""
        # Report the target's name: formatting the whole target mapping
        # would put its entire repr inside the quotes.
        return 'model "{}"'.format(self.target['name'])


class SourceTestBuilder(TestBuilder):
def build_model_str(self):
return "source('{}', '{}')".format(
self.target['source']['name'],
self.target['table']['name']
)
if name is not None:
test_args['column_name'] = name
return test_name, test_args

def get_test_name(self):
    """Return ``get_nice_schema_test_name`` applied to this source test.

    The test name is prefixed with ``source_`` and the target name is
    ``<source name>_<table name>``.
    """
    source_name = self.target['source']['name']
    table_name = self.target['table']['name']
    return get_nice_schema_test_name(
        'source_' + self.name,
        '{}_{}'.format(source_name, table_name),
        self.args
    )

def describe_test_target(self):
return 'source "{0[source]}.{0[table]}"'.format(self.target)


def warn_invalid(filepath, key, value, explain):
Expand Down Expand Up @@ -212,6 +247,8 @@ def add(self, column_name, description):


class SchemaBaseTestParser(MacrosKnownParser):
Builder = TestBuilder

def _parse_column(self, target, column, package_name, root_dir, path,
refs):
# this should yield ParsedNodes where resource_type == NodeType.Test
Expand All @@ -237,53 +274,38 @@ def _parse_column(self, target, column, package_name, root_dir, path,
)
continue

def _build_raw_sql(self, test_namespace, target, test_type, test_args):
raise NotImplementedError

def _generate_test_name(self, target, test_type, test_args):
"""Returns a hashed_name, full_name pair."""
raise NotImplementedError

@staticmethod
def _describe_test_target(test_target):
raise NotImplementedError

def build_test_node(self, test_target, package_name, test, root_dir, path,
column_name=None):
"""Build a test node against the given target (a model or a source).
:param test_target: An unparsed form of the target.
"""
test_type, test_args = _build_test_args(test, column_name)
if isinstance(test, basestring):
test = {test: {}}

test_namespace, test_type, package_name = calculate_test_namespace(
test_type, package_name
)
test_info = self.Builder(test, test_target, column_name, package_name)

source_package = self.all_projects.get(package_name)
source_package = self.all_projects.get(test_info.package_name)
if source_package is None:
desc = '"{}" test on {}'.format(
test_type, self._describe_test_target(test_target)
test_info.name, test_info.describe_test_target()
)
dbt.exceptions.raise_dep_not_found(None, desc, test_namespace)
dbt.exceptions.raise_dep_not_found(None, desc, test_info.namespace)

test_path = os.path.basename(path)

hashed_name, full_name = self._generate_test_name(test_target,
test_type,
test_args)
hashed_name, full_name = test_info.get_test_name()

hashed_path = get_pseudo_test_path(hashed_name, test_path,
'schema_test')

full_path = get_pseudo_test_path(full_name, test_path, 'schema_test')
raw_sql = self._build_raw_sql(test_namespace, test_target, test_type,
test_args)
raw_sql = test_info.build_raw_sql()

unparsed = UnparsedNode(
name=full_name,
resource_type=NodeType.Test,
package_name=package_name,
package_name=test_info.package_name,
root_path=root_dir,
path=hashed_path,
original_file_path=path,
Expand Down Expand Up @@ -318,15 +340,7 @@ def build_test_node(self, test_target, package_name, test, root_dir, path,


class SchemaModelParser(SchemaBaseTestParser):
def _build_raw_sql(self, test_namespace, target, test_type, test_args):
return build_test_raw_sql(test_namespace, target, test_type, test_args)

def _generate_test_name(self, target, test_type, test_args):
return get_nice_schema_test_name(test_type, target['name'], test_args)

@staticmethod
def _describe_test_target(test_target):
return 'model "{}"'.format(test_target)
Builder = RefTestBuilder

def parse_models_entry(self, model_dict, path, package_name, root_dir):
model_name = model_dict['name']
Expand Down Expand Up @@ -381,6 +395,8 @@ def parse_all(self, models, path, package_name, root_dir):


class SchemaSourceParser(SchemaBaseTestParser):
Builder = SourceTestBuilder

def __init__(self, root_project_config, all_projects, macro_manifest):
super(SchemaSourceParser, self).__init__(
root_project_config=root_project_config,
Expand All @@ -389,16 +405,16 @@ def __init__(self, root_project_config, all_projects, macro_manifest):
)
self._renderer = ConfigRenderer(self.root_project_config.cli_vars)

def _build_raw_sql(self, test_namespace, target, test_type, test_args):
return build_source_test_raw_sql(test_namespace, target['source'],
target['table'], test_type,
test_args)
def _build_raw_sql(self, test_info):
return test_info.build_source_test_raw_sql()

def _generate_test_name(self, target, test_type, test_args):
def _generate_test_name(self, test_info):
target_name = '{}_{}'.format(test_info.target['source']['name'],
test_info.target['table']['name'])
return get_nice_schema_test_name(
'source_' + test_type,
'{}_{}'.format(target['source']['name'], target['table']['name']),
test_args
'source_' + test_info.name,
target_name,
test_info.args
)

@staticmethod
Expand Down
Loading

0 comments on commit abcbaca

Please sign in to comment.