Skip to content

Commit

Permalink
Custom names for generic tests (#4898)
Browse files Browse the repository at this point in the history
* Support user-supplied name for generic tests

* Support dict-style generic test spec

* Add changelog entry

* Add TODO comment

* Rework raise_duplicate_resource_name

* Add functional tests

* Update comments, rm TODO

* PR feedback
  • Loading branch information
jtcohen6 committed Mar 25, 2022
1 parent 81118d9 commit 5071b00
Show file tree
Hide file tree
Showing 8 changed files with 243 additions and 42 deletions.
7 changes: 7 additions & 0 deletions .changes/unreleased/Features-20220318-085756.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
kind: Features
body: Support custom names for generic tests
time: 2022-03-18T08:57:56.05584+01:00
custom:
Author: jtcohen6
Issue: "3348"
PR: "4898"
56 changes: 36 additions & 20 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,31 +838,47 @@ def raise_duplicate_macro_name(node_1, node_2, namespace) -> NoReturn:

def raise_duplicate_resource_name(node_1, node_2):
duped_name = node_1.name
node_type = NodeType(node_1.resource_type)
pluralized = (
node_type.pluralize()
if node_1.resource_type == node_2.resource_type
else "resources" # still raise if ref() collision, e.g. model + seed
)

if node_1.resource_type in NodeType.refable():
get_func = 'ref("{}")'.format(duped_name)
elif node_1.resource_type == NodeType.Source:
action = "looking for"
# duplicate 'ref' targets
if node_type in NodeType.refable():
formatted_name = f'ref("{duped_name}")'
# duplicate sources
elif node_type == NodeType.Source:
duped_name = node_1.get_full_source_name()
get_func = node_1.get_source_representation()
elif node_1.resource_type == NodeType.Documentation:
get_func = 'doc("{}")'.format(duped_name)
elif node_1.resource_type == NodeType.Test and "schema" in node_1.tags:
return
formatted_name = node_1.get_source_representation()
# duplicate docs blocks
elif node_type == NodeType.Documentation:
formatted_name = f'doc("{duped_name}")'
# duplicate generic tests
elif node_type == NodeType.Test and hasattr(node_1, "test_metadata"):
column_name = f'column "{node_1.column_name}" in ' if node_1.column_name else ""
model_name = node_1.file_key_name
duped_name = f'{node_1.name}" defined on {column_name}"{model_name}'
action = "running"
formatted_name = "tests"
# all other resource types
else:
get_func = '"{}"'.format(duped_name)
formatted_name = duped_name

# should this be raise_parsing_error instead?
raise_compiler_error(
'dbt found two resources with the name "{}". Since these resources '
"have the same name,\ndbt will be unable to find the correct resource "
"when {} is used. To fix this,\nchange the name of one of "
"these resources:\n- {} ({})\n- {} ({})".format(
duped_name,
get_func,
node_1.unique_id,
node_1.original_file_path,
node_2.unique_id,
node_2.original_file_path,
)
f"""
dbt found two {pluralized} with the name "{duped_name}".
Since these resources have the same name, dbt will be unable to find the correct resource
when {action} {formatted_name}.
To fix this, change the name of one of these resources:
- {node_1.unique_id} ({node_1.original_file_path})
- {node_2.unique_id} ({node_2.original_file_path})
""".strip()
)


Expand Down
61 changes: 44 additions & 17 deletions core/dbt/parser/generic_test_builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,13 @@
from dbt.parser.search import FileBlock


def get_nice_generic_test_name(
def synthesize_generic_test_names(
test_type: str, test_name: str, args: Dict[str, Any]
) -> Tuple[str, str]:
# Using the type, name, and arguments to this generic test, synthesize a (hopefully) unique name
# Will not be unique if multiple tests have same name + arguments, and only configs differ
# Returns a shorter version (hashed/truncated, for the compiled file)
# as well as the full name (for the unique_id + FQN)
flat_args = []
for arg_name in sorted(args):
# the model is already embedded in the name, so skip it
Expand Down Expand Up @@ -263,13 +267,25 @@ def __init__(
if self.namespace is not None:
self.package_name = self.namespace

compiled_name, fqn_name = self.get_test_name()
self.compiled_name: str = compiled_name
self.fqn_name: str = fqn_name

# use hashed name as alias if too long
if compiled_name != fqn_name and "alias" not in self.config:
self.config["alias"] = compiled_name
# If the user has provided a custom name for this generic test, use it
# Then delete the "name" argument to avoid passing it into the test macro
# Otherwise, use an auto-generated name synthesized from test inputs
self.compiled_name: str = ""
self.fqn_name: str = ""

if "name" in self.args:
# Assign the user-defined name here, which will be checked for uniqueness later
# we will raise an error if two tests have same name for same model + column combo
self.compiled_name = self.args["name"]
self.fqn_name = self.args["name"]
del self.args["name"]
else:
short_name, full_name = self.get_synthetic_test_names()
self.compiled_name = short_name
self.fqn_name = full_name
# use hashed name as alias if full name is too long
if short_name != full_name and "alias" not in self.config:
self.config["alias"] = short_name

def _bad_type(self) -> TypeError:
return TypeError('invalid target type "{}"'.format(type(self.target)))
Expand All @@ -281,13 +297,23 @@ def extract_test_args(test, name=None) -> Tuple[str, Dict[str, Any]]:
"test must be dict or str, got {} (value {})".format(type(test), test)
)

test = list(test.items())
if len(test) != 1:
raise_parsing_error(
"test definition dictionary must have exactly one key, got"
" {} instead ({} keys)".format(test, len(test))
)
test_name, test_args = test[0]
# If the test is a dictionary with top-level keys, the test name is "test_name"
# and the rest are arguments
# {'name': 'my_favorite_test', 'test_name': 'unique', 'config': {'where': '1=1'}}
if "test_name" in test.keys():
test_name = test.pop("test_name")
test_args = test
# If the test is a nested dictionary with one top-level key, the test name
# is the dict name, and nested keys are arguments
# {'unique': {'name': 'my_favorite_test', 'config': {'where': '1=1'}}}
else:
test = list(test.items())
if len(test) != 1:
raise_parsing_error(
"test definition dictionary must have exactly one key, got"
" {} instead ({} keys)".format(test, len(test))
)
test_name, test_args = test[0]

if not isinstance(test_args, dict):
raise_parsing_error(
Expand Down Expand Up @@ -401,7 +427,8 @@ def macro_name(self) -> str:
macro_name = "{}.{}".format(self.namespace, macro_name)
return macro_name

def get_test_name(self) -> Tuple[str, str]:
def get_synthetic_test_names(self) -> Tuple[str, str]:
# Returns two names: shorter (for the compiled file), full (for the unique_id + FQN)
if isinstance(self.target, UnparsedNodeUpdate):
name = self.name
elif isinstance(self.target, UnpatchedSourceDefinition):
Expand All @@ -410,7 +437,7 @@ def get_test_name(self) -> Tuple[str, str]:
raise self._bad_type()
if self.namespace is not None:
name = "{}_{}".format(self.namespace, name)
return get_nice_generic_test_name(name, self.target.name, self.args)
return synthesize_generic_test_names(name, self.target.name, self.args)

def construct_config(self) -> str:
configs = ",".join(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def models(self):

@use_profile("postgres")
def test_postgres_duplicate_exposure(self):
message = "dbt found two resources with the name"
message = "dbt found two exposures with the name"
try:
self.run_dbt(["compile"])
self.assertTrue(False, "dbt did not throw for duplicate exposures")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def models(self):

@use_profile("postgres")
def test_postgres_duplicate_model_enabled(self):
message = "dbt found two resources with the name"
message = "dbt found two models with the name"
try:
self.run_dbt(["run"])
self.assertTrue(False, "dbt did not throw for duplicate models")
Expand Down Expand Up @@ -80,7 +80,7 @@ def packages_config(self):
@use_profile("postgres")
def test_postgres_duplicate_model_enabled_across_packages(self):
self.run_dbt(["deps"])
message = "dbt found two resources with the name"
message = "dbt found two models with the name"
try:
self.run_dbt(["run"])
self.assertTrue(False, "dbt did not throw for duplicate models")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def models(self):

@use_profile("postgres")
def test_postgres_duplicate_source_enabled(self):
message = "dbt found two resources with the name"
message = "dbt found two sources with the name"
try:
self.run_dbt(["compile"])
self.assertTrue(False, "dbt did not throw for duplicate sources")
Expand Down
91 changes: 91 additions & 0 deletions tests/functional/schema_tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,73 @@
SELECT 'NOT_NULL' AS id
"""


dupe_generic_tests_collide__schema_yml = """
version: 2
models:
- name: model_a
columns:
- name: id
tests:
- not_null:
config:
where: "1=1"
- not_null:
config:
where: "1=2"
"""

dupe_generic_tests_collide__model_a = """
SELECT 'NOT_NULL' AS id
"""


custom_generic_test_names__schema_yml = """
version: 2
models:
- name: model_a
columns:
- name: id
tests:
- not_null:
name: not_null_where_1_equals_1
config:
where: "1=1"
- not_null:
name: not_null_where_1_equals_2
config:
where: "1=2"
"""

custom_generic_test_names__model_a = """
SELECT 'NOT_NULL' AS id
"""

custom_generic_test_names_alt_format__schema_yml = """
version: 2
models:
- name: model_a
columns:
- name: id
tests:
- name: not_null_where_1_equals_1
test_name: not_null
config:
where: "1=1"
- name: not_null_where_1_equals_2
test_name: not_null
config:
where: "1=2"
"""

custom_generic_test_names_alt_format__model_a = """
SELECT 'NOT_NULL' AS id
"""


test_context_where_subq_macros__custom_generic_test_sql = """
/*{# This test will fail if get_where_subquery() is missing from TestContext + TestMacroNamespace #}*/
Expand Down Expand Up @@ -1266,6 +1333,30 @@ def name_collision():
}


@pytest.fixture(scope="class")
def dupe_tests_collide():
return {
"schema.yml": dupe_generic_tests_collide__schema_yml,
"model_a.sql": dupe_generic_tests_collide__model_a,
}


@pytest.fixture(scope="class")
def custom_generic_test_names():
return {
"schema.yml": custom_generic_test_names__schema_yml,
"model_a.sql": custom_generic_test_names__model_a,
}


@pytest.fixture(scope="class")
def custom_generic_test_names_alt_format():
return {
"schema.yml": custom_generic_test_names_alt_format__schema_yml,
"model_a.sql": custom_generic_test_names_alt_format__model_a,
}


@pytest.fixture(scope="class")
def test_context_where_subq_macros():
return {"custom_generic_test.sql": test_context_where_subq_macros__custom_generic_test_sql}
Expand Down
62 changes: 61 additions & 1 deletion tests/functional/schema_tests/test_schema_v2_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
seeds,
test_context_models,
name_collision,
dupe_tests_collide,
custom_generic_test_names,
custom_generic_test_names_alt_format,
test_context_where_subq_macros,
invalid_schema_models,
all_models,
Expand All @@ -25,7 +28,7 @@
project_files,
case_sensitive_models__uppercase_SQL,
)
from dbt.exceptions import ParsingException
from dbt.exceptions import ParsingException, CompilationException
from dbt.contracts.results import TestStatus


Expand Down Expand Up @@ -658,6 +661,63 @@ def test_collision_test_names_get_hash(
assert test_results[1].node.unique_id in expected_unique_ids


class TestGenericTestsCollide:
@pytest.fixture(scope="class")
def models(self, dupe_tests_collide): # noqa: F811
return dupe_tests_collide

def test_generic_test_collision(
self,
project,
):
"""These tests collide, since only the configs differ"""
with pytest.raises(CompilationException) as exc:
run_dbt()
assert "dbt found two tests with the name" in str(exc)


class TestGenericTestsCustomNames:
@pytest.fixture(scope="class")
def models(self, custom_generic_test_names): # noqa: F811
return custom_generic_test_names

# users can define custom names for specific instances of generic tests
def test_generic_tests_with_custom_names(
self,
project,
):
"""These tests don't collide, since they have user-provided custom names"""
results = run_dbt()
test_results = run_dbt(["test"])

# model + both tests run
assert len(results) == 1
assert len(test_results) == 2

# custom names propagate to the unique_id
expected_unique_ids = [
"test.test.not_null_where_1_equals_1.7b96089006",
"test.test.not_null_where_1_equals_2.8ae586e17f",
]
assert test_results[0].node.unique_id in expected_unique_ids
assert test_results[1].node.unique_id in expected_unique_ids


class TestGenericTestsCustomNamesAltFormat(TestGenericTestsCustomNames):
@pytest.fixture(scope="class")
def models(self, custom_generic_test_names_alt_format): # noqa: F811
return custom_generic_test_names_alt_format

# exactly as above, just alternative format for yaml definition
def test_collision_test_names_get_hash(
self,
project,
):
"""These tests don't collide, since they have user-provided custom names,
defined using an alternative format"""
super().test_generic_tests_with_custom_names(project)


class TestInvalidSchema:
@pytest.fixture(scope="class")
def models(self, invalid_schema_models): # noqa: F811
Expand Down

0 comments on commit 5071b00

Please sign in to comment.