Custom names for generic tests (#4898)

* Support user-supplied name for generic tests * Support dict-style generic test spec * Add changelog entry * Add TODO comment * Rework raise_duplicate_resource_name * Add functional tests * Update comments, rm TODO * PR feedback
dbt-labs · Mar 25, 2022 · 5071b00 · 5071b00
1 parent 81118d9
commit 5071b00
Show file tree

Hide file tree

Showing 8 changed files with 243 additions and 42 deletions.
diff --git a/.changes/unreleased/Features-20220318-085756.yaml b/.changes/unreleased/Features-20220318-085756.yaml
@@ -0,0 +1,7 @@
+kind: Features
+body: Support custom names for generic tests
+time: 2022-03-18T08:57:56.05584+01:00
+custom:
+  Author: jtcohen6
+  Issue: "3348"
+  PR: "4898"
diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py
@@ -838,31 +838,47 @@ def raise_duplicate_macro_name(node_1, node_2, namespace) -> NoReturn:
 
 def raise_duplicate_resource_name(node_1, node_2):
     duped_name = node_1.name
+    node_type = NodeType(node_1.resource_type)
+    pluralized = (
+        node_type.pluralize()
+        if node_1.resource_type == node_2.resource_type
+        else "resources"  # still raise if ref() collision, e.g. model + seed
+    )
 
-    if node_1.resource_type in NodeType.refable():
-        get_func = 'ref("{}")'.format(duped_name)
-    elif node_1.resource_type == NodeType.Source:
+    action = "looking for"
+    # duplicate 'ref' targets
+    if node_type in NodeType.refable():
+        formatted_name = f'ref("{duped_name}")'
+    # duplicate sources
+    elif node_type == NodeType.Source:
         duped_name = node_1.get_full_source_name()
-        get_func = node_1.get_source_representation()
-    elif node_1.resource_type == NodeType.Documentation:
-        get_func = 'doc("{}")'.format(duped_name)
-    elif node_1.resource_type == NodeType.Test and "schema" in node_1.tags:
-        return
+        formatted_name = node_1.get_source_representation()
+    # duplicate docs blocks
+    elif node_type == NodeType.Documentation:
+        formatted_name = f'doc("{duped_name}")'
+    # duplicate generic tests
+    elif node_type == NodeType.Test and hasattr(node_1, "test_metadata"):
+        column_name = f'column "{node_1.column_name}" in ' if node_1.column_name else ""
+        model_name = node_1.file_key_name
+        duped_name = f'{node_1.name}" defined on {column_name}"{model_name}'
+        action = "running"
+        formatted_name = "tests"
+    # all other resource types
     else:
-        get_func = '"{}"'.format(duped_name)
+        formatted_name = duped_name
 
+    # should this be raise_parsing_error instead?
     raise_compiler_error(
-        'dbt found two resources with the name "{}". Since these resources '
-        "have the same name,\ndbt will be unable to find the correct resource "
-        "when {} is used. To fix this,\nchange the name of one of "
-        "these resources:\n- {} ({})\n- {} ({})".format(
-            duped_name,
-            get_func,
-            node_1.unique_id,
-            node_1.original_file_path,
-            node_2.unique_id,
-            node_2.original_file_path,
-        )
+        f"""
+dbt found two {pluralized} with the name "{duped_name}".
+
+Since these resources have the same name, dbt will be unable to find the correct resource
+when {action} {formatted_name}.
+
+To fix this, change the name of one of these resources:
+- {node_1.unique_id} ({node_1.original_file_path})
+- {node_2.unique_id} ({node_2.original_file_path})
+    """.strip()
     )
 
 

diff --git a/core/dbt/parser/generic_test_builders.py b/core/dbt/parser/generic_test_builders.py
@@ -25,9 +25,13 @@
 from dbt.parser.search import FileBlock
 
 
-def get_nice_generic_test_name(
+def synthesize_generic_test_names(
     test_type: str, test_name: str, args: Dict[str, Any]
 ) -> Tuple[str, str]:
+    # Using the type, name, and arguments to this generic test, synthesize a (hopefully) unique name
+    # Will not be unique if multiple tests have same name + arguments, and only configs differ
+    # Returns a shorter version (hashed/truncated, for the compiled file)
+    # as well as the full name (for the unique_id + FQN)
     flat_args = []
     for arg_name in sorted(args):
         # the model is already embedded in the name, so skip it
@@ -263,13 +267,25 @@ def __init__(
         if self.namespace is not None:
             self.package_name = self.namespace
 
-        compiled_name, fqn_name = self.get_test_name()
-        self.compiled_name: str = compiled_name
-        self.fqn_name: str = fqn_name
-
-        # use hashed name as alias if too long
-        if compiled_name != fqn_name and "alias" not in self.config:
-            self.config["alias"] = compiled_name
+        # If the user has provided a custom name for this generic test, use it
+        # Then delete the "name" argument to avoid passing it into the test macro
+        # Otherwise, use an auto-generated name synthesized from test inputs
+        self.compiled_name: str = ""
+        self.fqn_name: str = ""
+
+        if "name" in self.args:
+            # Assign the user-defined name here, which will be checked for uniqueness later
+            # we will raise an error if two tests have same name for same model + column combo
+            self.compiled_name = self.args["name"]
+            self.fqn_name = self.args["name"]
+            del self.args["name"]
+        else:
+            short_name, full_name = self.get_synthetic_test_names()
+            self.compiled_name = short_name
+            self.fqn_name = full_name
+            # use hashed name as alias if full name is too long
+            if short_name != full_name and "alias" not in self.config:
+                self.config["alias"] = short_name
 
     def _bad_type(self) -> TypeError:
         return TypeError('invalid target type "{}"'.format(type(self.target)))
@@ -281,13 +297,23 @@ def extract_test_args(test, name=None) -> Tuple[str, Dict[str, Any]]:
                 "test must be dict or str, got {} (value {})".format(type(test), test)
             )
 
-        test = list(test.items())
-        if len(test) != 1:
-            raise_parsing_error(
-                "test definition dictionary must have exactly one key, got"
-                " {} instead ({} keys)".format(test, len(test))
-            )
-        test_name, test_args = test[0]
+        # If the test is a dictionary with top-level keys, the test name is "test_name"
+        # and the rest are arguments
+        # {'name': 'my_favorite_test', 'test_name': 'unique', 'config': {'where': '1=1'}}
+        if "test_name" in test.keys():
+            test_name = test.pop("test_name")
+            test_args = test
+        # If the test is a nested dictionary with one top-level key, the test name
+        # is the dict name, and nested keys are arguments
+        # {'unique': {'name': 'my_favorite_test', 'config': {'where': '1=1'}}}
+        else:
+            test = list(test.items())
+            if len(test) != 1:
+                raise_parsing_error(
+                    "test definition dictionary must have exactly one key, got"
+                    " {} instead ({} keys)".format(test, len(test))
+                )
+            test_name, test_args = test[0]
 
         if not isinstance(test_args, dict):
             raise_parsing_error(
@@ -401,7 +427,8 @@ def macro_name(self) -> str:
             macro_name = "{}.{}".format(self.namespace, macro_name)
         return macro_name
 
-    def get_test_name(self) -> Tuple[str, str]:
+    def get_synthetic_test_names(self) -> Tuple[str, str]:
+        # Returns two names: shorter (for the compiled file), full (for the unique_id + FQN)
         if isinstance(self.target, UnparsedNodeUpdate):
             name = self.name
         elif isinstance(self.target, UnpatchedSourceDefinition):
@@ -410,7 +437,7 @@ def get_test_name(self) -> Tuple[str, str]:
             raise self._bad_type()
         if self.namespace is not None:
             name = "{}_{}".format(self.namespace, name)
-        return get_nice_generic_test_name(name, self.target.name, self.args)
+        return synthesize_generic_test_names(name, self.target.name, self.args)
 
     def construct_config(self) -> str:
         configs = ",".join(

diff --git a/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py b/test/integration/025_duplicate_model_tests/test_duplicate_exposure.py
@@ -14,7 +14,7 @@ def models(self):
 
     @use_profile("postgres")
     def test_postgres_duplicate_exposure(self):
-        message = "dbt found two resources with the name"
+        message = "dbt found two exposures with the name"
         try:
             self.run_dbt(["compile"])
             self.assertTrue(False, "dbt did not throw for duplicate exposures")

diff --git a/test/integration/025_duplicate_model_tests/test_duplicate_model.py b/test/integration/025_duplicate_model_tests/test_duplicate_model.py
@@ -14,7 +14,7 @@ def models(self):
 
     @use_profile("postgres")
     def test_postgres_duplicate_model_enabled(self):
-        message = "dbt found two resources with the name"
+        message = "dbt found two models with the name"
         try:
             self.run_dbt(["run"])
             self.assertTrue(False, "dbt did not throw for duplicate models")
@@ -80,7 +80,7 @@ def packages_config(self):
     @use_profile("postgres")
     def test_postgres_duplicate_model_enabled_across_packages(self):
         self.run_dbt(["deps"])
-        message = "dbt found two resources with the name"
+        message = "dbt found two models with the name"
         try:
             self.run_dbt(["run"])
             self.assertTrue(False, "dbt did not throw for duplicate models")

diff --git a/test/integration/025_duplicate_model_tests/test_duplicate_source.py b/test/integration/025_duplicate_model_tests/test_duplicate_source.py
@@ -14,7 +14,7 @@ def models(self):
 
     @use_profile("postgres")
     def test_postgres_duplicate_source_enabled(self):
-        message = "dbt found two resources with the name"
+        message = "dbt found two sources with the name"
         try:
             self.run_dbt(["compile"])
             self.assertTrue(False, "dbt did not throw for duplicate sources")

diff --git a/tests/functional/schema_tests/fixtures.py b/tests/functional/schema_tests/fixtures.py
@@ -420,6 +420,73 @@
 SELECT 'NOT_NULL' AS id
 """
 
+
+dupe_generic_tests_collide__schema_yml = """
+version: 2
+models:
+- name: model_a
+  columns:
+  - name: id
+    tests:
+    - not_null:
+        config:
+          where: "1=1"
+    - not_null:
+        config:
+          where: "1=2"
+
+"""
+
+dupe_generic_tests_collide__model_a = """
+SELECT 'NOT_NULL' AS id
+"""
+
+
+custom_generic_test_names__schema_yml = """
+version: 2
+models:
+- name: model_a
+  columns:
+  - name: id
+    tests:
+    - not_null:
+        name: not_null_where_1_equals_1
+        config:
+          where: "1=1"
+    - not_null:
+        name: not_null_where_1_equals_2
+        config:
+          where: "1=2"
+
+"""
+
+custom_generic_test_names__model_a = """
+SELECT 'NOT_NULL' AS id
+"""
+
+custom_generic_test_names_alt_format__schema_yml = """
+version: 2
+models:
+- name: model_a
+  columns:
+  - name: id
+    tests:
+    - name: not_null_where_1_equals_1
+      test_name: not_null
+      config:
+        where: "1=1"
+    - name: not_null_where_1_equals_2
+      test_name: not_null
+      config:
+        where: "1=2"
+
+"""
+
+custom_generic_test_names_alt_format__model_a = """
+SELECT 'NOT_NULL' AS id
+"""
+
+
 test_context_where_subq_macros__custom_generic_test_sql = """
 /*{# This test will fail if get_where_subquery() is missing from TestContext + TestMacroNamespace #}*/
 
@@ -1266,6 +1333,30 @@ def name_collision():
     }
 
 
+@pytest.fixture(scope="class")
+def dupe_tests_collide():
+    return {
+        "schema.yml": dupe_generic_tests_collide__schema_yml,
+        "model_a.sql": dupe_generic_tests_collide__model_a,
+    }
+
+
+@pytest.fixture(scope="class")
+def custom_generic_test_names():
+    return {
+        "schema.yml": custom_generic_test_names__schema_yml,
+        "model_a.sql": custom_generic_test_names__model_a,
+    }
+
+
+@pytest.fixture(scope="class")
+def custom_generic_test_names_alt_format():
+    return {
+        "schema.yml": custom_generic_test_names_alt_format__schema_yml,
+        "model_a.sql": custom_generic_test_names_alt_format__model_a,
+    }
+
+
 @pytest.fixture(scope="class")
 def test_context_where_subq_macros():
     return {"custom_generic_test.sql": test_context_where_subq_macros__custom_generic_test_sql}

diff --git a/tests/functional/schema_tests/test_schema_v2_tests.py b/tests/functional/schema_tests/test_schema_v2_tests.py
@@ -16,6 +16,9 @@
     seeds,
     test_context_models,
     name_collision,
+    dupe_tests_collide,
+    custom_generic_test_names,
+    custom_generic_test_names_alt_format,
     test_context_where_subq_macros,
     invalid_schema_models,
     all_models,
@@ -25,7 +28,7 @@
     project_files,
     case_sensitive_models__uppercase_SQL,
 )
-from dbt.exceptions import ParsingException
+from dbt.exceptions import ParsingException, CompilationException
 from dbt.contracts.results import TestStatus
 
 
@@ -658,6 +661,63 @@ def test_collision_test_names_get_hash(
         assert test_results[1].node.unique_id in expected_unique_ids
 
 
+class TestGenericTestsCollide:
+    @pytest.fixture(scope="class")
+    def models(self, dupe_tests_collide):  # noqa: F811
+        return dupe_tests_collide
+
+    def test_generic_test_collision(
+        self,
+        project,
+    ):
+        """These tests collide, since only the configs differ"""
+        with pytest.raises(CompilationException) as exc:
+            run_dbt()
+        assert "dbt found two tests with the name" in str(exc)
+
+
+class TestGenericTestsCustomNames:
+    @pytest.fixture(scope="class")
+    def models(self, custom_generic_test_names):  # noqa: F811
+        return custom_generic_test_names
+
+    # users can define custom names for specific instances of generic tests
+    def test_generic_tests_with_custom_names(
+        self,
+        project,
+    ):
+        """These tests don't collide, since they have user-provided custom names"""
+        results = run_dbt()
+        test_results = run_dbt(["test"])
+
+        # model + both tests run
+        assert len(results) == 1
+        assert len(test_results) == 2
+
+        # custom names propagate to the unique_id
+        expected_unique_ids = [
+            "test.test.not_null_where_1_equals_1.7b96089006",
+            "test.test.not_null_where_1_equals_2.8ae586e17f",
+        ]
+        assert test_results[0].node.unique_id in expected_unique_ids
+        assert test_results[1].node.unique_id in expected_unique_ids
+
+
+class TestGenericTestsCustomNamesAltFormat(TestGenericTestsCustomNames):
+    @pytest.fixture(scope="class")
+    def models(self, custom_generic_test_names_alt_format):  # noqa: F811
+        return custom_generic_test_names_alt_format
+
+    # exactly as above, just alternative format for yaml definition
+    def test_collision_test_names_get_hash(
+        self,
+        project,
+    ):
+        """These tests don't collide, since they have user-provided custom names,
+        defined using an alternative format"""
+        super().test_generic_tests_with_custom_names(project)
+
+
 class TestInvalidSchema:
     @pytest.fixture(scope="class")
     def models(self, invalid_schema_models):  # noqa: F811