godatadriven · pgoslatara · Aug 26, 2024 · Aug 26, 2024 · Aug 26, 2024
@@ -1,6 +1,8 @@
 dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artifacts exist, generally the `target` directory inside a dbt project. Defaults to `./target`.
 
 catalog_checks:
+  - name: check_column_description_populated
+    include: ^marts
   - name: check_column_name_complies_to_column_type
     column_name_pattern: ^is_.*
     exclude: ^staging

@@ -16,6 +16,49 @@
 from dbt_bouncer.utils import bouncer_check
 
 
+# Config entry for the `check_column_description_populated` check.
+# `name` is pinned to a single literal value, matching the `name:` key used for this check in the config file.
+class CheckColumnDescriptionPopulated(BaseCheck):
+    name: Literal["check_column_description_populated"]
+
+
+@pytest.mark.iterate_over_catalog_nodes
+@bouncer_check
+def check_column_description_populated(
+    models: List[DbtBouncerModel],
+    request: TopRequest,
+    catalog_node: Union[CatalogTable, None] = None,
+    **kwargs,
+) -> None:
+    """
+    Columns must have a populated description.
+
+    Receives:
+        catalog_node (CatalogTable): The CatalogTable object to check.
+        exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked.
+        include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked.
+
+    Example(s):
+        ```yaml
+        manifest_checks:
+            - name: check_column_description_populated
+              include: ^marts
+        ```
+    """
+
+    if catalog_node.unique_id.split(".")[0] == "model":
+        model = [m for m in models if m.unique_id == catalog_node.unique_id][0]
+        non_complying_columns = []
+        for _, v in catalog_node.columns.items():
+            if (
+                model.columns.get(v.name) is None
+                or len(model.columns[v.name].description.strip()) <= 4
+            ):
+                non_complying_columns.append(v.name)
+
+        assert (
+            not non_complying_columns
+        ), f"`{catalog_node.unique_id.split('.')[-1]}` has columns that do not have a populated description: {non_complying_columns}"
+
+
 class CheckColumnNameCompliesToColumnType(BaseCheck):
     column_name_pattern: str
     name: Literal["check_column_name_complies_to_column_type"]

@@ -9,13 +9,135 @@
     from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Nodes4, Nodes6
 
 from dbt_bouncer.checks.catalog.check_columns import (
+    check_column_description_populated,
     check_column_has_specified_test,
     check_column_name_complies_to_column_type,
     check_columns_are_all_documented,
     check_columns_are_documented_in_public_models,
 )
 
 
+@pytest.mark.parametrize(
+    "catalog_node, models, expectation",
+    [
+        (
+            CatalogTable(
+                **{
+                    "columns": {
+                        "col_1": {
+                            "index": 1,
+                            "name": "col_1",
+                            "type": "INTEGER",
+                        },
+                        "col_2": {
+                            "index": 2,
+                            "name": "col_2",
+                            "type": "INTEGER",
+                        },
+                    },
+                    "metadata": {
+                        "name": "table_1",
+                        "schema": "main",
+                        "type": "VIEW",
+                    },
+                    "stats": {},
+                    "unique_id": "model.package_name.model_1",
+                }
+            ),
+            [
+                Nodes4(
+                    **{
+                        "alias": "model_1",
+                        "checksum": {"name": "sha256", "checksum": ""},
+                        "columns": {
+                            "col_1": {
+                                "description": "This is a description",
+                                "index": 1,
+                                "name": "col_1",
+                                "type": "INTEGER",
+                            },
+                            "col_2": {
+                                "description": "This is a description",
+                                "index": 2,
+                                "name": "col_2",
+                                "type": "INTEGER",
+                            },
+                        },
+                        "fqn": ["package_name", "model_1"],
+                        "name": "model_1",
+                        "original_file_path": "model_1.sql",
+                        "package_name": "package_name",
+                        "path": "model_1.sql",
+                        "resource_type": "model",
+                        "schema": "main",
+                        "unique_id": "model.package_name.model_1",
+                    }
+                )
+            ],
+            does_not_raise(),
+        ),
+        (
+            CatalogTable(
+                **{
+                    "columns": {
+                        "col_1": {
+                            "index": 1,
+                            "name": "col_1",
+                            "type": "INTEGER",
+                        },
+                        "col_2": {
+                            "index": 2,
+                            "name": "col_2",
+                            "type": "INTEGER",
+                        },
+                    },
+                    "metadata": {
+                        "name": "table_1",
+                        "schema": "main",
+                        "type": "VIEW",
+                    },
+                    "stats": {},
+                    "unique_id": "model.package_name.model_2",
+                }
+            ),
+            [
+                Nodes4(
+                    **{
+                        "alias": "model_2",
+                        "checksum": {"name": "sha256", "checksum": ""},
+                        "columns": {
+                            "col_1": {
+                                "description": "This is a description",
+                                "index": 1,
+                                "name": "col_1",
+                                "type": "INTEGER",
+                            },
+                            "col_2": {
+                                "index": 2,
+                                "name": "col_2",
+                                "type": "INTEGER",
+                            },
+                        },
+                        "fqn": ["package_name", "model_2"],
+                        "name": "model_2",
+                        "original_file_path": "model_2.sql",
+                        "package_name": "package_name",
+                        "path": "model_2.sql",
+                        "resource_type": "model",
+                        "schema": "main",
+                        "unique_id": "model.package_name.model_2",
+                    }
+                )
+            ],
+            pytest.raises(AssertionError),
+        ),
+    ],
+)
+def test_check_column_description_populated(catalog_node, models, expectation):
+    with expectation:
+        check_column_description_populated(catalog_node=catalog_node, models=models, request=None)
+
+
 @pytest.mark.parametrize(
     "catalog_node, column_name_pattern, test_name, tests, expectation",
     [