Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check column descriptions #171

Merged
merged 2 commits into from
Aug 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dbt-bouncer-example.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
dbt_artifacts_dir: dbt_project/target # [Optional] Directory where the dbt artifacts exist, generally the `target` directory inside a dbt project. Defaults to `./target`.

catalog_checks:
- name: check_column_description_populated
include: ^marts
- name: check_column_name_complies_to_column_type
column_name_pattern: ^is_.*
exclude: ^staging
Expand Down
43 changes: 43 additions & 0 deletions src/dbt_bouncer/checks/catalog/check_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,49 @@
from dbt_bouncer.utils import bouncer_check


class CheckColumnDescriptionPopulated(BaseCheck):
    # Configuration entry for the `check_column_description_populated` check.
    # `name` is pinned to a single literal, so only a config item carrying
    # exactly this check name validates against this class.
    # NOTE(review): any further options (e.g. `include`/`exclude`) are
    # presumably inherited from BaseCheck - confirm against its definition.
    name: Literal["check_column_description_populated"]


@pytest.mark.iterate_over_catalog_nodes
@bouncer_check
def check_column_description_populated(
    models: List[DbtBouncerModel],
    request: TopRequest,
    catalog_node: Union[CatalogTable, None] = None,
    **kwargs,
) -> None:
    """
    Columns must have a populated description.

    A column is reported when it is missing from the model's documented
    columns, or when its description, stripped of surrounding whitespace, is
    4 characters long or shorter.

    Receives:
        catalog_node (CatalogTable): The CatalogTable object to check.
        exclude (Optional[str]): Regex pattern to match the model path. Model paths that match the pattern will not be checked.
        include (Optional[str]): Regex pattern to match the model path. Only model paths that match the pattern will be checked.
        models (List[DbtBouncerModel]): The models whose documented columns are compared against the catalog columns.

    Raises:
        AssertionError: If at least one column of the model has no populated description.

    Example(s):
        ```yaml
        catalog_checks:
            - name: check_column_description_populated
              include: ^marts
        ```
    """

    # Only catalog entries whose unique_id starts with "model." are checked;
    # any other resource type passes through untouched.
    if catalog_node.unique_id.split(".")[0] == "model":
        # Indexing with [0] raises IndexError when no model shares the
        # catalog node's unique_id.
        model = [m for m in models if m.unique_id == catalog_node.unique_id][0]

        non_complying_columns = []
        for column in catalog_node.columns.values():
            documented_column = model.columns.get(column.name)
            # A column absent from the model, or one whose description is
            # null, is treated the same as an empty description instead of
            # failing with an AttributeError on `.strip()`.
            description = (
                documented_column.description if documented_column is not None else None
            ) or ""
            if len(description.strip()) <= 4:
                non_complying_columns.append(column.name)

        assert (
            not non_complying_columns
        ), f"`{catalog_node.unique_id.split('.')[-1]}` has columns that do not have a populated description: {non_complying_columns}"


class CheckColumnNameCompliesToColumnType(BaseCheck):
column_name_pattern: str
name: Literal["check_column_name_complies_to_column_type"]
Expand Down
122 changes: 122 additions & 0 deletions tests/unit/checks/catalog/test_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,135 @@
from dbt_artifacts_parser.parsers.manifest.manifest_v12 import Nodes4, Nodes6

from dbt_bouncer.checks.catalog.check_columns import (
check_column_description_populated,
check_column_has_specified_test,
check_column_name_complies_to_column_type,
check_columns_are_all_documented,
check_columns_are_documented_in_public_models,
)


def _description_check_catalog_node(unique_id):
    """Build a two-column (`col_1`, `col_2`) catalog entry for `unique_id`."""
    return CatalogTable(
        **{
            "columns": {
                column_name: {
                    "index": index,
                    "name": column_name,
                    "type": "INTEGER",
                }
                for index, column_name in enumerate(("col_1", "col_2"), start=1)
            },
            "metadata": {
                "name": "table_1",
                "schema": "main",
                "type": "VIEW",
            },
            "stats": {},
            "unique_id": unique_id,
        }
    )


def _description_check_model(model_name, column_descriptions):
    """Build a manifest model node named `model_name`.

    `column_descriptions` maps each documented column name to its description.
    A value of `None` keeps the column but omits its `description` key; a
    column left out of the mapping is left out of the model entirely.
    """
    columns = {}
    for index, (column_name, description) in enumerate(
        column_descriptions.items(), start=1
    ):
        column = {"index": index, "name": column_name, "type": "INTEGER"}
        if description is not None:
            column["description"] = description
        columns[column_name] = column

    return Nodes4(
        **{
            "alias": model_name,
            "checksum": {"name": "sha256", "checksum": ""},
            "columns": columns,
            "fqn": ["package_name", model_name],
            "name": model_name,
            "original_file_path": f"{model_name}.sql",
            "package_name": "package_name",
            "path": f"{model_name}.sql",
            "resource_type": "model",
            "schema": "main",
            "unique_id": f"model.package_name.{model_name}",
        }
    )


@pytest.mark.parametrize(
    "catalog_node, models, expectation",
    [
        # Every catalog column carries a populated description.
        (
            _description_check_catalog_node("model.package_name.model_1"),
            [
                _description_check_model(
                    "model_1",
                    {
                        "col_1": "This is a description",
                        "col_2": "This is a description",
                    },
                )
            ],
            does_not_raise(),
        ),
        # `col_2` is declared in the model but has no description.
        (
            _description_check_catalog_node("model.package_name.model_2"),
            [
                _description_check_model(
                    "model_2",
                    {
                        "col_1": "This is a description",
                        "col_2": None,
                    },
                )
            ],
            pytest.raises(AssertionError),
        ),
        # `col_2` exists in the catalog but is absent from the model.
        (
            _description_check_catalog_node("model.package_name.model_3"),
            [
                _description_check_model(
                    "model_3",
                    {
                        "col_1": "This is a description",
                    },
                )
            ],
            pytest.raises(AssertionError),
        ),
        # `col_2` has a description too short to count as populated.
        (
            _description_check_catalog_node("model.package_name.model_4"),
            [
                _description_check_model(
                    "model_4",
                    {
                        "col_1": "This is a description",
                        "col_2": " n/a ",
                    },
                )
            ],
            pytest.raises(AssertionError),
        ),
    ],
)
def test_check_column_description_populated(catalog_node, models, expectation):
    """The check passes only when every catalog column has a populated description."""
    with expectation:
        check_column_description_populated(
            catalog_node=catalog_node, models=models, request=None
        )


@pytest.mark.parametrize(
"catalog_node, column_name_pattern, test_name, tests, expectation",
[
Expand Down