From 36601f18bc216c8c6bd3257e21306801dbb64d51 Mon Sep 17 00:00:00 2001 From: Marcello Victorino Date: Fri, 30 Sep 2022 16:13:35 +0100 Subject: [PATCH 1/3] Feat: iterate over multiple test cases --- README.md | 26 ++++++++++++++++++ macros/dmt_get_test_sql.sql | 19 +++++++++++-- macros/dmt_unit_test.sql | 54 ++++++++++++++++++++++++++++++++++--- 3 files changed, 93 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index df6d5a9..e7571d9 100644 --- a/README.md +++ b/README.md @@ -157,3 +157,29 @@ models: - ref('raw_customers') columns: ... ``` + +### Support multiple test cases + +It is also possible to support the same model unit test over multiple test cases. Keeping the yaml config concise, but still easily identifying the specific test cases that are failing. + +* `test_case_list`: new optional field, expects an array with the test cases to be applied on `input_mapping` and `expected_output` +* `@`: The special character to be used as template for string substitution, iterating through the values of `test_case_list` +* `\`: the special character added to the beginning of the value. This is required to stop Jinja automatically rendering the value (i.e: `ref()`, `source()` etc.) + +```yaml +models: + - name: stg_customers + tests: + - dbt_datamocktool.unit_test: + tags: + - example_tag_unit_test + test_case_list: [1,2,3,5,8,13,21] + input_mapping: + source('jaffle_shop', 'raw_customers'): \ref('dmt__raw_customers_@') + expected_output: \ref('dmt__expected_stg_customers_@') +``` + +This will result in a **single** test being executed. But under the hood, all test cases listed are checked. Any error count is passed as the test output. + +In case of any failures, the **single** test will return a count of errors > 0, making it a "dbt-test failure". The full log will contain a list of all failing tests. 
+ diff --git a/macros/dmt_get_test_sql.sql b/macros/dmt_get_test_sql.sql index f247074..be2f921 100644 --- a/macros/dmt_get_test_sql.sql +++ b/macros/dmt_get_test_sql.sql @@ -28,11 +28,26 @@ {% set ns.test_sql = render(ns.test_sql)|replace(ns.rendered_keys[k], v) %} {% endfor %} - {% set mock_model_relation = dbt_datamocktool._get_model_to_mock( + {# Store model result for visibility in case of unit test failure #} + {% set identifier_name = model.name %} + {% if test_case %} + {% set identifier_name = identifier_name ~ '__test_case_'~test_case %} + {% endif %} + {# Note: possible to hardcode desired database.schema to store the mocked model output #} + {% set mock_model_relation = api.Relation.create( + database=model.database, + schema=model.schema, + identifier=identifier_name, + type='view') %} + {# Create view to expose full definition #} + {% do run_query(create_view_as(relation, ns.test_sql)) %} + + {# FIXME: commenting out as not sure best way to incorporate the dispatch pattern #} + {# {% set mock_model_relation = dbt_datamocktool._get_model_to_mock( model, suffix=('_dmt_' ~ modules.datetime.datetime.now().strftime("%S%f")) ) %} - {% do dbt_datamocktool._create_mock_table_or_view(mock_model_relation, ns.test_sql) %} + {% do dbt_datamocktool._create_mock_table_or_view(mock_model_relation, ns.test_sql) %} #} {% endif %} {% for k in depends_on %} diff --git a/macros/dmt_unit_test.sql b/macros/dmt_unit_test.sql index d54eb32..5e6691a 100644 --- a/macros/dmt_unit_test.sql +++ b/macros/dmt_unit_test.sql @@ -1,9 +1,55 @@ -{% test unit_test(model, input_mapping, expected_output, name, description, compare_columns, depends_on) %} - {% set test_sql = dbt_datamocktool.get_unit_test_sql(model, input_mapping, depends_on)|trim %} - {% do return(dbt_utils.test_equality(expected_output, compare_model=test_sql, compare_columns=compare_columns)) %} +{% macro individual_unit_test(model, input_mapping, expected_output, test_case) %} + + {% set new_input_mapping = dict() %} + 
{% for k, v in input_mapping.items() %} + {# String substitution on the templated value #} + {% set templated_value = v|replace('@', test_case)|replace('\\', '') %} + + {# Update copy of dictionary #} + {% do new_input_mapping.update({k: render('{{' ~ templated_value ~ '}}')}) %} + {% endfor %} + + {# Retrieve the SQL code with the input mapping applied, using mocked input #} + {% set test_sql = get_unit_test_sql(model=model, input_mapping=new_input_mapping, test_case=test_case) %} + + {# equality test expects a Relation #} + {% set full_path = render('{{' ~ expected_output|replace('@', test_case)|replace('\\', '') ~ '}}') %} + {% set full_path_list = full_path.split('.') %} + {% set expected_output = adapter.get_relation(*full_path_list) %} + + {# Retrieve the SQL code that compares the results between model and expected result #} + {% do return(dbt_utils.test_equality(expected_output, compare_model=test_sql)) %} + +{% endmacro %} +--- +{% test unit_test(model, input_mapping, expected_output, test_case_list = []) %} + {# Support iterating through list of test cases #} + {% if test_case_list %} + {% set error_count = namespace(value=0) %} + {% for test_case in test_case_list %} + {% set unit_test_sql = individual_unit_test(model, input_mapping, expected_output, test_case) %} + {% if execute %} + {% set test_difference_count = run_query(unit_test_sql).columns[0].values()[0] %} + {% else %} + {% set test_difference_count = 0 %} + {% endif %} + + {% if test_difference_count > 0 %} + {# log errors with red font #} + {{ log('\033[31m [ERROR] >> TEST CASE FAILED: ' ~ test_case ~ ' | Number of incorrect records = ' ~ test_difference_count ~ '\033[m', info=True) }} + {% set error_count.value = error_count.value + 1 %} + {% endif %} + {% endfor %} + + {% do return('select '~ error_count.value) %} + + {% else %} + {# Backwards compatible #} + {% set test_sql = custom_get_unit_test_sql(model, input_mapping) %} + {% do return(dbt_utils.test_equality(expected_output, 
compare_model=test_sql)) %} + {% endif %} {% endtest %} {% test assert_mock_eq(model, input_mapping, expected_output) %} {% do return(test_unit_test(model, input_mapping, expected_output)) %} {% endtest %} - From bbe67651fbd5c5c8c641299754ef06ef716e8e9f Mon Sep 17 00:00:00 2001 From: Marcello Victorino Date: Fri, 30 Sep 2022 16:13:55 +0100 Subject: [PATCH 2/3] doc: suggested updates --- README.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e7571d9..f3e1da0 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,14 @@ -# datamocktool - +

datamocktool

+- [About](#about) +- [Requirements](#requirements) +- [Quickstart](#quickstart) +- [Advanced Usage](#advanced-usage) + - [Using Other Materializations](#using-other-materializations) + - [Test Names/Descriptions](#test-namesdescriptions) + - [Compare Columns](#compare-columns) + - [Manual Dependencies](#manual-dependencies) + - [Support multiple test cases](#support-multiple-test-cases) + - [Visibility on test failure](#visibility-on-test-failure) ## About datamocktool (dmt) is a simple package for unit testing dbt projects. @@ -183,3 +192,5 @@ This will result in a **single** test being executed. But under the hood, all te In case of any failures, the **single** test will return a count of errors > 0, making it a "dbt-test failure". The full log will contain a list of all failing tests. +### Visibility on test failure +TODO: if storing result as a view, it is possible to inspect the mocked model code, making it easier to debug issues with failing tests. \ No newline at end of file From e0a2eff7e1449f88b277fd61e1088e72b238c383 Mon Sep 17 00:00:00 2001 From: Marcello Victorino Date: Fri, 14 Oct 2022 17:19:31 +0100 Subject: [PATCH 3/3] Code review --- README.md | 11 ++++--- macros/dmt_get_test_sql.sql | 47 +----------------------------- macros/dmt_unit_test.sql | 57 +++++++++++++++++-------------------- 3 files changed, 34 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index f3e1da0..cf81230 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@

datamocktool

+ - [About](#about) - [Requirements](#requirements) - [Quickstart](#quickstart) @@ -7,7 +8,7 @@ - [Test Names/Descriptions](#test-namesdescriptions) - [Compare Columns](#compare-columns) - [Manual Dependencies](#manual-dependencies) - - [Support multiple test cases](#support-multiple-test-cases) + - [Multiple test cases](#multiple-test-cases) - [Visibility on test failure](#visibility-on-test-failure) ## About @@ -167,9 +168,9 @@ models: columns: ... ``` -### Support multiple test cases +### Multiple test cases -It is also possible to support the same model unit test over multiple test cases. Keeping the yaml config concise, but still easily identifying the specific test cases that are failing. +It is also possible to run the same model unit test over multiple test cases. * `test_case_list`: new optional field, expects an array with the test cases to be applied on `input_mapping` and `expected_output` * `@`: The special character to be used as template for string substitution, iterating through the values of `test_case_list` @@ -193,4 +194,6 @@ This will result in a **single** test being executed. But under the hood, all te In case of any failures, the **single** test will return a count of errors > 0, making it a "dbt-test failure". The full log will contain a list of all failing tests. ### Visibility on test failure -TODO: if storing result as a view, it is possible to inspect the mocked model code, making it easier to debug issues with failing tests. \ No newline at end of file +The final mocked model, where the input mapping takes place, is stored as a view. This allows you to inspect the underlying code, making it easier to debug issues with failing tests. + +Additionally, you can export the results of the mocked model and the expected output seed, then perform a `diff` analysis to easily spot incorrect values. 
\ No newline at end of file diff --git a/macros/dmt_get_test_sql.sql b/macros/dmt_get_test_sql.sql index be2f921..d28a901 100644 --- a/macros/dmt_get_test_sql.sql +++ b/macros/dmt_get_test_sql.sql @@ -1,4 +1,4 @@ -{% macro get_unit_test_sql(model, input_mapping, depends_on) %} +{% macro get_unit_test_sql(model, input_mapping, depends_on, test_case=none) %} {% set ns=namespace( test_sql="(select 1) raw_sql", rendered_keys={}, @@ -42,12 +42,6 @@ {# Create view to expose full definition #} {% do run_query(create_view_as(relation, ns.test_sql)) %} - {# FIXME: commenting out as not sure best way to incorporate the dispatch pattern #} - {# {% set mock_model_relation = dbt_datamocktool._get_model_to_mock( - model, suffix=('_dmt_' ~ modules.datetime.datetime.now().strftime("%S%f")) - ) %} - - {% do dbt_datamocktool._create_mock_table_or_view(mock_model_relation, ns.test_sql) %} #} {% endif %} {% for k in depends_on %} @@ -56,42 +50,3 @@ {{ mock_model_relation }} {% endmacro %} - - -{% macro _get_model_to_mock(model, suffix) %} - {{ return(adapter.dispatch('_get_model_to_mock', 'dbt_datamocktool')(model, suffix)) }} -{% endmacro %} - -{% macro default___get_model_to_mock(model, suffix) %} - {{ return(make_temp_relation(model.incorporate(type='table'), suffix=suffix)) }} -{% endmacro %} - -{# Spark-specific logic excludes a schema name in order to fix https://github.com/mjirv/dbt-datamocktool/issues/22 #} -{% macro spark___get_model_to_mock(model, suffix) %} - {{ return(make_temp_relation(model.incorporate(type='table').include(schema=False), suffix=suffix)) }} -{% endmacro %} - -{# SQL Server logic creates a view instead of a temp table to fix https://github.com/mjirv/dbt-datamocktool/issues/42 #} -{% macro sqlserver___get_model_to_mock(model, suffix) %} - {% set schema = "datamocktool_tmp" %} - {% if not adapter.check_schema_exists(database=model.database, schema=schema) %} - {% do adapter.create_schema(api.Relation.create(database=model.database, schema=schema)) %} - {% 
endif %} - {% set tmp_identifier = model.identifier ~ suffix %} - {# SQL Server requires us to specify a table type because it calls `drop_relation_script()` from `create_table_as()`. - I'd prefer to use something like RelationType.table, but can't find a way to access the relation types #} - {{ return(model.incorporate(type='view', path={"identifier": tmp_identifier, "schema": schema})) }} -{% endmacro %} - - -{% macro _create_mock_table_or_view(model, test_sql) %} - {{ return(adapter.dispatch('_create_mock_table_or_view', 'dbt_datamocktool')(model, test_sql)) }} -{% endmacro %} - -{% macro default___create_mock_table_or_view(model, test_sql) %} - {% do run_query(create_table_as(True, model, test_sql)) %} -{% endmacro %} - -{% macro sqlserver___create_mock_table_or_view(model, test_sql) %} - {% do run_query(create_view_as(model, test_sql)) %} -{% endmacro %} diff --git a/macros/dmt_unit_test.sql b/macros/dmt_unit_test.sql index 5e6691a..5bf814e 100644 --- a/macros/dmt_unit_test.sql +++ b/macros/dmt_unit_test.sql @@ -1,35 +1,30 @@ -{% macro individual_unit_test(model, input_mapping, expected_output, test_case) %} - - {% set new_input_mapping = dict() %} - {% for k, v in input_mapping.items() %} - {# String substitution on the templated value #} - {% set templated_value = v|replace('@', test_case)|replace('\\', '') %} - - {# Update copy of dictionary #} - {% do new_input_mapping.update({k: render('{{' ~ templated_value ~ '}}')}) %} - {% endfor %} - - {# Retrieve the SQL code with the input mapping applied, using mocked input #} - {% set test_sql = get_unit_test_sql(model=model, input_mapping=new_input_mapping, test_case=test_case) %} - - {# equality test expects a Relation #} - {% set full_path = render('{{' ~ expected_output|replace('@', test_case)|replace('\\', '') ~ '}}') %} - {% set full_path_list = full_path.split('.') %} - {% set expected_output = adapter.get_relation(*full_path_list) %} - - {# Retrieve the SQL code that compares the results between model and 
expected result #} - {% do return(dbt_utils.test_equality(expected_output, compare_model=test_sql)) %} - -{% endmacro %} ---- -{% test unit_test(model, input_mapping, expected_output, test_case_list = []) %} +{% test unit_test(model, input_mapping, expected_output, name, description, compare_columns, depends_on, test_case_list = []) %} {# Support iterating through list of test cases #} {% if test_case_list %} {% set error_count = namespace(value=0) %} - {% for test_case in test_case_list %} - {% set unit_test_sql = individual_unit_test(model, input_mapping, expected_output, test_case) %} + {% for test_case in test_case_list %} + {# String substitution for inputs #} + {% set individual_input_mapping = dict() %} + {% for k, v in input_mapping.items() %} + {# String substitution on the templated value #} + {% set templated_value = v|replace('@', test_case)|replace('\\', '') %} + {# Update copy of dictionary #} + {% do individual_input_mapping.update({k: render('{{' ~ templated_value ~ '}}')}) %} + {% endfor %} + + {# String substitution for expected output #} + {# Equality test expects a Relation #} + {% set full_path = render('{{' ~ expected_output|replace('@', test_case)|replace('\\', '') ~ '}}') %} + {% set full_path_list = full_path.split('.') %} + {% set individual_expected_output = adapter.get_relation(*full_path_list) %} + + {# Retrieve the SQL code with the input mapping applied, using mocked input #} + {% set individual_test_sql = dbt_datamocktool.get_unit_test_sql(model, individual_input_mapping, depends_on, test_case) %} + {# Retrieve the SQL code that compares the results between model and expected result #} + {% set comparison_sql = dbt_utils.test_equality(individual_expected_output, compare_model=individual_test_sql, compare_columns=compare_columns) %} + {% if execute %} - {% set test_difference_count = run_query(unit_test_sql).columns[0].values()[0] %} + {% set test_difference_count = run_query(comparison_sql).columns[0].values()[0] %} {% else %} {% set 
test_difference_count = 0 %} {% endif %} @@ -44,9 +39,9 @@ {% do return('select '~ error_count.value) %} {% else %} - {# Backwards compatible #} - {% set test_sql = custom_get_unit_test_sql(model, input_mapping) %} - {% do return(dbt_utils.test_equality(expected_output, compare_model=test_sql)) %} + {# Backwards compatible when not using multiple test_case list #} + {% set test_sql = dbt_datamocktool.get_unit_test_sql(model, input_mapping, depends_on)|trim %} + {% do return(dbt_utils.test_equality(expected_output, compare_model=test_sql, compare_columns=compare_columns)) %} {% endif %} {% endtest %}