diff --git a/README.md b/README.md index df6d5a9..cf81230 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,15 @@ -# datamocktool - +

datamocktool

+ +- [About](#about) +- [Requirements](#requirements) +- [Quickstart](#quickstart) +- [Advanced Usage](#advanced-usage) + - [Using Other Materializations](#using-other-materializations) + - [Test Names/Descriptions](#test-namesdescriptions) + - [Compare Columns](#compare-columns) + - [Manual Dependencies](#manual-dependencies) + - [Multiple test cases](#multiple-test-cases) + - [Visibility on test failure](#visibility-on-test-failure) ## About datamocktool (dmt) is a simple package for unit testing dbt projects. @@ -157,3 +167,33 @@ models: - ref('raw_customers') columns: ... ``` + +### Multiple test cases + +It is also possible to run the same model unit test over multiple test cases. + +* `test_case_list`: a new optional field; expects an array of test cases to be applied to `input_mapping` and `expected_output` +* `@`: the special character used as a placeholder for string substitution; it is replaced in turn by each value of `test_case_list` +* `\`: the special character added to the beginning of the value. This is required to stop Jinja from automatically rendering the value (e.g. `ref()`, `source()`, etc.) + +```yaml +models: + - name: stg_customers + tests: + - dbt_datamocktool.unit_test: + tags: + - example_tag_unit_test + test_case_list: [1,2,3,5,8,13,21] + input_mapping: + source('jaffle_shop', 'raw_customers'): \ref('dmt__raw_customers_@') + expected_output: \ref('dmt__expected_stg_customers_@') +``` + +This will result in a **single** test being executed. Under the hood, however, every listed test case is checked. The number of failing test cases is returned as the test output. + +If any test case fails, the **single** test returns an error count greater than 0, making it a "dbt-test failure". The full log will contain a list of all failing test cases. + +### Visibility on test failure +The final mocked model, where the input mapping takes place, is stored as a view. This allows you to inspect the underlying code, making it easier to debug issues with failing tests. 
+ +Additionally, you can export the results of the mocked model and expected output seed - performing a `diff` analysis to easily spot incorrect values. \ No newline at end of file diff --git a/macros/dmt_get_test_sql.sql b/macros/dmt_get_test_sql.sql index f247074..d28a901 100644 --- a/macros/dmt_get_test_sql.sql +++ b/macros/dmt_get_test_sql.sql @@ -1,4 +1,4 @@ -{% macro get_unit_test_sql(model, input_mapping, depends_on) %} +{% macro get_unit_test_sql(model, input_mapping, depends_on, test_case=none) %} {% set ns=namespace( test_sql="(select 1) raw_sql", rendered_keys={}, @@ -28,11 +28,20 @@ {% set ns.test_sql = render(ns.test_sql)|replace(ns.rendered_keys[k], v) %} {% endfor %} - {% set mock_model_relation = dbt_datamocktool._get_model_to_mock( - model, suffix=('_dmt_' ~ modules.datetime.datetime.now().strftime("%S%f")) - ) %} + {# Persist the mocked model as a view so it can be inspected after a unit test failure. The '__dmt' suffix keeps the view from replacing the real model, which lives in the same database and schema #} + {% set identifier_name = model.name ~ '__dmt' %} + {% if test_case is not none %} + {% set identifier_name = identifier_name ~ '__test_case_'~test_case %} + {% endif %} + {# Note: possible to hardcode desired database.schema to store the mocked model output #} + {% set mock_model_relation = api.Relation.create( + database=model.database, + schema=model.schema, + identifier=identifier_name, + type='view') %} + {# Create the view from the mocked SQL; the relation must be mock_model_relation, the same one this macro emits as its output #} + {% do run_query(create_view_as(mock_model_relation, ns.test_sql)) %} - {% do dbt_datamocktool._create_mock_table_or_view(mock_model_relation, ns.test_sql) %} {% endif %} {% for k in depends_on %} @@ -41,42 +50,3 @@ {{ mock_model_relation }} {% endmacro %} - - -{% macro _get_model_to_mock(model, suffix) %} - {{ return(adapter.dispatch('_get_model_to_mock', 'dbt_datamocktool')(model, suffix)) }} -{% endmacro %} - -{% macro default___get_model_to_mock(model, suffix) %} - {{ return(make_temp_relation(model.incorporate(type='table'), suffix=suffix)) }} -{% endmacro %} - -{# Spark-specific logic excludes a schema name in order to fix 
https://github.com/mjirv/dbt-datamocktool/issues/22 #} -{% macro spark___get_model_to_mock(model, suffix) %} - {{ return(make_temp_relation(model.incorporate(type='table').include(schema=False), suffix=suffix)) }} -{% endmacro %} - -{# SQL Server logic creates a view instead of a temp table to fix https://github.com/mjirv/dbt-datamocktool/issues/42 #} -{% macro sqlserver___get_model_to_mock(model, suffix) %} - {% set schema = "datamocktool_tmp" %} - {% if not adapter.check_schema_exists(database=model.database, schema=schema) %} - {% do adapter.create_schema(api.Relation.create(database=model.database, schema=schema)) %} - {% endif %} - {% set tmp_identifier = model.identifier ~ suffix %} - {# SQL Server requires us to specify a table type because it calls `drop_relation_script()` from `create_table_as()`. - I'd prefer to use something like RelationType.table, but can't find a way to access the relation types #} - {{ return(model.incorporate(type='view', path={"identifier": tmp_identifier, "schema": schema})) }} -{% endmacro %} - - -{% macro _create_mock_table_or_view(model, test_sql) %} - {{ return(adapter.dispatch('_create_mock_table_or_view', 'dbt_datamocktool')(model, test_sql)) }} -{% endmacro %} - -{% macro default___create_mock_table_or_view(model, test_sql) %} - {% do run_query(create_table_as(True, model, test_sql)) %} -{% endmacro %} - -{% macro sqlserver___create_mock_table_or_view(model, test_sql) %} - {% do run_query(create_view_as(model, test_sql)) %} -{% endmacro %} diff --git a/macros/dmt_unit_test.sql b/macros/dmt_unit_test.sql index d54eb32..5bf814e 100644 --- a/macros/dmt_unit_test.sql +++ b/macros/dmt_unit_test.sql @@ -1,9 +1,50 @@ -{% test unit_test(model, input_mapping, expected_output, name, description, compare_columns, depends_on) %} - {% set test_sql = dbt_datamocktool.get_unit_test_sql(model, input_mapping, depends_on)|trim %} - {% do return(dbt_utils.test_equality(expected_output, compare_model=test_sql, compare_columns=compare_columns)) 
%} +{% test unit_test(model, input_mapping, expected_output, name, description, compare_columns, depends_on, test_case_list = []) %} + {# When test_case_list is non-empty, run the comparison once per test case and return the number of failing cases #} + {% if test_case_list %} + {% set error_count = namespace(value=0) %} + {% for test_case in test_case_list %} + {# Build a per-test-case copy of input_mapping #} + {% set individual_input_mapping = dict() %} + {% for k, v in input_mapping.items() %} + {# Replace '@' with the current test case and strip the backslashes from the value #} + {% set templated_value = v|replace('@', test_case)|replace('\\', '') %} + {# Render the substituted expression and store it under the original key #} + {% do individual_input_mapping.update({k: render('{{' ~ templated_value ~ '}}')}) %} + {% endfor %} + + {# Apply the same substitution to expected_output and render it #} + {# The equality test expects a Relation, so look it up from the dot-separated rendered path #} + {% set full_path = render('{{' ~ expected_output|replace('@', test_case)|replace('\\', '') ~ '}}') %} + {% set full_path_list = full_path.split('.') %} + {% set individual_expected_output = adapter.get_relation(*full_path_list) %} + + {# Retrieve the SQL code with the input mapping applied, using mocked input #} + {% set individual_test_sql = dbt_datamocktool.get_unit_test_sql(model, individual_input_mapping, depends_on, test_case) %} + {# Retrieve the SQL code that compares the results between model and expected result #} + {% set comparison_sql = dbt_utils.test_equality(individual_expected_output, compare_model=individual_test_sql, compare_columns=compare_columns) %} + + {% if execute %} + {% set test_difference_count = run_query(comparison_sql).columns[0].values()[0] %} + {% else %} + {% set test_difference_count = 0 %} + {% endif %} + + {% if test_difference_count > 0 %} + {# Log the failing test case in red (ANSI escape codes) and count it as one error #} + {{ log('\033[31m [ERROR] >> TEST CASE FAILED: ' ~ test_case ~ ' | Number of incorrect records = ' ~ test_difference_count ~ '\033[m', info=True) }} + {% set error_count.value = error_count.value + 1 %} + {% endif %} + {% endfor %} + + {% do return('select '~ error_count.value) %} + + {% else %} + 
{# No test_case_list given: keep the original single-comparison behaviour for backwards compatibility #} + {% set test_sql = dbt_datamocktool.get_unit_test_sql(model, input_mapping, depends_on)|trim %} + {% do return(dbt_utils.test_equality(expected_output, compare_model=test_sql, compare_columns=compare_columns)) %} + {% endif %} {% endtest %} {% test assert_mock_eq(model, input_mapping, expected_output) %} {% do return(test_unit_test(model, input_mapping, expected_output)) %} {% endtest %} -