dbt-labs · gwenwindflower · Mar 5, 2024 · Nov 24, 2022 · Feb 6, 2023 · Feb 7, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,18 +9,24 @@
 --->
 
 # Unreleased
+## New features
+- The `equality` test now accepts an additional argument, `precision`, to aid in comparing floating point numbers ([#757](https://github.com/dbt-labs/dbt-utils/issues/757), [#765](https://github.com/dbt-labs/dbt-utils/pull/765))
+- Add option to ignore columns in equality test ([#734](https://github.com/dbt-labs/dbt-utils/issues/734), [#737](https://github.com/dbt-labs/dbt-utils/pull/737))
 ## Fixes
 - deduplicate macro for Databricks now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic
 - deduplicate macro for Redshift now uses the QUALIFY clause, which fixes NULL columns issues from the default natural join logic
+- Equality test will now raise an error when the second model has fewer columns than the first ([#785](https://github.com/dbt-labs/dbt-utils/issues/785))
 - get_tables_by_pattern_sql will now:
   - return redshift external tables ([#752](https://github.com/dbt-labs/dbt-utils/issues/752))
   - work with valid redshift database names that contain dashes
 ## Under the hood
 - created a new dispatch redshift__get_tables_by_pattern which unions the result of the default macro and querying svv_external_tables for the same conditions (schema name, pattern, exclude pattern).
 
 ## Contributors:
-[@graciegoheen](https://github.com/graciegoheen)
-[@yauhen-sobaleu](https://github.com/yauhen-sobaleu)
+- [@graciegoheen](https://github.com/graciegoheen)
+- [@yauhen-sobaleu](https://github.com/yauhen-sobaleu)
+- [@rlh1994](https://github.com/rlh1994)
+- [@brunocostalopes](https://github.com/brunocostalopes)
 - [@brendan-cook-87](https://github.com/brendan-cook-87)
 
 # dbt utils v1.1.1

diff --git a/README.md b/README.md
@@ -114,21 +114,37 @@ This test supports the `group_by_columns` parameter; see [Grouping in tests](#gr
 
 ### equality ([source](macros/generic_tests/equality.sql))
 
-Asserts the equality of two relations. Optionally specify a subset of columns to compare.
+Asserts the equality of two relations. Optionally specify a subset of columns to compare or exclude, and a precision to compare numeric columns on.
 
 **Usage:**
 
 ```yaml
 version: 2
 
 models:
+  # compare the entire table 
   - name: model_name
+    tests:
+      - dbt_utils.equality:
+          compare_model: ref('other_table_name')
+
+  # only compare some of the columns, rounding numeric columns to 4 decimal places
+  - name: model_name_compare_columns
     tests:
       - dbt_utils.equality:
           compare_model: ref('other_table_name')
           compare_columns:
             - first_column
             - second_column
+          precision: 4
+
+  # compare all columns except the ones listed in exclude_columns
+  - name: model_name_exclude_columns
+    tests:
+      - dbt_utils.equality:
+          compare_model: ref('other_table_name')
+          exclude_columns:
+            - third_column
 ```
 
 ### expression_is_true ([source](macros/generic_tests/expression_is_true.sql))

diff --git a/integration_tests/.gitignore b/integration_tests/.gitignore
@@ -1,6 +1,6 @@
-
 target/
 dbt_modules/
 logs/
 .env/
 profiles.yml
+package-lock.yml
diff --git a/integration_tests/data/schema_tests/data_test_equality_a.csv b/integration_tests/data/schema_tests/data_test_equality_a.csv
@@ -0,0 +1,4 @@
+col_a,col_b,col_c
+1,1,3
+1,2,1
+2,3,3
diff --git a/integration_tests/data/schema_tests/data_test_equality_b.csv b/integration_tests/data/schema_tests/data_test_equality_b.csv
@@ -0,0 +1,4 @@
+col_a,col_b,col_c
+1,1,2
+1,2,2
+2,3,2
diff --git a/integration_tests/data/schema_tests/data_test_equality_floats_a.csv b/integration_tests/data/schema_tests/data_test_equality_floats_a.csv
@@ -0,0 +1,11 @@
+id,float_number
+05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186
+cfae9054-940b-42a1-84d4-052daae6194f,81.2511656
+6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675
+c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841
+59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434
+b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425
+26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223
+b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680
+11c979b7-2661-4375-8143-7c9b54b90627,19.5755431
+a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047
diff --git a/integration_tests/data/schema_tests/data_test_equality_floats_b.csv b/integration_tests/data/schema_tests/data_test_equality_floats_b.csv
@@ -0,0 +1,11 @@
+id,float_number
+05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888187
+cfae9054-940b-42a1-84d4-052daae6194f,81.2511657
+6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959676
+c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100842
+59caed0d-53d6-473c-a88c-3726c7693f05,68.6029435
+b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861426
+26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662224
+b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524681
+11c979b7-2661-4375-8143-7c9b54b90627,19.5755432
+a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237048
diff --git a/integration_tests/data/schema_tests/data_test_equality_floats_columns_a.csv b/integration_tests/data/schema_tests/data_test_equality_floats_columns_a.csv
@@ -0,0 +1,11 @@
+id,float_number,to_ignore
+05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186,a
+cfae9054-940b-42a1-84d4-052daae6194f,81.2511656,a
+6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675,a
+c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841,a
+59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434,a
+b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425,a
+26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223,a
+b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680,a
+11c979b7-2661-4375-8143-7c9b54b90627,19.5755431,a
+a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047,a
diff --git a/integration_tests/data/schema_tests/data_test_equality_floats_columns_b.csv b/integration_tests/data/schema_tests/data_test_equality_floats_columns_b.csv
@@ -0,0 +1,11 @@
+id,float_number,to_ignore
+05ac09c4-f947-45a8-8c14-88f430f8b294,62.3888186,b
+cfae9054-940b-42a1-84d4-052daae6194f,81.2511656,b
+6029501d-c274-49f2-a69d-4c75a3d9931d,23.3959675,b
+c653e520-df81-4a5f-b44b-bb1b4c1b7846,72.2100841,b
+59caed0d-53d6-473c-a88c-3726c7693f05,68.6029434,b
+b441f6a0-ce7f-4ad9-b96b-b41d73a94ae7,72.7861425,b
+26491840-bfd4-4496-9ca9-ad9220a2de47,35.3662223,b
+b4f233ce-a494-4bb6-9cf2-73bb6854e58a,89.1524680,b
+11c979b7-2661-4375-8143-7c9b54b90627,19.5755431,b
+a8057f73-312e-48e6-b344-f4a510a2c4a8,22.9237047,b
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
@@ -32,7 +32,7 @@ seeds:
     sql:
       data_events_20180103:
         +schema: events
-      
+
       data_get_column_values_dropped:
         # this.incorporate() to hardcode the node's type as otherwise dbt doesn't know it yet
         +post-hook: "{% do adapter.drop_relation(this.incorporate(type='table')) %}"
@@ -53,3 +53,19 @@ seeds:
       data_test_sequential_timestamps:
         +column_types:
           my_timestamp: timestamp
+
+      data_test_equality_floats_a:
+        +column_types:
+          float_number: float
+
+      data_test_equality_floats_columns_a:
+        +column_types:
+          float_number: float
+
+      data_test_equality_floats_b:
+        +column_types:
+          float_number: float
+
+      data_test_equality_floats_columns_b:
+        +column_types:
+          float_number: float
diff --git a/integration_tests/models/generic_tests/equality_less_columns.sql b/integration_tests/models/generic_tests/equality_less_columns.sql
@@ -0,0 +1,9 @@
+with data as (
+
+    select * from {{ ref('data_test_equality_b') }}
+
+)
+
+select
+    col_a, col_b
+from data
diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml
@@ -142,6 +142,57 @@ seeds:
           - dbt_utils.not_null_proportion:
               at_least: 0.9
 
+  - name: data_test_equality_a
+    tests:
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_a')
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_b')
+          error_if: "<1" #sneaky way to ensure that the test is returning failing rows
+          warn_if: "<0"
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_b')
+          compare_columns:
+            - col_a
+            - col_b
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_b')
+          exclude_columns:
+            - col_c
+
+  - name: data_test_equality_floats_a
+    tests:
+      # test precision only
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_floats_b')
+          precision: 4
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_floats_b')
+          precision: 8
+          error_if: "<1" #sneaky way to ensure that the test is returning failing rows
+          warn_if: "<0"
+
+  - name: data_test_equality_floats_columns_a
+    tests:
+    # Positive assertion tests
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_floats_columns_b')
+          compare_columns:
+            - id
+            - float_number
+          precision: 4
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_floats_columns_b')
+          exclude_columns:
+            - to_ignore
+          precision: 4
+    # all columns should fail even with rounding
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_floats_columns_b')
+          precision: 4
+          error_if: "<1" #sneaky way to ensure that the test is returning failing rows
+          warn_if: "<0"
+
 models:
   - name: recency_time_included
     tests:
@@ -199,3 +250,10 @@ models:
       - dbt_utils.fewer_rows_than:
           compare_model: ref('data_test_fewer_rows_than_table_2')
           group_by_columns: ['col_a']
+
+  - name: equality_less_columns
+    tests:
+      - dbt_utils.equality:
+          compare_model: ref('data_test_equality_a')
+          exclude_columns:
+            - col_c
diff --git a/integration_tests/models/sql/test_union.sql b/integration_tests/models/sql/test_union.sql
@@ -2,7 +2,7 @@
 select
     id,
     name,
-    favorite_color
+    favorite_color,
+    favorite_number
 
 from {{ ref('test_union_base') }}
-
diff --git a/macros/generic_tests/equality.sql b/macros/generic_tests/equality.sql
@@ -1,8 +1,12 @@
-{% test equality(model, compare_model, compare_columns=None) %}
-  {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns)) }}
+{% test equality(model, compare_model, compare_columns=None, exclude_columns=None, precision = None) %}
+  {{ return(adapter.dispatch('test_equality', 'dbt_utils')(model, compare_model, compare_columns, exclude_columns, precision)) }}
 {% endtest %}
 
-{% macro default__test_equality(model, compare_model, compare_columns=None) %}
+{% macro default__test_equality(model, compare_model, compare_columns=None, exclude_columns=None, precision = None) %}
+
+{%- if compare_columns and exclude_columns -%}
+    {{ exceptions.raise_compiler_error("Both a compare and an ignore list were provided to the `equality` macro. Only one is allowed") }}
+{%- endif -%}
 
 {% set set_diff %}
     count(*) + coalesce(abs(
@@ -19,20 +23,107 @@
     {{ return('') }}
 {% endif %}
 
+
+
 -- setup
 {%- do dbt_utils._is_relation(model, 'test_equality') -%}
 
-{#-
-If the compare_cols arg is provided, we can run this test without querying the
-information schema — this allows the model to be an ephemeral model
--#}
-
+{# Ensure there are no extra columns in the compare_model vs model #}
 {%- if not compare_columns -%}
     {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
-    {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
-{%- endif -%}
+    {%- do dbt_utils._is_ephemeral(compare_model, 'test_equality') -%}
+
+    {%- set model_columns = adapter.get_columns_in_relation(model) -%}
+    {%- set compare_model_columns = adapter.get_columns_in_relation(compare_model) -%}
+
+
+    {%- if exclude_columns -%}
+        {#-- Lower case ignore columns for easier comparison --#}
+        {%- set exclude_columns = exclude_columns | map("lower") | list %}
+
+        {# Filter out the excluded columns #}
+        {%- set include_columns = [] %}
+        {%- set include_model_columns = [] %}
+        {%- for column in model_columns -%}
+            {%- if column.name | lower not in exclude_columns -%}
+                {% do include_columns.append(column) %}
+            {%- endif %}
+        {%- endfor %}
+        {%- for column in compare_model_columns -%}
+            {%- if column.name | lower not in exclude_columns -%}
+                {% do include_model_columns.append(column) %}
+            {%- endif %}
+        {%- endfor %}
+
+        {%- set compare_columns_set = set(include_columns | map(attribute='quoted') | map("lower")) %}
+        {%- set compare_model_columns_set = set(include_model_columns | map(attribute='quoted') | map("lower")) %}
+    {%- else -%}
+        {%- set compare_columns_set = set(model_columns | map(attribute='quoted') | map("lower")) %}
+        {%- set compare_model_columns_set = set(compare_model_columns | map(attribute='quoted') | map("lower")) %}
+    {%- endif -%}
+
+    {% if compare_columns_set != compare_model_columns_set %}
+        {{ exceptions.raise_compiler_error(compare_model ~" has less columns than " ~ model ~ ", please ensure they have the same columns or use the `compare_columns` or `exclude_columns` arguments to subset them.") }}
+    {% endif %}
 
-{% set compare_cols_csv = compare_columns | join(', ') %}
+
+{% endif %}
+
+{%- if not precision -%}
+    {%- if not compare_columns -%}
+        {# 
+            You cannot get the columns in an ephemeral model (due to not existing in the information schema),
+            so if the user does not provide an explicit list of columns we must error in the case it is ephemeral
+        #}
+        {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
+        {%- set compare_columns = adapter.get_columns_in_relation(model)-%}
+
+        {%- if exclude_columns -%}
+            {#-- Lower case ignore columns for easier comparison --#}
+            {%- set exclude_columns = exclude_columns | map("lower") | list %}
+
+            {# Filter out the excluded columns #}
+            {%- set include_columns = [] %}
+            {%- for column in compare_columns -%}
+                {%- if column.name | lower not in exclude_columns -%}
+                    {% do include_columns.append(column) %}
+                {%- endif %}
+            {%- endfor %}
+
+            {%- set compare_columns = include_columns | map(attribute='quoted') %}
+        {%- else -%} {# Compare columns provided #}
+            {%- set compare_columns = compare_columns | map(attribute='quoted') %}
+        {%- endif -%}
+    {%- endif -%}
+
+    {% set compare_cols_csv = compare_columns | join(', ') %}
+
+{% else %} {# Precision required #}
+    {#-
+        If rounding is required, we need to get the types, so it cannot be ephemeral even if they provide column names
+    -#}
+    {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
+    {%- set columns = adapter.get_columns_in_relation(model) -%}
+
+    {% set columns_list = [] %}
+    {%- for col in columns -%}
+        {%- if (
+                (col.name|lower in compare_columns|map('lower') or not compare_columns) and
+                (col.name|lower not in exclude_columns|map('lower') or not exclude_columns)
+                ) -%}
+            {# Databricks double type is not picked up by any number type checks in dbt #}
+            {%- if col.is_float() or col.is_numeric() or col.data_type == 'double' -%}
+                {# Cast is required due to postgres not having round for a double precision number #}
+                {%- do columns_list.append('round(cast(' ~ col.quoted ~ ' as ' ~ dbt.type_numeric() ~ '),' ~ precision ~ ') as ' ~ col.quoted) -%}
+            {%- else -%} {# Non-numeric type #}
+                {%- do columns_list.append(col.quoted) -%}
+            {%- endif -%}
+        {% endif %}
+    {%- endfor -%}
+
+    {% set compare_cols_csv = columns_list | join(', ') %}
+
+{% endif %}
 
 with a as (