From 96c461b65822440f173ab1a68f1302f96e99aed2 Mon Sep 17 00:00:00 2001 From: Judah Rand <17158624+judahrand@users.noreply.github.com> Date: Fri, 25 Mar 2022 03:52:16 +0000 Subject: [PATCH] Add `alias` argument to `deduplicate` macro (#526) * Add `alias` argument to `deduplicate` * Test `alias` argument * Rename `alias` to `relation_alias` --- README.md | 5 +++-- .../data/sql/data_deduplicate.csv | 1 + .../models/sql/test_deduplicate.sql | 19 +++++++++++++++++-- macros/sql/deduplicate.sql | 12 ++++++------ 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index acc7c825..80c6cbbb 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ For compatibility details between versions of dbt-core and dbt-utils, [see this - [SQL generators](#sql-generators) - [date_spine](#date_spine-source) - - [dedupe](#dedupe-source) + - [deduplicate](#deduplicate) - [haversine_distance](#haversine_distance-source) - [group_by](#group_by-source) - [star](#star-source) @@ -715,7 +715,8 @@ This macro returns the sql required to remove duplicate rows from a model or sou {{ dbt_utils.deduplicate( relation=source('my_source', 'my_table'), group_by="user_id, cast(timestamp as day)", - order_by="timestamp desc" + order_by="timestamp desc", + relation_alias="my_cte" ) }} ``` diff --git a/integration_tests/data/sql/data_deduplicate.csv b/integration_tests/data/sql/data_deduplicate.csv index c3ae0c4d..7e06170a 100644 --- a/integration_tests/data/sql/data_deduplicate.csv +++ b/integration_tests/data/sql/data_deduplicate.csv @@ -1,3 +1,4 @@ user_id,event,version 1,play,1 1,play,2 +2,pause,1 diff --git a/integration_tests/models/sql/test_deduplicate.sql b/integration_tests/models/sql/test_deduplicate.sql index 7df79261..81fe81e7 100644 --- a/integration_tests/models/sql/test_deduplicate.sql +++ b/integration_tests/models/sql/test_deduplicate.sql @@ -1,6 +1,21 @@ -with deduped as ( +with - {{ dbt_utils.deduplicate(ref('data_deduplicate'), group_by='user_id', 
order_by='version desc') | indent }} +source as ( + select * + from {{ ref('data_deduplicate') }} + where user_id = 1 +), + +deduped as ( + + {{ + dbt_utils.deduplicate( + ref('data_deduplicate'), + group_by='user_id', + order_by='version desc', + relation_alias="source" + ) | indent + }} ) diff --git a/macros/sql/deduplicate.sql b/macros/sql/deduplicate.sql index f5d65534..9a3571a2 100644 --- a/macros/sql/deduplicate.sql +++ b/macros/sql/deduplicate.sql @@ -1,8 +1,8 @@ -{%- macro deduplicate(relation, group_by, order_by=none) -%} - {{ return(adapter.dispatch('deduplicate', 'dbt_utils')(relation, group_by, order_by=order_by)) }} +{%- macro deduplicate(relation, group_by, order_by=none, relation_alias=none) -%} + {{ return(adapter.dispatch('deduplicate', 'dbt_utils')(relation, group_by, order_by=order_by, relation_alias=relation_alias)) }} {% endmacro %} -{%- macro default__deduplicate(relation, group_by, order_by=none) -%} +{%- macro default__deduplicate(relation, group_by, order_by=none, relation_alias=none) -%} select {{ dbt_utils.star(relation, relation_alias='deduped') | indent }} @@ -15,7 +15,7 @@ order by {{ order_by }} {%- endif %} ) as rn - from {{ relation }} as _inner + from {{ relation if relation_alias is none else relation_alias }} as _inner ) as deduped where deduped.rn = 1 @@ -26,7 +26,7 @@ -- clause in BigQuery: -- https://github.com/dbt-labs/dbt-utils/issues/335#issuecomment-788157572 #} -{%- macro bigquery__deduplicate(relation, group_by, order_by=none) -%} +{%- macro bigquery__deduplicate(relation, group_by, order_by=none, relation_alias=none) -%} select {{ dbt_utils.star(relation, relation_alias='deduped') | indent }} @@ -39,7 +39,7 @@ {%- endif %} limit 1 )[offset(0)] as deduped - from {{ relation }} as original + from {{ relation if relation_alias is none else relation_alias }} as original group by {{ group_by }} )