From 9772c1caeb067c65dc806f49085978020415a6d6 Mon Sep 17 00:00:00 2001 From: Blake Blackwell Date: Sun, 17 Feb 2019 19:28:46 -0600 Subject: [PATCH 01/29] Adding incremental logic to Snowflake plugins --- .../macros/materializations/incremental.sql | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql new file mode 100644 index 00000000000..18045af7a28 --- /dev/null +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -0,0 +1,66 @@ + +{% materialization incremental, adapter='snowflake' -%} + + {%- set unique_key = config.get('unique_key') -%} + {%- set sql_where = config.get('sql_where') -%} + + {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} + {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} + + {% if non_destructive_mode %} + {{ exceptions.raise_compiler_error("--non-destructive mode is not supported on BigQuery") }} + {% endif %} + + {%- set identifier = model['alias'] -%} + + {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} + + {%- set target_relation = api.Relation.create(database=database, identifier=identifier, schema=schema, type='table') -%} + + {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} + {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} + + {%- set should_drop = (full_refresh_mode or exists_not_as_table) -%} + {%- set force_create = (full_refresh_mode) -%} + + -- setup + {% if old_relation is none -%} + -- noop + {%- elif should_drop -%} + {{ adapter.drop_relation(old_relation) }} + {%- set old_relation = none -%} + {%- endif %} + + {% set source_sql -%} + {#-- wrap sql in parens to make it a subquery --#} + ( + select * from ( + {{ sql }} + ) + {% if sql_where %} + where ({{ sql_where }}) or ({{ sql_where }}) is null + {% endif %} + ) + {%- endset -%} + + + {{ run_hooks(pre_hooks, inside_transaction=True) }} + + -- build model + {% if force_create or old_relation is none -%} + {%- call statement('main') -%} + {{ create_table_as(False, target_relation, sql) }} + {%- endcall -%} + {%- else -%} + {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} + {%- call statement('main') -%} + {{ get_merge_sql(target_relation, source_sql, unique_key, dest_columns) }} + {% endcall %} + {%- endif %} + +-- `COMMIT` happens here + {{ adapter.commit() }} + + {{ run_hooks(post_hooks, inside_transaction=False) }} + +{%- endmaterialization %} From 56801f90950ed493553d698b37c6087597bf91fd Mon Sep 17 00:00:00 2001 From: Blake Blackwell Date: Sat, 2 Mar 2019 05:28:09 -0600 Subject: [PATCH 02/29] Adding changes based on Drew's recommendations --- .../snowflake/macros/materializations/incremental.sql | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 18045af7a28..99d9f2e941d 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -7,10 +7,6 @@ {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE 
== True) -%} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} - {% if non_destructive_mode %} - {{ exceptions.raise_compiler_error("--non-destructive mode is not supported on BigQuery") }} - {% endif %} - {%- set identifier = model['alias'] -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} @@ -43,7 +39,9 @@ ) {%- endset -%} + {{ run_hooks(pre_hooks, inside_transaction=False) }} + -- `BEGIN` happens here: {{ run_hooks(pre_hooks, inside_transaction=True) }} -- build model @@ -58,7 +56,9 @@ {% endcall %} {%- endif %} --- `COMMIT` happens here + {{ run_hooks(post_hooks, inside_transaction=True) }} + + -- `COMMIT` happens here {{ adapter.commit() }} {{ run_hooks(post_hooks, inside_transaction=False) }} From 38254a86956f6e3ee12e8460a58ba188be7ee0d4 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Tue, 16 Apr 2019 17:53:31 +0200 Subject: [PATCH 03/29] make create or replace snowflake macro --- .../dbt/include/snowflake/macros/adapters.sql | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index 05144c654b8..1feb36cc09f 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -15,6 +15,23 @@ ); {% endmacro %} +{% macro snowflake__create_or_replace_table_as(relation, sql) -%} + {% if temporary %} + use schema {{ adapter.quote_as_configured(schema, 'schema') }}; + {% endif %} + + {%- set transient = config.get('transient', default=true) -%} + + create or replace {% if temporary -%} + temporary + {%- elif transient -%} + transient + {%- endif %} table {{ relation.include(database=(not temporary), schema=(not temporary)) }} + as ( + {{ sql }} + ); +{% endmacro %} + {% macro snowflake__create_view_as(relation, sql) -%} create or replace view {{ relation }} as ( {{ sql }} From 9222c790433626f3c5c18ccd135a66bb3354a1d3 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Tue, 16 Apr 2019 17:53:52 +0200 Subject: [PATCH 04/29] implement create or replace in table mater --- .../macros/materializations/table.sql | 44 ++++++------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index 51806e7b2df..d8dd4d4ffb8 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -37,8 +37,10 @@ -- setup: if the target relation already exists, truncate or drop it (if it's a view) {% if non_destructive_mode -%} {% if exists_as_table -%} - {{ adapter.truncate_relation(old_relation) }} + --noop we can do away with this step all together since the table can be replaced in Snowflake. + {# {{ adapter.truncate_relation(old_relation) }} #} {% elif exists_as_view -%} + --noop. I think we should also be able to do away with this and call a replace. 
{{ adapter.drop_relation(old_relation) }} {%- set old_relation = none -%} {%- endif %} @@ -49,44 +51,22 @@ -- `BEGIN` happens here: {{ run_hooks(pre_hooks, inside_transaction=True) }} - -- build model + --build model {% call statement('main') -%} - {%- if non_destructive_mode -%} - {%- if old_relation is not none -%} - {{ create_table_as(create_as_temporary, intermediate_relation, sql) }} - - {% set dest_columns = adapter.get_columns_in_relation(old_relation) %} - {% set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') %} - - insert into {{ target_relation }} ({{ dest_cols_csv }}) ( - select {{ dest_cols_csv }} - from {{ intermediate_relation.include(database=(not create_as_temporary), schema=(not create_as_temporary)) }} - ); - {%- else -%} - {{ create_table_as(create_as_temporary, target_relation, sql) }} - {%- endif -%} - {%- else -%} - {{ create_table_as(create_as_temporary, intermediate_relation, sql) }} - {%- endif -%} - {%- endcall %} - - -- cleanup - {% if non_destructive_mode -%} - -- noop - {%- else -%} + -- we can leverage Snowflake create or replace table here to achieve an atomic replace. {% if old_relation is not none %} + {# -- I'm preserving one of the old checks here for a view, and to make sure Snowflake doesn't + -- complain that we're running a replace table on a view. #} {% if old_relation.type == 'view' %} - {#-- This is the primary difference between Snowflake and Redshift. Renaming this view - -- would cause an error if the view has become invalid due to upstream schema changes #} {{ log("Dropping relation " ~ old_relation ~ " because it is a view and this model is a table.") }} {{ drop_relation_if_exists(old_relation) }} - {% else %} - {{ adapter.rename_relation(target_relation, backup_relation) }} {% endif %} {% endif %} + -- + {{create_or_replace_table_as(target_relation, sql)}} + {%- endcall %} - {{ adapter.rename_relation(intermediate_relation, target_relation) }} - {%- endif %} + -- skiping all previous renames here since they are not needed in Snowflake {{ run_hooks(post_hooks, inside_transaction=True) }} @@ -94,6 +74,8 @@ {{ adapter.commit() }} -- finally, drop the existing/backup relation after the commit + {# -- TODO: Check with Drew wether this backup_relation gets used at all should this materialisation + -- fail #} {{ drop_relation_if_exists(backup_relation) }} {{ run_hooks(post_hooks, inside_transaction=False) }} From a35ad186e38e0d0fe5d59914c646ab8033a48717 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 20 Apr 2019 18:13:37 +0100 Subject: [PATCH 05/29] implement insert when no unique key and full refresh solution --- .../macros/materializations/incremental.sql | 44 +++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 99d9f2e941d..23cba1f4663 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -16,8 +16,10 @@ {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} - {%- set should_drop = (full_refresh_mode or exists_not_as_table) -%} - {%- set force_create = (full_refresh_mode) -%} + -- FIXME: Double check the operators syntax as non-destrive always has to be false here + {%- set 
should_drop = (full_refresh_mode or exists_not_as_table and not non_destructive_mode) -%} + {%- set force_create_or_replace = full_refresh_mode -%} + -- setup {% if old_relation is none -%} @@ -45,15 +47,38 @@ {{ run_hooks(pre_hooks, inside_transaction=True) }} -- build model - {% if force_create or old_relation is none -%} + {% if force_create_or_replace or old_relation is none -%} {%- call statement('main') -%} - {{ create_table_as(False, target_relation, sql) }} + + {# -- create or replace logic because we're in a full refresh or table is non existant. #} + {% if old_relation is not none and old_relation.type == 'view' %} + {# -- I'm preserving one of the old checks here for a view, and to make sure Snowflake doesn't + -- complain that we're running a replace table on a view. #} + {{ log("Dropping relation " ~ old_relation ~ " because it is a view and this model is a table.") }} + {{ adapter.drop_relation(old_relation) }} + {% endif %} + + {# -- now create or replace the table because we're in full-refresh #} + {{create_or_replace_table_as(target_relation, source_sql)}} {%- endcall -%} + {%- else -%} - {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} - {%- call statement('main') -%} - {{ get_merge_sql(target_relation, source_sql, unique_key, dest_columns) }} - {% endcall %} + {# -- here is the incremental part #} + {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} + {%- call statement('main') -%} + {%- if unique_key is none -%} + {# -- if no unique_key is provided run regular insert as Snowflake may complain #} + insert into {{ target_relation }} ({{ dest_columns }}) + ( + select {{ dest_columns }} + from {{ source_sql }} + ); + {%- else -%} + {# -- use merge if a unique key is provided #} + {{ get_merge_sql(target_relation, source_sql, unique_key, dest_columns) }} + {%- endif -%} + {% endcall %} + {%- endif %} {{ run_hooks(post_hooks, inside_transaction=True) }} @@ -61,6 +86,9 @@ -- `COMMIT` happens here {{ adapter.commit() }} +{# -- FIXME: There doesn't seem to be any backup relation created here. 
Need to check whether we +-- should have one #} + {{ run_hooks(post_hooks, inside_transaction=False) }} {%- endmaterialization %} From 2d5525e887c3aa5299b3eb79cdbed6752a240bae Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 20 Apr 2019 19:09:46 +0100 Subject: [PATCH 06/29] add some logging --- .../dbt/include/snowflake/macros/materializations/table.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index d8dd4d4ffb8..7101055f74f 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -63,6 +63,7 @@ {% endif %} {% endif %} -- + {{ log("Using snowflake create or replace method, from TABLE view.") }} {{create_or_replace_table_as(target_relation, sql)}} {%- endcall %} From 6a104c193805a703ceeb9009a6f35cc76a23b684 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 20 Apr 2019 19:25:16 +0100 Subject: [PATCH 07/29] test --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ffa6cef5164..a41e4744af5 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ def read(fname): package_name = "dbt" -package_version = "0.13.0a1" +package_version = "0.13.0a2" description = """dbt (data build tool) is a command line tool that helps \ analysts and engineers transform data in their warehouse more effectively""" From 91d869e61adb64d354eaa5ec7dd2061889cc7788 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 20 Apr 2019 19:29:01 +0100 Subject: [PATCH 08/29] revert test --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a41e4744af5..ffa6cef5164 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ def read(fname): package_name = "dbt" -package_version = "0.13.0a2" +package_version = "0.13.0a1" description = """dbt (data build tool) is a command line tool that helps \ analysts and engineers transform data in their warehouse more effectively""" From dacce7c8649c5c1c2bae0408d2d3a05e0d5bf000 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Mon, 22 Apr 2019 01:27:25 +0200 Subject: [PATCH 09/29] fix insert cols call and temp workaround call of snowflake --- .../macros/materializations/incremental.sql | 28 ++++++++----------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 23cba1f4663..c87d5e8fd82 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -16,18 +16,11 @@ {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} - -- FIXME: Double check the operators syntax as non-destrive always has to be false here - {%- set should_drop = (full_refresh_mode or exists_not_as_table and not non_destructive_mode) -%} + {%- set force_create_or_replace = full_refresh_mode -%} -- setup - {% if old_relation is none -%} - -- noop - {%- elif should_drop -%} - {{ adapter.drop_relation(old_relation) }} - {%- set old_relation = none -%} - {%- endif %} {% set source_sql -%} {#-- wrap sql in parens to make it a subquery 
--#} @@ -57,24 +50,28 @@ {{ log("Dropping relation " ~ old_relation ~ " because it is a view and this model is a table.") }} {{ adapter.drop_relation(old_relation) }} {% endif %} - {# -- now create or replace the table because we're in full-refresh #} - {{create_or_replace_table_as(target_relation, source_sql)}} + {{ log("Creating or Replacing table") }} + {{snowflake__create_or_replace_table_as(target_relation, source_sql)}} {%- endcall -%} {%- else -%} {# -- here is the incremental part #} {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} + {% set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') %} {%- call statement('main') -%} + {%- if unique_key is none -%} - {# -- if no unique_key is provided run regular insert as Snowflake may complain #} - insert into {{ target_relation }} ({{ dest_columns }}) + {{ log("Incremental Run happening via INSERT") }} + {# -- if no unique_key is provided run regular insert as Snowflake may complain #} + insert into {{ target_relation }} ({{ dest_cols_csv }}) ( - select {{ dest_columns }} + select {{ dest_cols_csv }} from {{ source_sql }} ); {%- else -%} - {# -- use merge if a unique key is provided #} + {# -- use merge if a unique key is provided #} + {{ log("Incremental Run happening via MERGE") }} {{ get_merge_sql(target_relation, source_sql, unique_key, dest_columns) }} {%- endif -%} {% endcall %} @@ -86,9 +83,6 @@ -- `COMMIT` happens here {{ adapter.commit() }} -{# -- FIXME: There doesn't seem to be any backup relation created here. Need to check whether we --- should have one #} - {{ run_hooks(post_hooks, inside_transaction=False) }} {%- endmaterialization %} From 54c02ef4b4bcb9a4bbd6054a1314ae813bea1c3a Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Mon, 22 Apr 2019 01:32:07 +0200 Subject: [PATCH 10/29] some logging and temp call workaround --- .../dbt/include/snowflake/macros/materializations/table.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index 7101055f74f..cd6f686e1db 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -63,8 +63,8 @@ {% endif %} {% endif %} -- - {{ log("Using snowflake create or replace method, from TABLE view.") }} - {{create_or_replace_table_as(target_relation, sql)}} + {{ log("Using snowflake create or replace method.") }} + {{snowflake__create_or_replace_table_as(target_relation, sql)}} {%- endcall %} -- skiping all previous renames here since they are not needed in Snowflake From 2830b6a8990fa5573410cba700f9a32a44e1f049 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Mon, 22 Apr 2019 11:06:58 +0200 Subject: [PATCH 11/29] make default create or replace macro to allow snowflake adapter pick it up --- .../include/global_project/macros/adapters/common.sql | 11 +++++++++++ .../snowflake/macros/materializations/table.sql | 2 +- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/core/dbt/include/global_project/macros/adapters/common.sql b/core/dbt/include/global_project/macros/adapters/common.sql index b14004fa68b..702d555a435 100644 --- a/core/dbt/include/global_project/macros/adapters/common.sql +++ b/core/dbt/include/global_project/macros/adapters/common.sql @@ -60,6 +60,17 @@ ); {% endmacro %} +{% macro create_or_replace_table_as(relation, sql) -%} + {{ 
adapter_macro('create_or_replace_table_as', relation, sql) }} +{%- endmacro %} + +{% macro default__create_or_replace_table_as(relation, sql) -%} + create or replace table + {{ relation.include(database=(not temporary), schema=(not temporary)) }} + as ( + {{ sql }} + ); +{% endmacro %} {% macro create_view_as(relation, sql) -%} {{ adapter_macro('create_view_as', relation, sql) }} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index cd6f686e1db..c4edeb3da4e 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -64,7 +64,7 @@ {% endif %} -- {{ log("Using snowflake create or replace method.") }} - {{snowflake__create_or_replace_table_as(target_relation, sql)}} + {{create_or_replace_table_as(target_relation, sql)}} {%- endcall %} -- skiping all previous renames here since they are not needed in Snowflake From 95c9f76e321e9e6fad1d6e1fa3bed4ef339726fd Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Mon, 22 Apr 2019 11:10:02 +0200 Subject: [PATCH 12/29] remove snowflake__ direct call in incremental --- .../include/snowflake/macros/materializations/incremental.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index c87d5e8fd82..c7d3f0a4abe 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -52,7 +52,7 @@ {% endif %} {# -- now create or replace the table because we're in full-refresh #} {{ log("Creating or Replacing table") }} - {{snowflake__create_or_replace_table_as(target_relation, source_sql)}} + {{create_or_replace_table_as(target_relation, source_sql)}} {%- endcall -%} {%- else -%} From 04333699a05ac152f3df83a276c5e3a5638bb1be Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Mon, 22 Apr 2019 15:49:59 +0200 Subject: [PATCH 13/29] remove testing logging messages --- .../include/snowflake/macros/materializations/incremental.sql | 3 --- .../dbt/include/snowflake/macros/materializations/table.sql | 2 -- 2 files changed, 5 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index c7d3f0a4abe..d88aea9feec 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -51,7 +51,6 @@ {{ adapter.drop_relation(old_relation) }} {% endif %} {# -- now create or replace the table because we're in full-refresh #} - {{ log("Creating or Replacing table") }} {{create_or_replace_table_as(target_relation, source_sql)}} {%- endcall -%} @@ -62,7 +61,6 @@ {%- call statement('main') -%} {%- if unique_key is none -%} - {{ log("Incremental Run happening via INSERT") }} {# -- if no unique_key is provided run regular insert as Snowflake may complain #} insert into {{ target_relation }} ({{ dest_cols_csv }}) ( @@ -71,7 +69,6 @@ ); {%- else -%} {# -- use merge if a unique key is provided #} - {{ log("Incremental Run happening via MERGE") }} {{ get_merge_sql(target_relation, source_sql, unique_key, dest_columns) }} {%- endif 
-%} {% endcall %} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index c4edeb3da4e..71f2faf74b2 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -62,8 +62,6 @@ {{ drop_relation_if_exists(old_relation) }} {% endif %} {% endif %} - -- - {{ log("Using snowflake create or replace method.") }} {{create_or_replace_table_as(target_relation, sql)}} {%- endcall %} From e83edd30debfb2595e19dff2ab8b40e7cabfdd1d Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Mon, 22 Apr 2019 16:02:03 +0200 Subject: [PATCH 14/29] fixme/todo regarding non destructive flag --- .../dbt/include/snowflake/macros/materializations/table.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index 71f2faf74b2..2b09ed23b2f 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -35,6 +35,8 @@ {{ adapter.drop_relation(intermediate_relation) }} -- setup: if the target relation already exists, truncate or drop it (if it's a view) + {# TODO: Would like to check this. New materialsiation makes these tests a bit moot. We should + be able to deprecate non-destructive flag all together here. #} {% if non_destructive_mode -%} {% if exists_as_table -%} --noop we can do away with this step all together since the table can be replaced in Snowflake. From 9591b86430f9000b2e51aa37a9311b698e6e0355 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Fri, 26 Apr 2019 12:43:11 +0200 Subject: [PATCH 15/29] (PR fdbk) rm extra macro --- .../global_project/macros/adapters/common.sql | 4 ++-- .../dbt/include/snowflake/macros/adapters.sql | 6 +++--- .../macros/materializations/incremental.sql | 12 ++++++------ .../snowflake/macros/materializations/table.sql | 15 +++------------ 4 files changed, 14 insertions(+), 23 deletions(-) diff --git a/core/dbt/include/global_project/macros/adapters/common.sql b/core/dbt/include/global_project/macros/adapters/common.sql index 702d555a435..254cc5ba46b 100644 --- a/core/dbt/include/global_project/macros/adapters/common.sql +++ b/core/dbt/include/global_project/macros/adapters/common.sql @@ -60,7 +60,7 @@ ); {% endmacro %} -{% macro create_or_replace_table_as(relation, sql) -%} +{# {% macro create_or_replace_table_as(relation, sql) -%} {{ adapter_macro('create_or_replace_table_as', relation, sql) }} {%- endmacro %} @@ -70,7 +70,7 @@ as ( {{ sql }} ); -{% endmacro %} +{% endmacro %} #} {% macro create_view_as(relation, sql) -%} {{ adapter_macro('create_view_as', relation, sql) }} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index 1feb36cc09f..c608f496916 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -5,7 +5,7 @@ {%- set transient = config.get('transient', default=true) -%} - create {% if temporary -%} + create or replace {% if temporary -%} temporary {%- elif transient -%} transient @@ -15,7 +15,7 @@ ); {% endmacro %} -{% macro snowflake__create_or_replace_table_as(relation, sql) -%} +{# {% macro 
snowflake__create_or_replace_table_as(relation, sql) -%} {% if temporary %} use schema {{ adapter.quote_as_configured(schema, 'schema') }}; {% endif %} @@ -30,7 +30,7 @@ as ( {{ sql }} ); -{% endmacro %} +{% endmacro %} #} {% macro snowflake__create_view_as(relation, sql) -%} create or replace view {{ relation }} as ( diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index d88aea9feec..f8f19c1a7e9 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -17,13 +17,13 @@ {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} - {%- set force_create_or_replace = full_refresh_mode -%} + {%- set force_create = full_refresh_mode -%} -- setup - {% set source_sql -%} - {#-- wrap sql in parens to make it a subquery --#} + {% set source_sql = sql -%} + {# -- wrap sql in parens to make it a subquery -- ( select * from ( {{ sql }} @@ -32,7 +32,7 @@ where ({{ sql_where }}) or ({{ sql_where }}) is null {% endif %} ) - {%- endset -%} + {%- endset -%} #} {{ run_hooks(pre_hooks, inside_transaction=False) }} @@ -40,7 +40,7 @@ {{ run_hooks(pre_hooks, inside_transaction=True) }} -- build model - {% if force_create_or_replace or old_relation is none -%} + {% if force_create or old_relation is none -%} {%- call statement('main') -%} {# -- create or replace logic because we're in a full refresh or table is non existant. #} @@ -51,7 +51,7 @@ {{ adapter.drop_relation(old_relation) }} {% endif %} {# -- now create or replace the table because we're in full-refresh #} - {{create_or_replace_table_as(target_relation, source_sql)}} + {{create_table_as(target_relation, source_sql)}} {%- endcall -%} {%- else -%} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index 2b09ed23b2f..0648984e353 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -56,13 +56,9 @@ --build model {% call statement('main') -%} -- we can leverage Snowflake create or replace table here to achieve an atomic replace. - {% if old_relation is not none %} - {# -- I'm preserving one of the old checks here for a view, and to make sure Snowflake doesn't - -- complain that we're running a replace table on a view. 
#} - {% if old_relation.type == 'view' %} - {{ log("Dropping relation " ~ old_relation ~ " because it is a view and this model is a table.") }} - {{ drop_relation_if_exists(old_relation) }} - {% endif %} + {% if old_relation is not none and old_relation.type == 'view' %} + {{ log("Dropping relation " ~ old_relation ~ " because it is a view and this model is a table.") }} + {{ drop_relation_if_exists(old_relation) }} {% endif %} {{create_or_replace_table_as(target_relation, sql)}} {%- endcall %} @@ -74,10 +70,5 @@ -- `COMMIT` happens here {{ adapter.commit() }} - -- finally, drop the existing/backup relation after the commit - {# -- TODO: Check with Drew wether this backup_relation gets used at all should this materialisation - -- fail #} - {{ drop_relation_if_exists(backup_relation) }} - {{ run_hooks(post_hooks, inside_transaction=False) }} {% endmaterialization %} From f99efbf72ee51047f87f831c733f2f0459df953f Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Fri, 26 Apr 2019 20:04:21 +0200 Subject: [PATCH 16/29] remove references to temp and backup relations --- .../macros/materializations/incremental.sql | 4 +--- .../snowflake/macros/materializations/table.sql | 17 +---------------- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index f8f19c1a7e9..f5323d80c43 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -3,10 +3,8 @@ {%- set unique_key = config.get('unique_key') -%} {%- set sql_where = config.get('sql_where') -%} - {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} - {%- set identifier = model['alias'] -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} @@ -14,8 +12,8 @@ {%- set target_relation = api.Relation.create(database=database, identifier=identifier, schema=schema, type='table') -%} {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} - {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} + {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} {%- set force_create = full_refresh_mode -%} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index 0648984e353..7a8c174d234 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -1,31 +1,16 @@ {% materialization table, adapter='snowflake' %} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = identifier + '__dbt_tmp' -%} - {%- set backup_identifier = identifier + '__dbt_backup' -%} {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%} - {%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier, - schema=schema, - database=database, type='table') -%} - /* + /* --TODO: Is this still up to date? 
See ../view/view.sql for more information about this relation. */ - -- drop the backup relation if it exists, then make a new one that uses the old relation's type - {%- set backup_relation = adapter.get_relation(database=database, schema=schema, identifier=backup_identifier) -%} - {% if backup_relation is not none -%} - {{ adapter.drop_relation(backup_relation) }} - {%- endif %} - {%- set backup_relation = api.Relation.create(identifier=backup_identifier, - schema=schema, - database=database, - type=(old_relation.type or 'table')) -%} - {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%} {%- set create_as_temporary = (exists_as_table and non_destructive_mode) -%} From 5c1c5880b6aa48fc38ee2900f9fbfcef888cad1c Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Fri, 26 Apr 2019 20:10:12 +0200 Subject: [PATCH 17/29] more explicit comments and quick formatting --- .../include/snowflake/macros/materializations/table.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index 7a8c174d234..8fbb6193582 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -40,15 +40,15 @@ --build model {% call statement('main') -%} - -- we can leverage Snowflake create or replace table here to achieve an atomic replace. + {# Drop the relation if it was a view to essencially "convert" it in a table. This does lead to + downtime but I think it makes sense and should happen. Impact will be minimal I suspect. #} {% if old_relation is not none and old_relation.type == 'view' %} {{ log("Dropping relation " ~ old_relation ~ " because it is a view and this model is a table.") }} {{ drop_relation_if_exists(old_relation) }} {% endif %} - {{create_or_replace_table_as(target_relation, sql)}} - {%- endcall %} - -- skiping all previous renames here since they are not needed in Snowflake + {{ create_or_replace_table_as(target_relation, sql) }} + {%- endcall %} {{ run_hooks(post_hooks, inside_transaction=True) }} From 43a9db55b103b61be39da6a8b204b6751b891b3b Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 27 Apr 2019 14:35:10 +0200 Subject: [PATCH 18/29] quick todo marker --- .../dbt/include/snowflake/macros/materializations/table.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index 8fbb6193582..31f2139ae62 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -19,6 +19,7 @@ -- drop the temp relations if they exists for some reason {{ adapter.drop_relation(intermediate_relation) }} + --- FIXME: Do we want to put this block all together? I think it serves no purpose, but need to check -- setup: if the target relation already exists, truncate or drop it (if it's a view) {# TODO: Would like to check this. New materialsiation makes these tests a bit moot. We should be able to deprecate non-destructive flag all together here. 
#} From 4f62978de56b2e7f09778dedd9d0f6c440dd59c7 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 27 Apr 2019 06:46:41 -0700 Subject: [PATCH 19/29] Revert "Merge branch 'dev/wilt-chamberlain' into snowflake_create_or_replace" This reverts commit 3ab8238cfbdbfe6ba7ca2cd33617a82ba8e44c9f, reversing changes made to 43a9db55b103b61be39da6a8b204b6751b891b3b. --- .bumpversion.cfg | 4 +- .circleci/config.yml | 2 +- .coveragerc | 2 +- CHANGELOG.md | 77 +-- CONTRIBUTING.md | 4 +- Makefile | 2 +- RELEASE.md | 17 +- appveyor.yml | 64 ++ azure-pipelines.yml | 125 ---- core/dbt/adapters/base/__init__.py | 13 +- core/dbt/adapters/base/connections.py | 262 ++++---- core/dbt/adapters/base/impl.py | 327 +++------- core/dbt/adapters/base/meta.py | 27 +- core/dbt/adapters/base/plugin.py | 2 + core/dbt/adapters/base/relation.py | 141 +--- core/dbt/adapters/factory.py | 4 +- core/dbt/adapters/sql/__init__.py | 5 +- core/dbt/adapters/sql/connections.py | 53 +- core/dbt/adapters/sql/impl.py | 92 ++- core/dbt/api/object.py | 4 +- core/dbt/clients/_jinja_blocks.py | 481 -------------- core/dbt/clients/jinja.py | 10 +- core/dbt/clients/registry.py | 4 - core/dbt/clients/system.py | 12 +- core/dbt/compat.py | 9 +- core/dbt/compilation.py | 35 +- core/dbt/config/__init__.py | 27 +- core/dbt/config/profile.py | 8 +- core/dbt/config/project.py | 17 +- core/dbt/config/runtime.py | 17 +- core/dbt/context/common.py | 57 +- core/dbt/context/parser.py | 18 +- core/dbt/context/runtime.py | 5 +- core/dbt/contracts/connection.py | 2 + core/dbt/contracts/graph/compiled.py | 3 + core/dbt/contracts/graph/manifest.py | 16 +- core/dbt/contracts/graph/parsed.py | 95 +-- core/dbt/contracts/graph/unparsed.py | 28 +- core/dbt/contracts/project.py | 4 - core/dbt/contracts/results.py | 88 +-- core/dbt/deprecations.py | 12 + core/dbt/exceptions.py | 120 +--- core/dbt/flags.py | 5 +- core/dbt/graph/selector.py | 1 + core/dbt/hooks.py | 2 +- .../global_project/macros/adapters/common.sql | 22 +- .../macros/etc/get_custom_alias.sql | 26 - .../materializations/archive/archive.sql | 251 ++----- .../macros/materializations/helpers.sql | 4 +- .../incremental/incremental.sql | 17 +- .../macros/materializations/table/table.sql | 4 +- .../macros/materializations/view/view.sql | 4 +- core/dbt/linker.py | 42 +- core/dbt/loader.py | 25 +- core/dbt/logger.py | 115 +--- core/dbt/main.py | 304 ++++----- core/dbt/node_runners.py | 264 +++----- core/dbt/node_types.py | 3 - core/dbt/parser/__init__.py | 2 - core/dbt/parser/analysis.py | 5 - core/dbt/parser/archives.py | 97 +-- core/dbt/parser/base.py | 71 +- core/dbt/parser/base_sql.py | 84 +-- core/dbt/parser/docs.py | 14 +- core/dbt/parser/hooks.py | 4 +- core/dbt/parser/macros.py | 2 + core/dbt/parser/schemas.py | 59 +- core/dbt/parser/source_config.py | 5 +- core/dbt/parser/util.py | 195 +++--- core/dbt/rpc.py | 395 ------------ core/dbt/schema.py | 177 +++++ core/dbt/semver.py | 12 +- core/dbt/ssh_forward.py | 10 + core/dbt/task/archive.py | 7 +- core/dbt/task/base.py | 127 ---- core/dbt/task/base_task.py | 13 + core/dbt/task/clean.py | 4 +- core/dbt/task/compile.py | 145 +---- core/dbt/task/debug.py | 8 +- core/dbt/task/deps.py | 4 +- core/dbt/task/freshness.py | 4 +- core/dbt/task/generate.py | 8 +- core/dbt/task/init.py | 4 +- core/dbt/task/rpc_server.py | 83 --- core/dbt/task/run.py | 47 +- core/dbt/task/run_operation.py | 64 -- core/dbt/task/runnable.py | 121 +--- core/dbt/task/serve.py | 6 +- core/dbt/task/test.py | 17 +- core/dbt/ui/printer.py | 15 +- core/dbt/utils.py | 26 +- 
core/dbt/version.py | 3 +- .../invocation_env_context.json | 0 .../invocation_event.json | 0 .../platform_context.json | 0 .../run_model_context.json | 0 core/setup.py | 6 +- dev_requirements.txt | 7 +- docker-compose.yml | 2 +- etc/dbt-horizontal.png | Bin 8968 -> 18063 bytes .../dbt/adapters/bigquery/__init__.py | 5 +- .../dbt/adapters/bigquery/connections.py | 72 +-- .../bigquery/dbt/adapters/bigquery/impl.py | 118 ++-- .../dbt/adapters/bigquery/relation.py | 102 +-- .../dbt/include/bigquery/macros/adapters.sql | 14 +- .../macros/materializations/archive.sql | 11 +- .../macros/materializations/incremental.sql | 8 +- plugins/bigquery/setup.py | 2 +- .../dbt/adapters/postgres/__init__.py | 3 +- .../dbt/adapters/postgres/connections.py | 15 +- .../postgres/dbt/adapters/postgres/impl.py | 29 +- .../dbt/include/postgres/macros/adapters.sql | 12 +- .../dbt/include/postgres/macros/catalog.sql | 8 +- plugins/postgres/setup.py | 2 +- .../dbt/adapters/redshift/__init__.py | 2 +- .../dbt/adapters/redshift/connections.py | 20 +- .../redshift/dbt/adapters/redshift/impl.py | 7 +- .../redshift/dbt/include/redshift/__init__.py | 2 +- .../dbt/include/redshift/macros/adapters.sql | 12 +- .../dbt/include/redshift/macros/catalog.sql | 14 +- plugins/redshift/setup.py | 2 +- .../dbt/adapters/snowflake/__init__.py | 4 +- .../dbt/adapters/snowflake/connections.py | 34 +- .../snowflake/dbt/adapters/snowflake/impl.py | 4 + .../dbt/adapters/snowflake/relation.py | 3 +- .../dbt/include/snowflake/macros/adapters.sql | 21 +- .../dbt/include/snowflake/macros/catalog.sql | 9 +- plugins/snowflake/setup.py | 2 +- scripts/build-sdists.sh | 23 - setup.py | 2 +- test.env.sample | 2 +- .../001_simple_copy_test/test_simple_copy.py | 1 + .../invalidate_bigquery.sql | 2 +- .../invalidate_postgres.sql | 25 +- .../invalidate_snowflake.sql | 10 +- .../models/ref_archive.sql | 1 - .../004_simple_archive_test/seed.sql | 225 ++----- .../004_simple_archive_test/seed_bq.sql | 82 +-- .../test-archives-bq/archive.sql | 14 - .../test-archives-invalid/archive.sql | 12 - .../test-archives-pg/archive.sql | 14 - .../test-archives-select/archives.sql | 45 -- .../test-check-col-archives-bq/archive.sql | 27 - .../test-check-col-archives/archive.sql | 28 - .../test_simple_archive.py | 310 ++------- .../004_simple_archive_test/update.sql | 278 ++------ .../004_simple_archive_test/update_bq.sql | 94 +-- .../test_seed_type_override.py | 9 +- .../005_simple_seed_test/test_simple_seed.py | 15 +- .../test_local_dependency.py | 13 +- .../test_simple_dependency.py | 11 +- .../test_simple_dependency_with_configs.py | 11 +- .../models/users_rollup_dependency.sql | 5 - .../test_graph_selection.py | 91 ++- .../test_schema_test_graph_selection.py | 31 +- .../test_tag_selection.py | 10 +- .../ephemeral/ephemeral.sql | 4 - .../ephemeral/schema.yml | 8 - .../test_schema_v2_tests.py | 41 +- .../009_data_tests_test/test_data_tests.py | 7 +- .../010_permission_tests/test_permissions.py | 7 +- .../test_invalid_models.py | 7 +- .../models/sql_where.sql | 3 + .../test_deprecations.py | 4 +- .../test_context_vars.py | 15 +- .../014_hook_tests/test_model_hooks.py | 19 +- .../014_hook_tests/test_model_hooks_bq.py | 7 +- .../014_hook_tests/test_run_hooks.py | 9 +- .../014_hook_tests/test_run_hooks_bq.py | 7 +- .../test_cli_invocation.py | 11 +- .../016_macro_tests/test_macros.py | 9 +- .../test_runtime_materialization.py | 13 +- .../018_adapter_ddl_tests/test_adapter_ddl.py | 5 +- .../019_analysis_tests/test_analyses.py | 5 +- .../020_ephemeral_test/test_ephemeral.py | 9 
+- .../021_concurrency_test/test_concurrency.py | 7 +- .../test_bigquery_adapter_functions.py | 5 +- .../test_bigquery_date_partitioning.py | 5 +- .../023_exit_codes_test/test_exit_codes.py | 25 +- .../024_custom_schema_test/models/view_3.sql | 2 +- .../test_custom_schema.py | 49 +- .../test_duplicate_model.py | 16 +- .../025_timezones_test/test_timezones.py | 7 +- .../026_aliases_test/test_aliases.py | 13 +- .../integration/027_cycle_test/test_cycles.py | 7 +- .../028_cli_vars/test_cli_var_override.py | 7 +- .../integration/028_cli_vars/test_cli_vars.py | 9 +- .../ref_models/schema.yml | 5 - .../test_docs_generate.py | 132 ++-- .../test_concurrent_transaction.py | 19 +- .../033_event_tracking_test/test_events.py | 39 +- .../test_late_binding_view.py | 1 + .../test_external_reference.py | 9 +- .../test_override_database.py | 15 +- .../042_sources_test/macros/macro.sql | 7 - .../models/ephemeral_model.sql | 3 - .../042_sources_test/models/schema.yml | 4 - .../042_sources_test/test_sources.py | 610 +----------------- .../043_custom_aliases_test/macros/macros.sql | 21 - .../043_custom_aliases_test/models/model1.sql | 3 - .../043_custom_aliases_test/models/model2.sql | 3 - .../043_custom_aliases_test/models/schema.yml | 15 - .../test_custom_aliases.py | 23 - .../macros/happy_macros.sql | 24 - .../macros/sad_macros.sql | 7 - .../044_run_operations_test/models/model.sql | 1 - .../test_run_operations.py | 58 -- test/integration/base.py | 137 ++-- test/unit/test_bigquery_adapter.py | 10 +- test/unit/test_config.py | 30 +- test/unit/test_deps.py | 9 +- test/unit/test_docs_blocks.py | 30 +- test/unit/test_graph.py | 6 +- test/unit/test_jinja.py | 316 --------- test/unit/test_parser.py | 139 ++-- test/unit/test_postgres_adapter.py | 50 +- test/unit/test_redshift_adapter.py | 26 +- .../{test_base_column.py => test_schema.py} | 9 +- test/unit/test_snowflake_adapter.py | 43 +- test/unit/test_system_client.py | 7 +- test/unit/utils.py | 12 +- tox.ini | 100 +-- 222 files changed, 2481 insertions(+), 6949 deletions(-) create mode 100644 appveyor.yml delete mode 100644 azure-pipelines.yml delete mode 100644 core/dbt/clients/_jinja_blocks.py delete mode 100644 core/dbt/include/global_project/macros/etc/get_custom_alias.sql delete mode 100644 core/dbt/rpc.py create mode 100644 core/dbt/schema.py create mode 100644 core/dbt/ssh_forward.py delete mode 100644 core/dbt/task/base.py create mode 100644 core/dbt/task/base_task.py delete mode 100644 core/dbt/task/rpc_server.py delete mode 100644 core/dbt/task/run_operation.py rename {events => core/events}/schemas/com.fishtownanalytics/invocation_env_context.json (100%) rename {events => core/events}/schemas/com.fishtownanalytics/invocation_event.json (100%) rename {events => core/events}/schemas/com.fishtownanalytics/platform_context.json (100%) rename {events => core/events}/schemas/com.fishtownanalytics/run_model_context.json (100%) delete mode 100755 scripts/build-sdists.sh delete mode 100644 test/integration/004_simple_archive_test/models/ref_archive.sql delete mode 100644 test/integration/004_simple_archive_test/test-archives-bq/archive.sql delete mode 100644 test/integration/004_simple_archive_test/test-archives-invalid/archive.sql delete mode 100644 test/integration/004_simple_archive_test/test-archives-pg/archive.sql delete mode 100644 test/integration/004_simple_archive_test/test-archives-select/archives.sql delete mode 100644 test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql delete mode 100644 
test/integration/004_simple_archive_test/test-check-col-archives/archive.sql delete mode 100644 test/integration/007_graph_selection_tests/models/users_rollup_dependency.sql delete mode 100644 test/integration/008_schema_tests_test/ephemeral/ephemeral.sql delete mode 100644 test/integration/008_schema_tests_test/ephemeral/schema.yml create mode 100644 test/integration/012_deprecation_tests/models/sql_where.sql delete mode 100644 test/integration/042_sources_test/macros/macro.sql delete mode 100644 test/integration/042_sources_test/models/ephemeral_model.sql delete mode 100644 test/integration/043_custom_aliases_test/macros/macros.sql delete mode 100644 test/integration/043_custom_aliases_test/models/model1.sql delete mode 100644 test/integration/043_custom_aliases_test/models/model2.sql delete mode 100644 test/integration/043_custom_aliases_test/models/schema.yml delete mode 100644 test/integration/043_custom_aliases_test/test_custom_aliases.py delete mode 100644 test/integration/044_run_operations_test/macros/happy_macros.sql delete mode 100644 test/integration/044_run_operations_test/macros/sad_macros.sql delete mode 100644 test/integration/044_run_operations_test/models/model.sql delete mode 100644 test/integration/044_run_operations_test/test_run_operations.py rename test/unit/{test_base_column.py => test_schema.py} (87%) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 1afdbf592fd..00c2666d1f2 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.13.0 +current_version = 0.13.0a1 parse = (?P\d+) \.(?P\d+) \.(?P\d+) @@ -20,8 +20,6 @@ values = [bumpversion:part:num] first_value = 1 -[bumpversion:file:setup.py] - [bumpversion:file:core/setup.py] [bumpversion:file:core/dbt/version.py] diff --git a/.circleci/config.yml b/.circleci/config.yml index 6d0474d57e6..fa3942ffeef 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -19,7 +19,7 @@ jobs: PGUSER: root PGPASSWORD: password PGDATABASE: postgres - - run: tox -e flake8,unit-py27,unit-py36 + - run: tox -e pep8,unit-py27,unit-py36 integration-postgres-py36: docker: *test_and_postgres steps: diff --git a/.coveragerc b/.coveragerc index 5233b856876..18244411816 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,4 +1,4 @@ [report] include = core/dbt/* - plugins/*/dbt/* + plugins/adapters/dbt/* diff --git a/CHANGELOG.md b/CHANGELOG.md index 4400f7af66e..41a5fa0db2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,71 +1,20 @@ -## dbt 0.13.0 - Stephen Girard (March 21, 2019) +## dbt dev/stephen-girard (0.13.0? - To be released) -### Overview +## Overview -This release provides [a stable API for building new adapters](https://docs.getdbt.com/v0.13/docs/building-a-new-adapter) and reimplements dbt's adapters as "plugins". Additionally, a new adapter for [Presto](https://github.com/fishtown-analytics/dbt-presto) was added using this architecture. Beyond adapters, this release of dbt also includes [Sources](https://docs.getdbt.com/v0.13/docs/using-sources) which can be used to document and test source data tables. See the full list of features added in 0.13.0 below. +This release makes dbt and its adapters into a core-and-plugin architecture. ### Breaking Changes -- version 1 schema.yml specs are no longer implemented. Please use the version 2 spec instead ([migration guide](https://docs.getdbt.com/docs/upgrading-from-0-10-to-0-11#section-schema-yml-v2-syntax)) -- `{{this}}` is no longer implemented for `on-run-start` and `on-run-end` hooks. 
Use `{{ target }}` or an [`on-run-end` context variable](https://docs.getdbt.com/reference#schemas) instead ([#1176](https://github.com/fishtown-analytics/dbt/pull/1176), implementing [#878](https://github.com/fishtown-analytics/dbt/issues/878)) -- A number of materialization-specific adapter methods have changed in breaking ways. If you use these adapter methods in your macros or materializations, you may need to update your code accordingly. - - query_for_existing - **removed**, use [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) instead. - - [get_missing_columns](https://docs.getdbt.com/v0.13/reference#adapter-get-missing-columns) - changed to take `Relation`s instead of schemas and identifiers - - [expand_target_column_types](https://docs.getdbt.com/v0.13/reference#adapter-expand-target-column-types) - changed to take a `Relation` instead of schema, identifier - - [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) - added a `database` argument - - [create_schema](https://docs.getdbt.com/v0.13/reference#adapter-create-schema) - added a `database` argument - - [drop_schema](https://docs.getdbt.com/v0.13/reference#adapter-drop-schema) - added a `database` argument - -### Deprecations -- The following adapter methods are now deprecated, and will be removed in a future release: - - get_columns_in_table - deprecated in favor of [get_columns_in_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-columns-in-relation) - - already_exists - deprecated in favor of [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) - -### Features -- Add `source`s to dbt, use them to calculate source data freshness ([docs](https://docs.getdbt.com/v0.13/docs/using-sources) ) ([#814](https://github.com/fishtown-analytics/dbt/issues/814), [#1240](https://github.com/fishtown-analytics/dbt/issues/1240)) -- Add support for Presto ([docs](https://docs.getdbt.com/v0.13/docs/profile-presto), [repo](https://github.com/fishtown-analytics/dbt-presto)) ([#1106](https://github.com/fishtown-analytics/dbt/issues/1106)) -- Add `require-dbt-version` option to `dbt_project.yml` to state the supported versions of dbt for packages ([docs](https://docs.getdbt.com/v0.13/docs/requiring-dbt-versions)) ([#581](https://github.com/fishtown-analytics/dbt/issues/581)) -- Add an output line indicating the installed version of dbt to every run ([#1134](https://github.com/fishtown-analytics/dbt/issues/1134)) -- Add a new model selector (`@`) which build models, their children, and their children's parents ([docs](https://docs.getdbt.com/v0.13/reference#section-the-at-operator)) ([#1156](https://github.com/fishtown-analytics/dbt/issues/1156)) -- Add support for Snowflake Key Pair Authentication ([docs](https://docs.getdbt.com/v0.13/docs/profile-snowflake#section-key-pair-authentication)) ([#1232](https://github.com/fishtown-analytics/dbt/pull/1232)) -- Support SSO Authentication for Snowflake ([docs](https://docs.getdbt.com/v0.13/docs/profile-snowflake#section-sso-authentication)) ([#1172](https://github.com/fishtown-analytics/dbt/issues/1172)) -- Add support for Snowflake's transient tables ([docs](https://docs.getdbt.com/v0.13/docs/snowflake-configs#section-transient-tables)) ([#946](https://github.com/fishtown-analytics/dbt/issues/946)) -- Capture build timing data in `run_results.json` to visualize project performance ([#1179](https://github.com/fishtown-analytics/dbt/issues/1179)) -- Add CLI flag to toggle warnings as errors 
([docs](https://docs.getdbt.com/v0.13/reference#section-treat-warnings-as-errors)) ([#1243](https://github.com/fishtown-analytics/dbt/issues/1243)) -- Add tab completion script for Bash ([docs](https://github.com/fishtown-analytics/dbt-completion.bash)) ([#1197](https://github.com/fishtown-analytics/dbt/issues/1197)) -- Added docs on how to build a new adapter ([docs](https://docs.getdbt.com/v0.13/docs/building-a-new-adapter)) ([#560](https://github.com/fishtown-analytics/dbt/issues/560)) -- Use new logo ([#1349](https://github.com/fishtown-analytics/dbt/pull/1349)) - -### Fixes -- Fix for Postgres character columns treated as string types ([#1194](https://github.com/fishtown-analytics/dbt/issues/1194)) -- Fix for hard to reach edge case in which dbt could hang ([#1223](https://github.com/fishtown-analytics/dbt/issues/1223)) -- Fix for `dbt deps` in non-English shells ([#1222](https://github.com/fishtown-analytics/dbt/issues/1222)) -- Fix for over eager schema creation when models are run with `--models` ([#1239](https://github.com/fishtown-analytics/dbt/issues/1239)) -- Fix for `dbt seed --show` ([#1288](https://github.com/fishtown-analytics/dbt/issues/1288)) -- Fix for `is_incremental()` which should only return `True` if the target relation is a `table` ([#1292](https://github.com/fishtown-analytics/dbt/issues/1292)) -- Fix for error in Snowflake table materializations with custom schemas ([#1316](https://github.com/fishtown-analytics/dbt/issues/1316)) -- Fix errored out concurrent transactions on Redshift and Postgres ([#1356](https://github.com/fishtown-analytics/dbt/pull/1356)) -- Fix out of order execution on model select ([#1354](https://github.com/fishtown-analytics/dbt/issues/1354), [#1355](https://github.com/fishtown-analytics/dbt/pull/1355)) -- Fix adapter macro namespace issue ([#1352](https://github.com/fishtown-analytics/dbt/issues/1352), [#1353](https://github.com/fishtown-analytics/dbt/pull/1353)) -- Re-add CLI flag to toggle warnings as errors ([#1347](https://github.com/fishtown-analytics/dbt/pull/1347)) -- Fix release candidate regression that runs run hooks on test invocations ([#1346](https://github.com/fishtown-analytics/dbt/pull/1346)) -- Fix Snowflake source quoting ([#1338](https://github.com/fishtown-analytics/dbt/pull/1338), [#1317](https://github.com/fishtown-analytics/dbt/issues/1317), [#1332](https://github.com/fishtown-analytics/dbt/issues/1332)) -- Handle unexpected max_loaded_at types ([#1330](https://github.com/fishtown-analytics/dbt/pull/1330)) - -### Under the hood -- Replace all SQL in Python code with Jinja in macros ([#1204](https://github.com/fishtown-analytics/dbt/issues/1204)) -- Loosen restrictions of boto3 dependency ([#1234](https://github.com/fishtown-analytics/dbt/issues/1234)) -- Rewrote Postgres introspective queries to be faster on large databases ([#1192](https://github.com/fishtown-analytics/dbt/issues/1192) - - -### Contributors: -Thanks for your contributions to dbt! 
- -- [@patrickgoss](https://github.com/patrickgoss) [#1193](https://github.com/fishtown-analytics/dbt/issues/1193) -- [@brianhartsock](https://github.com/brianhartsock) [#1191](https://github.com/fishtown-analytics/dbt/pull/1191) -- [@alexyer](https://github.com/alexyer) [#1232](https://github.com/fishtown-analytics/dbt/pull/1232) -- [@adriank-convoy](https://github.com/adriank-convoy) [#1224](https://github.com/fishtown-analytics/dbt/pull/1224) -- [@mikekaminsky](https://github.com/mikekaminsky) [#1216](https://github.com/fishtown-analytics/dbt/pull/1216) -- [@vijaykiran](https://github.com/vijaykiran) [#1198](https://github.com/fishtown-analytics/dbt/pull/1198), [#1199](https://github.com/fishtown-analytics/dbt/pull/1199) +- '{{this}}' is no longer respected in hooks [#1176](https://github.com/fishtown-analytics/dbt/pull/1176), implementing [#878](https://github.com/fishtown-analytics/dbt/issues/878) +- A number of widely-used adapter methods previously available in macros/materializations have changed in breaking ways: + - get_missing_columns - takes Relations instead of schemas and identifiers + - get_columns_in_table - deprecated in favor of get_columns_in_relation (takes a Relation instead of schema, identifier) + - expand_target_column_types - takes a Relation instead of schema, identifier + - query_for_existing - removed + - get_relation - database parameter added + - create_schema - database parameter added + - drop_schema - database parameter added + - already_exists - deprecated in favor of get_relation (takes a relation) ## dbt 0.12.2 - Grace Kelly (January 8, 2019) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cf58e691b67..8cbfc4facaf 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -76,8 +76,8 @@ A short list of tools used in dbt testing that will be helpful to your understan - [tox](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions - [nosetests](http://nose.readthedocs.io/en/latest) to discover/run tests - [make](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) - but don't worry too much, nobody _really_ understands how make works and our Makefile is super simple -- [flake8](https://gitlab.com/pycqa/flake8) for code linting -- [CircleCI](https://circleci.com/product/) and [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/) +- [pep8](https://pep8.readthedocs.io/en/release-1.7.x/) for code linting +- [CircleCI](https://circleci.com/product/) and [Appveyor](https://www.appveyor.com/docs/) If you're unfamiliar with any or all of these, that's fine! You really do not have to have a deep understanding of any of these to get by. diff --git a/Makefile b/Makefile index e0e9a176360..4350de0d140 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ test: test-unit: @echo "Unit test run starting..." - @time docker-compose run test tox -e unit-py27,unit-py36,flake8 + @time docker-compose run test tox -e unit-py27,unit-py36,pep8 test-integration: @echo "Integration test run starting..." diff --git a/RELEASE.md b/RELEASE.md index 22c8cbf3b97..99321e4eb15 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -11,22 +11,21 @@ dbt has three types of branches: #### Git & PyPI 1. Update CHANGELOG.md with the most recent changes -2. If this is a release candidate, you want to create it off of your release branch. If it's an actual release, you must first merge to a master branch. Open a Pull Request in Github to merge it into the appropriate trunk (`X.X.latest`) +2. 
If this is a release candidate, you want to create it off of your release branch. If it's an actual release, you must first merge to a master branch. Open a Pull Request in Github to merge it. 3. Bump the version using `bumpversion`: - Dry run first by running `bumpversion --new-version ` and checking the diff. If it looks correct, clean up the chanages and move on: - Alpha releases: `bumpversion --commit --tag --new-version 0.10.2a1 num` - Patch releases: `bumpversion --commit --tag --new-version 0.10.2 patch` - Minor releases: `bumpversion --commit --tag --new-version 0.11.0 minor` - Major releases: `bumpversion --commit --tag --new-version 1.0.0 major` -4. (If this is a not a release candidate) Merge to `x.x.latest` and (optionally) `master`. +4. (If this is a not a release candidate) Merge to x.x.latest and master. 5. Update the default branch to the next dev release branch. -6. Build source distributions for all packages by running `./scripts/build-sdists.sh`. Note that this will clean out your `dist/` folder, so if you have important stuff in there, don't run it!!! -7. Deploy to pypi - - `twine upload dist/*` -8. Deploy to homebrew (see below) -9. Deploy to conda-forge (see below) -10. Git release notes (points to changelog) -11. Post to slack (point to changelog) +6. Deploy to pypi + - `python setup.py sdist upload -r pypi` +7. Deploy to homebrew (see below) +8. Deploy to conda-forge (see below) +9. Git release notes (points to changelog) +10. Post to slack (point to changelog) After releasing a new version, it's important to merge the changes back into the other outstanding release branches. This avoids merge conflicts moving forward. diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 00000000000..f44c1569dbe --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,64 @@ +version: 1.0.{build}-{branch} + +environment: + # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the + # /E:ON and /V:ON options are not enabled in the batch script intepreter + # See: http://stackoverflow.com/a/13751649/163740 + CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd" + TOX_ENV: "pywin" + + matrix: + - PYTHON: "C:\\Python35" + PYTHON_VERSION: "3.5.2" + PYTHON_ARCH: "32" + + #- PYTHON: "C:\\Python35" + # PYTHON_VERSION: "3.5.2" + # PYTHON_ARCH: "32" + + PGUSER: postgres + PGPASSWORD: Password12! + +services: + - postgresql94 + +hosts: + database: 127.0.0.1 + +init: + - PATH=C:\Program Files\PostgreSQL\9.4\bin\;%PATH% + - ps: Set-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all ::1/128 trust" + - ps: Add-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all 127.0.0.1/32 trust" + +install: + # Download setup scripts and unzip + - ps: "wget https://github.com/cloudify-cosmo/appveyor-utils/archive/master.zip -OutFile ./master.zip" + - "7z e master.zip */appveyor/* -oappveyor" + + # Install Python (from the official .msi of http://python.org) and pip when + # not already installed. + - "powershell ./appveyor/install.ps1" + + # Prepend newly installed Python to the PATH of this build (this cannot be + # done from inside the powershell script as it would require to restart + # the parent CMD process). + - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" + + # Check that we have the expected version and architecture for Python + - "python --version" + - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" + +build: false # Not a C# project, build stuff at the test step instead. 
+ +before_test: + - "%CMD_IN_ENV% python -m pip install --upgrade pip" # sometimes on appveyor we get pip 7.x! + - "%CMD_IN_ENV% pip install psycopg2==2.6.2" + - "%CMD_IN_ENV% pip install tox" + +test_script: + - "bash test/setup_db.sh" + + # this is generally a bad idea TODO + - git config --system http.sslverify false + + - "%CMD_IN_ENV% tox -e %TOX_ENV%" diff --git a/azure-pipelines.yml b/azure-pipelines.yml deleted file mode 100644 index b523a14c47d..00000000000 --- a/azure-pipelines.yml +++ /dev/null @@ -1,125 +0,0 @@ -# Python package -# Create and test a Python package on multiple Python versions. -# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: -# https://docs.microsoft.com/azure/devops/pipelines/languages/python - -trigger: - branches: - include: - - master - - dev/* - - pr/* - -jobs: -- job: UnitTest - pool: - vmImage: 'vs2017-win2016' - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.5' - architecture: 'x64' - - - script: python -m pip install --upgrade pip && pip install tox - displayName: 'Install dependencies' - - - script: python -m tox -e pywin-unit - displayName: Run unit tests - -- job: PostgresIntegrationTest - pool: - vmImage: 'vs2017-win2016' - dependsOn: UnitTest - - steps: - - pwsh: | - choco install postgresql --params '/Password:password' --params-global --version 10.6 - Set-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all ::1/128 trust" - Add-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all 127.0.0.1/32 trust" - # the service name is "postgresql-x64-10", conveniently it's both the display name and the actual name - Restart-Service postgresql-x64-10 - - & "C:\program files\postgresql\10\bin\createdb.exe" -U postgres dbt - & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "CREATE ROLE root WITH PASSWORD 'password';" - & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "ALTER ROLE root WITH LOGIN;" - & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root WITH GRANT OPTION;" - & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "CREATE ROLE noaccess WITH PASSWORD 'password' NOSUPERUSER;" - & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "ALTER ROLE noaccess WITH LOGIN;" - & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "GRANT CONNECT ON DATABASE dbt TO noaccess;" - displayName: Install postgresql and set up database - - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.5' - architecture: 'x64' - - - script: python -m pip install --upgrade pip && pip install tox - displayName: 'Install dependencies' - - - script: python -m tox -e pywin-postgres - displayName: Run integration tests - -# These three are all similar except secure environment variables, which MUST be passed along to their tasks, -# but there's probably a better way to do this! 
-- job: SnowflakeIntegrationTest - pool: - vmImage: 'vs2017-win2016' - dependsOn: PostgresIntegrationTest - - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.5' - architecture: 'x64' - - - script: python -m pip install --upgrade pip && pip install tox - displayName: 'Install dependencies' - - - script: python -m tox -e pywin-snowflake - env: - SNOWFLAKE_TEST_ACCOUNT: $(SNOWFLAKE_TEST_ACCOUNT) - SNOWFLAKE_TEST_PASSWORD: $(SNOWFLAKE_TEST_PASSWORD) - SNOWFLAKE_TEST_USER: $(SNOWFLAKE_TEST_USER) - SNOWFLAKE_TEST_WAREHOUSE: $(SNOWFLAKE_TEST_WAREHOUSE) - displayName: Run integration tests - -- job: BigQueryIntegrationTest - pool: - vmImage: 'vs2017-win2016' - dependsOn: PostgresIntegrationTest - - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.5' - architecture: 'x64' - - - script: python -m pip install --upgrade pip && pip install tox - displayName: 'Install dependencies' - - script: python -m tox -e pywin-bigquery - env: - BIGQUERY_SERVICE_ACCOUNT_JSON: $(BIGQUERY_SERVICE_ACCOUNT_JSON) - displayName: Run integration tests - -- job: RedshiftIntegrationTest - pool: - vmImage: 'vs2017-win2016' - dependsOn: PostgresIntegrationTest - - steps: - - task: UsePythonVersion@0 - inputs: - versionSpec: '3.5' - architecture: 'x64' - - - script: python -m pip install --upgrade pip && pip install tox - displayName: 'Install dependencies' - - - script: python -m tox -e pywin-redshift - env: - REDSHIFT_TEST_DBNAME: $(REDSHIFT_TEST_DBNAME) - REDSHIFT_TEST_PASS: $(REDSHIFT_TEST_PASS) - REDSHIFT_TEST_USER: $(REDSHIFT_TEST_USER) - REDSHIFT_TEST_PORT: $(REDSHIFT_TEST_PORT) - REDSHIFT_TEST_HOST: $(REDSHIFT_TEST_HOST) - displayName: Run integration tests diff --git a/core/dbt/adapters/base/__init__.py b/core/dbt/adapters/base/__init__.py index 5edf237447b..98f96abe1b4 100644 --- a/core/dbt/adapters/base/__init__.py +++ b/core/dbt/adapters/base/__init__.py @@ -1,8 +1,5 @@ -# these are all just exports, #noqa them so flake8 will be happy -from dbt.adapters.base.meta import available # noqa -from dbt.adapters.base.relation import BaseRelation # noqa -from dbt.adapters.base.relation import Column # noqa -from dbt.adapters.base.connections import BaseConnectionManager # noqa -from dbt.adapters.base.connections import Credentials # noqa -from dbt.adapters.base.impl import BaseAdapter # noqa -from dbt.adapters.base.plugin import AdapterPlugin # noqa +from dbt.adapters.base.meta import available +from dbt.adapters.base.relation import BaseRelation +from dbt.adapters.base.connections import BaseConnectionManager, Credentials +from dbt.adapters.base.impl import BaseAdapter +from dbt.adapters.base.plugin import AdapterPlugin diff --git a/core/dbt/adapters/base/connections.py b/core/dbt/adapters/base/connections.py index 8a29e7d9ff7..c65e932454e 100644 --- a/core/dbt/adapters/base/connections.py +++ b/core/dbt/adapters/base/connections.py @@ -1,13 +1,12 @@ import abc import multiprocessing -import os import six import dbt.exceptions import dbt.flags from dbt.api import APIObject -from dbt.compat import abstractclassmethod, get_ident +from dbt.compat import abstractclassmethod from dbt.contracts.connection import Connection from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import translate_aliases @@ -72,7 +71,6 @@ class BaseConnectionManager(object): - open - begin - commit - - clear_transaction - execute You must also set the 'TYPE' class attribute with a class-unique constant @@ -82,95 +80,83 @@ class BaseConnectionManager(object): def __init__(self, profile): self.profile = profile 
- self.thread_connections = {} + self.in_use = {} + self.available = [] self.lock = multiprocessing.RLock() - - @staticmethod - def get_thread_identifier(): - # note that get_ident() may be re-used, but we should never experience - # that within a single process - return (os.getpid(), get_ident()) - - def get_thread_connection(self): - key = self.get_thread_identifier() - with self.lock: - if key not in self.thread_connections: - raise RuntimeError( - 'connection never acquired for thread {}, have {}' - .format(key, list(self.thread_connections)) - ) - return self.thread_connections[key] - - def get_if_exists(self): - key = self.get_thread_identifier() - with self.lock: - return self.thread_connections.get(key) - - def clear_thread_connection(self): - key = self.get_thread_identifier() - with self.lock: - if key in self.thread_connections: - del self.thread_connections[key] - - def clear_transaction(self): - """Clear any existing transactions.""" - conn = self.get_thread_connection() - if conn is not None: - if conn.transaction_open: - self._rollback(conn) - self.begin() - self.commit() + self._set_initial_connections() + + def _set_initial_connections(self): + self.available = [] + # set up the array of connections in the 'init' state. + # we add a magic number, 2 because there are overhead connections, + # one for pre- and post-run hooks and other misc operations that occur + # before the run starts, and one for integration tests. + for idx in range(self.profile.threads + 2): + self.available.append(self._empty_connection()) + + def _empty_connection(self): + return Connection( + type=self.TYPE, + name=None, + state='init', + transaction_open=False, + handle=None, + credentials=self.profile.credentials + ) @abc.abstractmethod - def exception_handler(self, sql): + def exception_handler(self, sql, connection_name='master'): """Create a context manager that handles exceptions caused by database interactions. :param str sql: The SQL string that the block inside the context manager is executing. + :param str connection_name: The name of the connection being used :return: A context manager that handles exceptions raised by the underlying database. """ raise dbt.exceptions.NotImplementedException( '`exception_handler` is not implemented for this adapter!') - def set_connection_name(self, name=None): + def get(self, name=None): + """This is thread-safe as long as two threads don't use the same + "name". + """ if name is None: # if a name isn't specified, we'll re-use a single handle # named 'master' name = 'master' - conn = self.get_if_exists() - thread_id_key = self.get_thread_identifier() + with self.lock: + if name in self.in_use: + return self.in_use[name] - if conn is None: - conn = Connection( - type=self.TYPE, - name=None, - state='init', - transaction_open=False, - handle=None, - credentials=self.profile.credentials - ) - self.thread_connections[thread_id_key] = conn + logger.debug('Acquiring new {} connection "{}".' + .format(self.TYPE, name)) - if conn.name == name and conn.state == 'open': - return conn + if not self.available: + raise dbt.exceptions.InternalException( + 'Tried to request a new connection "{}" but ' + 'the maximum number of connections are already ' + 'allocated!'.format(name) + ) - logger.debug('Acquiring new {} connection "{}".' - .format(self.TYPE, name)) + connection = self.available.pop() + # connection is temporarily neither in use nor available, but both + # collections are in a sane state, so we can release the lock. 
- if conn.state == 'open': - logger.debug( - 'Re-using an available connection from the pool (formerly {}).' - .format(conn.name)) - else: - logger.debug('Opening a new connection, currently in state {}' - .format(conn.state)) - self.open(conn) + # this potentially calls open(), but does so without holding the lock + connection = self.assign(connection, name) - conn.name = name - return conn + with self.lock: + if name in self.in_use: + raise dbt.exceptions.InternalException( + 'Two threads concurrently tried to get the same name: {}' + .format(name) + ) + self.in_use[name] = connection + + return connection @abc.abstractmethod def cancel_open(self): @@ -197,39 +183,81 @@ def open(cls, connection): '`open` is not implemented for this adapter!' ) - def release(self): + def assign(self, conn, name): + """Open a connection if it's not already open, and assign it name + regardless. + + The caller is responsible for putting the assigned connection into the + in_use collection. + + :param Connection conn: A connection, in any state. + :param str name: The name of the connection to set. + """ + if name is None: + name = 'master' + + conn.name = name + + if conn.state == 'open': + logger.debug('Re-using an available connection from the pool.') + else: + logger.debug('Opening a new connection, currently in state {}' + .format(conn.state)) + conn = self.open(conn) + + return conn + + def _release_connection(self, conn): + if conn.state == 'open': + if conn.transaction_open is True: + self._rollback(conn) + conn.name = None + else: + self.close(conn) + + def release(self, name): with self.lock: - conn = self.get_if_exists() - if conn is None: + if name not in self.in_use: return + to_release = self.in_use.pop(name) + # to_release is temporarily neither in use nor available, but both + # collections are in a sane state, so we can release the lock. + try: - if conn.state == 'open': - if conn.transaction_open is True: - self._rollback(conn) - else: - self.close(conn) - except Exception: - # if rollback or close failed, remove our busted connection - self.clear_thread_connection() + self._release_connection(to_release) + except: + # if rollback or close failed, replace our busted connection with + # a new one + to_release = self._empty_connection() raise + finally: + # now that this connection has been rolled back and the name reset, + # or the connection has been closed, put it back on the available + # list + with self.lock: + self.available.append(to_release) def cleanup_all(self): with self.lock: - for connection in self.thread_connections.values(): - if connection.state not in {'closed', 'init'}: + for name, connection in self.in_use.items(): + if connection.state != 'closed': logger.debug("Connection '{}' was left open." - .format(connection.name)) + .format(name)) else: logger.debug("Connection '{}' was properly closed." - .format(connection.name)) - self.close(connection) + .format(name)) + + conns_in_use = list(self.in_use.values()) + for conn in conns_in_use + self.available: + self.close(conn) # garbage collect these connections - self.thread_connections.clear() + self.in_use.clear() + self._set_initial_connections() @abc.abstractmethod - def begin(self): + def begin(self, name): """Begin a transaction. (passable) :param str name: The name of the connection to use. @@ -238,32 +266,34 @@ def begin(self): '`begin` is not implemented for this adapter!' 
) + def get_if_exists(self, name): + if name is None: + name = 'master' + + if self.in_use.get(name) is None: + return + + return self.get(name) + @abc.abstractmethod - def commit(self): - """Commit a transaction. (passable)""" + def commit(self, connection): + """Commit a transaction. (passable) + + :param str name: The name of the connection to use. + """ raise dbt.exceptions.NotImplementedException( '`commit` is not implemented for this adapter!' ) - @classmethod - def _rollback_handle(cls, connection): + def _rollback_handle(self, connection): """Perform the actual rollback operation.""" connection.handle.rollback() - @classmethod - def _close_handle(cls, connection): - """Perform the actual close operation.""" - # On windows, sometimes connection handles don't have a close() attr. - if hasattr(connection.handle, 'close'): - logger.debug('On {}: Close'.format(connection.name)) - connection.handle.close() - else: - logger.debug('On {}: No close available on handle' - .format(connection.name)) - - @classmethod - def _rollback(cls, connection): + def _rollback(self, connection): """Roll back the given connection. + + The connection does not have to be in in_use or available, so this + operation does not require the lock. """ if dbt.flags.STRICT_MODE: assert isinstance(connection, Connection) @@ -274,7 +304,7 @@ def _rollback(cls, connection): 'it does not have one open!'.format(connection.name)) logger.debug('On {}: ROLLBACK'.format(connection.name)) - cls._rollback_handle(connection) + self._rollback_handle(connection) connection.transaction_open = False @@ -290,28 +320,40 @@ def close(cls, connection): return connection if connection.transaction_open and connection.handle: - cls._rollback_handle(connection) + connection.handle.rollback() connection.transaction_open = False - cls._close_handle(connection) + # On windows, sometimes connection handles don't have a close() attr. + if hasattr(connection.handle, 'close'): + connection.handle.close() + else: + logger.debug('On {}: No close available on handle' + .format(connection.name)) + connection.state = 'closed' return connection - def commit_if_has_connection(self): + def commit_if_has_connection(self, name): """If the named connection exists, commit the current transaction. :param str name: The name of the connection to use. """ - connection = self.get_if_exists() + connection = self.in_use.get(name) if connection: - self.commit() + self.commit(connection) + + def clear_transaction(self, conn_name='master'): + conn = self.begin(conn_name) + self.commit(conn) + return conn_name @abc.abstractmethod - def execute(self, sql, auto_begin=False, fetch=False): + def execute(self, sql, name=None, auto_begin=False, fetch=False): """Execute the given SQL. :param str sql: The sql to execute. + :param Optional[str] name: The name to use for the connection. :param bool auto_begin: If set, and dbt is not currently inside a transaction, automatically begin one. :param bool fetch: If set, fetch results. 
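As an aside on the connection-manager interface shown in the diff above: connections are handed out by name via get(name) and returned via release(name), backed by an in_use dict and an available list guarded by a lock. A minimal, self-contained sketch of that pooling pattern follows (a toy class with invented names, not dbt's actual BaseConnectionManager):

import threading

class ToyNamedConnectionPool:
    # Illustrative only: hands out placeholder "connections" keyed by name.
    def __init__(self, max_connections=4):
        # pre-allocate empty slots, mirroring _set_initial_connections above
        self.available = [object() for _ in range(max_connections)]
        self.in_use = {}
        self.lock = threading.RLock()

    def get(self, name='master'):
        with self.lock:
            if name in self.in_use:
                # a caller asking for the same name re-uses its connection
                return self.in_use[name]
            if not self.available:
                raise RuntimeError('all connections are already allocated')
            conn = self.available.pop()
            self.in_use[name] = conn
            return conn

    def release(self, name):
        with self.lock:
            conn = self.in_use.pop(name, None)
            if conn is not None:
                # put the slot back so other callers can claim it
                self.available.append(conn)

pool = ToyNamedConnectionPool(max_connections=2)
conn = pool.get('my_model')
pool.release('my_model')

Unlike the real manager, this toy holds the lock for the whole get() call and never opens a database handle; it only illustrates the name-keyed bookkeeping.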
diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index 1207645fa83..1f579fb9ab9 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -1,5 +1,7 @@ import abc -from contextlib import contextmanager +import copy +import multiprocessing +import time import agate import pytz @@ -7,17 +9,19 @@ import dbt.exceptions import dbt.flags +import dbt.schema import dbt.clients.agate_helper from dbt.compat import abstractclassmethod, classmethod -from dbt.node_types import NodeType +from dbt.contracts.connection import Connection from dbt.loader import GraphLoader from dbt.logger import GLOBAL_LOGGER as logger -from dbt.utils import filter_null_values +from dbt.schema import Column +from dbt.utils import filter_null_values, translate_aliases -from dbt.adapters.base.meta import AdapterMeta, available, available_deprecated +from dbt.adapters.base.meta import AdapterMeta, available, available_raw, \ + available_deprecated from dbt.adapters.base import BaseRelation -from dbt.adapters.base import Column from dbt.adapters.cache import RelationsCache @@ -65,76 +69,16 @@ def test(row): return test -def _utc(dt, source, field_name): +def _utc(dt): """If dt has a timezone, return a new datetime that's in UTC. Otherwise, assume the datetime is already for UTC and add the timezone. """ - if dt is None: - raise dbt.exceptions.raise_database_error( - "Expected a non-null value when querying field '{}' of table " - " {} but received value 'null' instead".format( - field_name, - source)) - - elif not hasattr(dt, 'tzinfo'): - raise dbt.exceptions.raise_database_error( - "Expected a timestamp value when querying field '{}' of table " - "{} but received value of type '{}' instead".format( - field_name, - source, - type(dt).__name__)) - - elif dt.tzinfo: + if dt.tzinfo: return dt.astimezone(pytz.UTC) else: return dt.replace(tzinfo=pytz.UTC) -class SchemaSearchMap(dict): - """A utility class to keep track of what information_schema tables to - search for what schemas - """ - def add(self, relation): - key = relation.information_schema_only() - if key not in self: - self[key] = set() - self[key].add(relation.schema.lower()) - - def search(self): - for information_schema_name, schemas in self.items(): - for schema in schemas: - yield information_schema_name, schema - - def schemas_searched(self): - result = set() - for information_schema_name, schemas in self.items(): - result.update( - (information_schema_name.database, schema) - for schema in schemas - ) - return result - - def flatten(self): - new = self.__class__() - - database = None - # iterate once to look for a database name - seen = {r.database.lower() for r in self if r.database} - if len(seen) > 1: - dbt.exceptions.raise_compiler_error(str(seen)) - elif len(seen) == 1: - database = list(seen)[0] - - for information_schema_name, schema in self.search(): - new.add(information_schema_name.incorporate( - path={'database': database, 'schema': schema}, - quote_policy={'database': False}, - include_policy={'database': False}, - )) - - return new - - @six.add_metaclass(AdapterMeta) class BaseAdapter(object): """The BaseAdapter provides an abstract base class for adapters. 
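The _utc helper in the hunk above exists to make the two freshness timestamps comparable: a timezone-aware value is converted to UTC, a naive value is assumed to already be UTC. A standalone version of that rule (renamed to_utc here, with made-up sample values; pytz is already imported by this module):

import datetime
import pytz

def to_utc(dt):
    # aware datetimes are converted; naive ones are assumed to already be in UTC
    if dt.tzinfo:
        return dt.astimezone(pytz.UTC)
    return dt.replace(tzinfo=pytz.UTC)

naive = datetime.datetime(2019, 3, 1, 12, 0)
aware = pytz.timezone('US/Central').localize(datetime.datetime(2019, 3, 1, 7, 0))

print(to_utc(naive))   # 2019-03-01 12:00:00+00:00
print(to_utc(aware))   # 2019-03-01 13:00:00+00:00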
@@ -192,40 +136,29 @@ def __init__(self, config): ### # Methods that pass through to the connection manager ### - def acquire_connection(self, name=None): - return self.connections.set_connection_name(name) + def acquire_connection(self, name): + return self.connections.get(name) - def release_connection(self): - return self.connections.release() + def release_connection(self, name): + return self.connections.release(name) def cleanup_connections(self): return self.connections.cleanup_all() - def clear_transaction(self): - self.connections.clear_transaction() + def clear_transaction(self, conn_name='master'): + return self.connections.clear_transaction(conn_name) - def commit_if_has_connection(self): - return self.connections.commit_if_has_connection() - - def nice_connection_name(self): - conn = self.connections.get_thread_connection() - if conn is None or conn.name is None: - return '' - return conn.name - - @contextmanager - def connection_named(self, name): - try: - yield self.acquire_connection(name) - finally: - self.release_connection() + def commit_if_has_connection(self, name): + return self.connections.commit_if_has_connection(name) @available - def execute(self, sql, auto_begin=False, fetch=False): + def execute(self, sql, model_name=None, auto_begin=False, fetch=False): """Execute the given SQL. This is a thin wrapper around ConnectionManager.execute. :param str sql: The sql to execute. + :param Optional[str] model_name: The model name to use for the + connection. :param bool auto_begin: If set, and dbt is not currently inside a transaction, automatically begin one. :param bool fetch: If set, fetch results. @@ -234,6 +167,7 @@ def execute(self, sql, auto_begin=False, fetch=False): """ return self.connections.execute( sql=sql, + name=model_name, auto_begin=auto_begin, fetch=fetch ) @@ -267,15 +201,14 @@ def check_internal_manifest(self): ### # Caching methods ### - def _schema_is_cached(self, database, schema): + def _schema_is_cached(self, database, schema, model_name=None): """Check if the schema is cached, and by default logs if it is not.""" - if dbt.flags.USE_CACHE is False: return False elif (database, schema) not in self.cache: logger.debug( 'On "{}": cache miss for schema "{}.{}", this is inefficient' - .format(self.nice_connection_name(), database, schema) + .format(model_name or '', database, schema) ) return False else: @@ -289,27 +222,6 @@ def _relations_filter_table(cls, table, schemas): """ return table.where(_relations_filter_schemas(schemas)) - def _get_cache_schemas(self, manifest, exec_only=False): - """Get a mapping of each node's "information_schema" relations to a - set of all schemas expected in that information_schema. - - There may be keys that are technically duplicates on the database side, - for example all of '"foo", 'foo', '"FOO"' and 'FOO' could coexist as - databases, and values could overlap as appropriate. All values are - lowercase strings. 
- """ - info_schema_name_map = SchemaSearchMap() - for node in manifest.nodes.values(): - if exec_only and node.resource_type not in NodeType.executable(): - continue - relation = self.Relation.create_from(self.config, node) - info_schema_name_map.add(relation) - # result is a map whose keys are information_schema Relations without - # identifiers that have appropriate database prefixes, and whose values - # are sets of lowercase schema names that are valid members of those - # schemas - return info_schema_name_map - def _relations_cache_for_schemas(self, manifest): """Populate the relations cache for the given schemas. Returns an iteratble of the schemas populated, as strings. @@ -317,16 +229,17 @@ def _relations_cache_for_schemas(self, manifest): if not dbt.flags.USE_CACHE: return - info_schema_name_map = self._get_cache_schemas(manifest, - exec_only=True) - for db, schema in info_schema_name_map.search(): + schemas = manifest.get_used_schemas() + + relations = [] + # add all relations + for db, schema in schemas: for relation in self.list_relations_without_caching(db, schema): self.cache.add(relation) - # it's possible that there were no relations in some schemas. We want # to insert the schemas we query into the cache's `.schemas` attribute # so we can check it later - self.cache.update_schemas(info_schema_name_map.schemas_searched()) + self.cache.update_schemas(schemas) def set_relations_cache(self, manifest, clear=False): """Run a query that gets a populated cache of the relations in the @@ -340,12 +253,11 @@ def set_relations_cache(self, manifest, clear=False): self.cache.clear() self._relations_cache_for_schemas(manifest) - def cache_new_relation(self, relation): + def cache_new_relation(self, relation, model_name=None): """Cache a new relation in dbt. It will show up in `list relations`.""" if relation is None: - name = self.nice_connection_name() dbt.exceptions.raise_compiler_error( - 'Attempted to cache a null relation for {}'.format(name) + 'Attempted to cache a null relation for {}'.format(model_name) ) if dbt.flags.USE_CACHE: self.cache.add(relation) @@ -375,10 +287,11 @@ def is_cancelable(cls): # Abstract methods about schemas ### @abc.abstractmethod - def list_schemas(self, database): + def list_schemas(self, database, model_name=None): """Get a list of existing schemas. :param str database: The name of the database to list under. + :param Optional[str] model_name: The name of the connection to query as :return: All schemas that currently exist in the database :rtype: List[str] """ @@ -386,7 +299,7 @@ def list_schemas(self, database): '`list_schemas` is not implemented for this adapter!' ) - def check_schema_exists(self, database, schema): + def check_schema_exists(self, database, schema, model_name=None): """Check if a schema exists. The default implementation of this is potentially unnecessarily slow, @@ -395,7 +308,7 @@ def check_schema_exists(self, database, schema): """ search = ( s.lower() for s in - self.list_schemas(database=database) + self.list_schemas(database=database, model_name=model_name) ) return schema.lower() in search @@ -404,12 +317,14 @@ def check_schema_exists(self, database, schema): ### @abc.abstractmethod @available - def drop_relation(self, relation): + def drop_relation(self, relation, model_name=None): """Drop the given relation. *Implementors must call self.cache.drop() to preserve cache state!* :param self.Relation relation: The relation to drop + :param Optional[str] model_name: The name of the model to use for the + connection. 
""" raise dbt.exceptions.NotImplementedException( '`drop_relation` is not implemented for this adapter!' @@ -417,24 +332,27 @@ def drop_relation(self, relation): @abc.abstractmethod @available - def truncate_relation(self, relation): + def truncate_relation(self, relation, model_name=None): """Truncate the given relation. :param self.Relation relation: The relation to truncate - """ + :param Optional[str] model_name: The name of the model to use for the + connection.""" raise dbt.exceptions.NotImplementedException( '`truncate_relation` is not implemented for this adapter!' ) @abc.abstractmethod @available - def rename_relation(self, from_relation, to_relation): + def rename_relation(self, from_relation, to_relation, model_name=None): """Rename the relation from from_relation to to_relation. Implementors must call self.cache.rename() to preserve cache state. :param self.Relation from_relation: The original relation name :param self.Relation to_relation: The new relation name + :param Optional[str] model_name: The name of the model to use for the + connection. """ raise dbt.exceptions.NotImplementedException( '`rename_relation` is not implemented for this adapter!' @@ -442,10 +360,12 @@ def rename_relation(self, from_relation, to_relation): @abc.abstractmethod @available - def get_columns_in_relation(self, relation): + def get_columns_in_relation(self, relation, model_name=None): """Get a list of the columns in the given Relation. :param self.Relation relation: The relation to query for. + :param Optional[str] model_name: The name of the model to use for the + connection. :return: Information about all columns in the given relation. :rtype: List[self.Column] """ @@ -454,7 +374,7 @@ def get_columns_in_relation(self, relation): ) @available_deprecated('get_columns_in_relation') - def get_columns_in_table(self, schema, identifier): + def get_columns_in_table(self, schema, identifier, model_name=None): """DEPRECATED: Get a list of the columns in the given table.""" relation = self.Relation.create( database=self.config.credentials.database, @@ -462,30 +382,34 @@ def get_columns_in_table(self, schema, identifier): identifier=identifier, quote_policy=self.config.quoting ) - return self.get_columns_in_relation(relation) + return self.get_columns_in_relation(relation, model_name=model_name) @abc.abstractmethod - def expand_column_types(self, goal, current): + def expand_column_types(self, goal, current, model_name=None): """Expand the current table's types to match the goal table. (passable) :param self.Relation goal: A relation that currently exists in the database with columns of the desired types. :param self.Relation current: A relation that currently exists in the database with columns of unspecified types. + :param Optional[str] model_name: The name of the model to use for the + connection. """ raise dbt.exceptions.NotImplementedException( '`expand_target_column_types` is not implemented for this adapter!' ) @abc.abstractmethod - def list_relations_without_caching(self, information_schema, schema): + def list_relations_without_caching(self, database, schema, + model_name=None): """List relations in the given schema, bypassing the cache. This is used as the underlying behavior to fill the cache. - :param Relation information_schema: The information schema to list - relations from. + :param str database: The name of the database to list relations from. :param str schema: The name of the schema to list relations from. 
+ :param Optional[str] model_name: The name of the model to use for the + connection. :return: The relations in schema :retype: List[self.Relation] """ @@ -498,7 +422,7 @@ def list_relations_without_caching(self, information_schema, schema): # Provided methods about relations ### @available - def get_missing_columns(self, from_relation, to_relation): + def get_missing_columns(self, from_relation, to_relation, model_name=None): """Returns dict of {column:type} for columns in from_table that are missing from to_relation """ @@ -518,12 +442,12 @@ def get_missing_columns(self, from_relation, to_relation): from_columns = { col.name: col for col in - self.get_columns_in_relation(from_relation) + self.get_columns_in_relation(from_relation, model_name=model_name) } to_columns = { col.name: col for col in - self.get_columns_in_relation(to_relation) + self.get_columns_in_relation(to_relation, model_name=model_name) } missing_columns = set(from_columns.keys()) - set(to_columns.keys()) @@ -534,49 +458,8 @@ def get_missing_columns(self, from_relation, to_relation): ] @available - def valid_archive_target(self, relation): - """Ensure that the target relation is valid, by making sure it has the - expected columns. - - :param Relation relation: The relation to check - :raises dbt.exceptions.CompilationException: If the columns are - incorrect. - """ - if not isinstance(relation, self.Relation): - dbt.exceptions.invalid_type_error( - method_name='is_existing_old_style_archive', - arg_name='relation', - got_value=relation, - expected_type=self.Relation) - - columns = self.get_columns_in_relation(relation) - names = set(c.name.lower() for c in columns) - expanded_keys = ('scd_id', 'valid_from', 'valid_to') - extra = [] - missing = [] - for legacy in expanded_keys: - desired = 'dbt_' + legacy - if desired not in names: - missing.append(desired) - if legacy in names: - extra.append(legacy) - - if missing: - if extra: - msg = ( - 'Archive target has ("{}") but not ("{}") - is it an ' - 'unmigrated previous version archive?' - .format('", "'.join(extra), '", "'.join(missing)) - ) - else: - msg = ( - 'Archive target is not an archive table (missing "{}")' - .format('", "'.join(missing)) - ) - dbt.exceptions.raise_compiler_error(msg) - - @available - def expand_target_column_types(self, temp_table, to_relation): + def expand_target_column_types(self, temp_table, to_relation, + model_name=None): if not isinstance(to_relation, self.Relation): dbt.exceptions.invalid_type_error( method_name='expand_target_column_types', @@ -591,25 +474,20 @@ def expand_target_column_types(self, temp_table, to_relation): type='table', quote_policy=self.config.quoting ) - self.expand_column_types(goal, to_relation) + self.expand_column_types(goal, to_relation, model_name) - def list_relations(self, database, schema): - if self._schema_is_cached(database, schema): + def list_relations(self, database, schema, model_name=None): + if self._schema_is_cached(database, schema, model_name): return self.cache.get_relations(database, schema) - information_schema = self.Relation.create( - database=database, - schema=schema, - model_name='').information_schema() - # we can't build the relations cache because we don't have a # manifest so we can't run any operations. 
relations = self.list_relations_without_caching( - information_schema, schema + database, schema, model_name=model_name ) - logger.debug('with database={}, schema={}, relations={}' - .format(database, schema, relations)) + logger.debug('with schema={}, model_name={}, relations={}' + .format(schema, model_name, relations)) return relations def _make_match_kwargs(self, database, schema, identifier): @@ -642,8 +520,8 @@ def _make_match(self, relations_list, database, schema, identifier): return matches @available - def get_relation(self, database, schema, identifier): - relations_list = self.list_relations(database, schema) + def get_relation(self, database, schema, identifier, model_name=None): + relations_list = self.list_relations(database, schema, model_name) matches = self._make_match(relations_list, database, schema, identifier) @@ -664,10 +542,11 @@ def get_relation(self, database, schema, identifier): return None @available_deprecated('get_relation') - def already_exists(self, schema, name): + def already_exists(self, schema, name, model_name=None): """DEPRECATED: Return if a model already exists in the database""" database = self.config.credentials.database - relation = self.get_relation(database, schema, name) + relation = self.get_relation(database, schema, name, + model_name=model_name) return relation is not None ### @@ -676,26 +555,30 @@ def already_exists(self, schema, name): ### @abc.abstractmethod @available - def create_schema(self, database, schema): + def create_schema(self, database, schema, model_name=None): """Create the given schema if it does not exist. :param str schema: The schema name to create. + :param Optional[str] model_name: The name of the model to use for the + connection. """ raise dbt.exceptions.NotImplementedException( '`create_schema` is not implemented for this adapter!' ) @abc.abstractmethod - def drop_schema(self, database, schema): + def drop_schema(self, database, schema, model_name=None): """Drop the given schema (and everything in it) if it exists. :param str schema: The schema name to drop. + :param Optional[str] model_name: The name of the model to use for the + connection. """ raise dbt.exceptions.NotImplementedException( '`drop_schema` is not implemented for this adapter!' ) - @available + @available_raw @abstractclassmethod def quote(cls, identifier): """Quote the given identifier, as appropriate for the database. @@ -709,7 +592,7 @@ def quote(cls, identifier): ) @available - def quote_as_configured(self, identifier, quote_key): + def quote_as_configured(self, identifier, quote_key, model_name=None): """Quote or do not quote the given identifer as configured in the project config for the quote key. @@ -804,7 +687,7 @@ def convert_time_type(cls, agate_table, col_idx): raise dbt.exceptions.NotImplementedException( '`convert_time_type` is not implemented for this adapter!') - @available + @available_raw @classmethod def convert_type(cls, agate_table, col_idx): return cls.convert_agate_type(agate_table, col_idx) @@ -828,7 +711,8 @@ def convert_agate_type(cls, agate_table, col_idx): # Operations involving the manifest ### def execute_macro(self, macro_name, manifest=None, project=None, - context_override=None, kwargs=None, release=False): + context_override=None, kwargs=None, release=False, + connection_name=None): """Look macro_name up in the manifest and execute its results. :param str macro_name: The name of the macro to execute. 
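As a side note on get_missing_columns a few hunks earlier: whether it is called with Relations or with schema/identifier pairs plus a model_name, the core of it is a plain set difference over column names. A tiny stand-alone illustration with dicts in place of real column metadata (the column names are invented):

from_columns = {'id': 'integer', 'name': 'text', 'updated_at': 'timestamp'}
to_columns = {'id': 'integer', 'name': 'text'}

missing_names = set(from_columns) - set(to_columns)
missing = [name for name in from_columns if name in missing_names]

print(missing)   # ['updated_at'] -- the column to add to the target relation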
@@ -842,6 +726,8 @@ def execute_macro(self, macro_name, manifest=None, project=None, :param Optional[dict] kwargs: An optional dict of keyword args used to pass to the macro. :param bool release: If True, release the connection after executing. + :param Optional[str] connection_name: The connection name to use, or + use the macro name. Return an an AttrDict with three attributes: 'table', 'data', and 'status'. 'table' is an agate.Table. @@ -850,30 +736,27 @@ def execute_macro(self, macro_name, manifest=None, project=None, kwargs = {} if context_override is None: context_override = {} + if connection_name is None: + connection_name = macro_name if manifest is None: manifest = self._internal_manifest macro = manifest.find_macro_by_name(macro_name, project) if macro is None: - if project is None: - package_name = 'any package' - else: - package_name = 'the "{}" package'.format(project) - - # The import of dbt.context.runtime below shadows 'dbt' - import dbt.exceptions raise dbt.exceptions.RuntimeException( - 'dbt could not find a macro with the name "{}" in {}' - .format(macro_name, package_name) + 'Could not find macro with name {} in project {}' + .format(macro_name, project) ) + # This causes a reference cycle, as dbt.context.runtime.generate() # ends up calling get_adapter, so the import has to be here. import dbt.context.runtime macro_context = dbt.context.runtime.generate_macro( macro, self.config, - manifest + manifest, + connection_name ) macro_context.update(context_override) @@ -883,7 +766,7 @@ def execute_macro(self, macro_name, manifest=None, project=None, result = macro_function(**kwargs) finally: if release: - self.release_connection() + self.release_connection(connection_name) return result @classmethod @@ -897,11 +780,10 @@ def get_catalog(self, manifest): """Get the catalog for this manifest by running the get catalog macro. Returns an agate.Table of catalog information. """ - information_schemas = list(self._get_cache_schemas(manifest).keys()) # make it a list so macros can index into it. - kwargs = {'information_schemas': information_schemas} + context = {'databases': list(manifest.get_used_databases())} table = self.execute_macro(GET_CATALOG_MACRO_NAME, - kwargs=kwargs, + context_override=context, release=True) results = self._catalog_filter_table(table, manifest) @@ -911,7 +793,8 @@ def cancel_open_connections(self): """Cancel all open connections.""" return self.connections.cancel_open() - def calculate_freshness(self, source, loaded_at_field, manifest=None): + def calculate_freshness(self, source, loaded_at_field, manifest=None, + connection_name=None): """Calculate the freshness of sources in dbt, and return it""" # in the future `source` will be a Relation instead of a string kwargs = { @@ -924,7 +807,8 @@ def calculate_freshness(self, source, loaded_at_field, manifest=None): FRESHNESS_MACRO_NAME, kwargs=kwargs, release=True, - manifest=manifest + manifest=manifest, + connection_name=connection_name ) # now we have a 1-row table of the maximum `loaded_at_field` value and # the current time according to the db. 
@@ -932,12 +816,11 @@ def calculate_freshness(self, source, loaded_at_field, manifest=None): dbt.exceptions.raise_compiler_error( 'Got an invalid result from "{}" macro: {}'.format( FRESHNESS_MACRO_NAME, [tuple(r) for r in table] - ) + ), + node=node ) - max_loaded_at = _utc(table[0][0], source, loaded_at_field) - snapshotted_at = _utc(table[0][1], source, loaded_at_field) - + max_loaded_at, snapshotted_at = map(_utc, table[0]) age = (snapshotted_at - max_loaded_at).total_seconds() return { 'max_loaded_at': max_loaded_at, diff --git a/core/dbt/adapters/base/meta.py b/core/dbt/adapters/base/meta.py index 14201c93563..b7968fe06ba 100644 --- a/core/dbt/adapters/base/meta.py +++ b/core/dbt/adapters/base/meta.py @@ -9,6 +9,17 @@ def available(func): arguments. """ func._is_available_ = True + func._model_name_ = True + return func + + +def available_raw(func): + """A decorator to indicate that a method on the adapter will be exposed to + the database wrapper, and the model name will be injected into the + arguments. + """ + func._is_available_ = True + func._model_name_ = False return func @@ -46,16 +57,24 @@ def __new__(mcls, name, bases, namespace, **kwargs): # dict mapping the method name to whether the model name should be # injected into the arguments. All methods in here are exposed to the # context. - available = set() + available_model = set() + available_raw = set() # collect base class data first for base in bases: - available.update(getattr(base, '_available_', set())) + available_model.update(getattr(base, '_available_model_', set())) + available_raw.update(getattr(base, '_available_raw_', set())) # override with local data if it exists for name, value in namespace.items(): if getattr(value, '_is_available_', False): - available.add(name) + if getattr(value, '_model_name_', False): + available_raw.discard(name) + available_model.add(name) + else: + available_model.discard(name) + available_raw.add(name) - cls._available_ = frozenset(available) + cls._available_model_ = frozenset(available_model) + cls._available_raw_ = frozenset(available_raw) return cls diff --git a/core/dbt/adapters/base/plugin.py b/core/dbt/adapters/base/plugin.py index a1961a35b98..523b8a43fa9 100644 --- a/core/dbt/adapters/base/plugin.py +++ b/core/dbt/adapters/base/plugin.py @@ -1,3 +1,5 @@ +import os + from dbt.config.project import Project diff --git a/core/dbt/adapters/base/relation.py b/core/dbt/adapters/base/relation.py index 2192fed466c..0a40346b5be 100644 --- a/core/dbt/adapters/base/relation.py +++ b/core/dbt/adapters/base/relation.py @@ -1,6 +1,5 @@ from dbt.api import APIObject from dbt.utils import filter_null_values -from dbt.node_types import NodeType import dbt.exceptions @@ -31,7 +30,7 @@ class BaseRelation(APIObject): 'database': True, 'schema': True, 'identifier': True - }, + } } PATH_SCHEMA = { @@ -39,7 +38,7 @@ class BaseRelation(APIObject): 'properties': { 'database': {'type': ['string', 'null']}, 'schema': {'type': ['string', 'null']}, - 'identifier': {'type': ['string', 'null']}, + 'identifier': {'type': 'string'}, }, 'required': ['database', 'schema', 'identifier'], } @@ -136,36 +135,6 @@ def include(self, database=None, schema=None, identifier=None): return self.incorporate(include_policy=policy) - def information_schema(self, identifier=None): - include_db = self.database is not None - include_policy = filter_null_values({ - 'database': include_db, - 'schema': True, - 'identifier': identifier is not None - }) - quote_policy = filter_null_values({ - 'database': 
self.quote_policy['database'], - 'schema': False, - 'identifier': False, - }) - - path_update = { - 'schema': 'information_schema', - 'identifier': identifier - } - - return self.incorporate( - quote_policy=quote_policy, - include_policy=include_policy, - path=path_update, - table_name=identifier) - - def information_schema_only(self): - return self.information_schema() - - def information_schema_table(self, identifier): - return self.information_schema(identifier) - def render(self, use_table_name=True): parts = [] @@ -205,16 +174,15 @@ def quoted(self, identifier): @classmethod def create_from_source(cls, source, **kwargs): - quote_policy = dbt.utils.deep_merge( - cls.DEFAULTS['quote_policy'], - source.quoting, - kwargs.get('quote_policy', {}) - ) return cls.create( database=source.database, schema=source.schema, identifier=source.identifier, - quote_policy=quote_policy, + quote_policy={ + 'database': True, + 'schema': True, + 'identifier': True, + }, **kwargs ) @@ -234,13 +202,6 @@ def create_from_node(cls, config, node, table_name=None, quote_policy=None, quote_policy=quote_policy, **kwargs) - @classmethod - def create_from(cls, config, node, **kwargs): - if node.resource_type == NodeType.Source: - return cls.create_from_source(node, **kwargs) - else: - return cls.create_from_node(config, node, **kwargs) - @classmethod def create(cls, database=None, schema=None, identifier=None, table_name=None, @@ -303,91 +264,3 @@ def is_cte(self): @property def is_view(self): return self.type == self.View - - -class Column(object): - TYPE_LABELS = { - 'STRING': 'TEXT', - 'TIMESTAMP': 'TIMESTAMP', - 'FLOAT': 'FLOAT', - 'INTEGER': 'INT' - } - - def __init__(self, column, dtype, char_size=None, numeric_precision=None, - numeric_scale=None): - self.column = column - self.dtype = dtype - self.char_size = char_size - self.numeric_precision = numeric_precision - self.numeric_scale = numeric_scale - - @classmethod - def translate_type(cls, dtype): - return cls.TYPE_LABELS.get(dtype.upper(), dtype) - - @classmethod - def create(cls, name, label_or_dtype): - column_type = cls.translate_type(label_or_dtype) - return cls(name, column_type) - - @property - def name(self): - return self.column - - @property - def quoted(self): - return '"{}"'.format(self.column) - - @property - def data_type(self): - if self.is_string(): - return Column.string_type(self.string_size()) - elif self.is_numeric(): - return Column.numeric_type(self.dtype, self.numeric_precision, - self.numeric_scale) - else: - return self.dtype - - def is_string(self): - return self.dtype.lower() in ['text', 'character varying', 'character', - 'varchar'] - - def is_numeric(self): - return self.dtype.lower() in ['numeric', 'number'] - - def string_size(self): - if not self.is_string(): - raise RuntimeError("Called string_size() on non-string field!") - - if self.dtype == 'text' or self.char_size is None: - # char_size should never be None. 
Handle it reasonably just in case - return 255 - else: - return int(self.char_size) - - def can_expand_to(self, other_column): - """returns True if this column can be expanded to the size of the - other column""" - if not self.is_string() or not other_column.is_string(): - return False - - return other_column.string_size() > self.string_size() - - def literal(self, value): - return "{}::{}".format(value, self.data_type) - - @classmethod - def string_type(cls, size): - return "character varying({})".format(size) - - @classmethod - def numeric_type(cls, dtype, precision, scale): - # This could be decimal(...), numeric(...), number(...) - # Just use whatever was fed in here -- don't try to get too clever - if precision is None or scale is None: - return dtype - else: - return "{}({},{})".format(dtype, precision, scale) - - def __repr__(self): - return "".format(self.name, self.data_type) diff --git a/core/dbt/adapters/factory.py b/core/dbt/adapters/factory.py index 39ba9d070c8..2cbe2dc7ac6 100644 --- a/core/dbt/adapters/factory.py +++ b/core/dbt/adapters/factory.py @@ -1,3 +1,5 @@ +from dbt.logger import GLOBAL_LOGGER as logger + import dbt.exceptions from importlib import import_module from dbt.include.global_project import PACKAGES @@ -28,7 +30,7 @@ def get_relation_class_by_name(adapter_name): def load_plugin(adapter_name): try: - mod = import_module('.' + adapter_name, 'dbt.adapters') + mod = import_module('.'+adapter_name, 'dbt.adapters') except ImportError: raise dbt.exceptions.RuntimeException( "Could not find adapter type {}!".format(adapter_name) diff --git a/core/dbt/adapters/sql/__init__.py b/core/dbt/adapters/sql/__init__.py index 3535806364d..e73e49ff99d 100644 --- a/core/dbt/adapters/sql/__init__.py +++ b/core/dbt/adapters/sql/__init__.py @@ -1,3 +1,2 @@ -# these are all just exports, #noqa them so flake8 will be happy -from dbt.adapters.sql.connections import SQLConnectionManager # noqa -from dbt.adapters.sql.impl import SQLAdapter # noqa +from dbt.adapters.sql.connections import SQLConnectionManager +from dbt.adapters.sql.impl import SQLAdapter diff --git a/core/dbt/adapters/sql/connections.py b/core/dbt/adapters/sql/connections.py index a6db10d1215..a0c7bedf2ed 100644 --- a/core/dbt/adapters/sql/connections.py +++ b/core/dbt/adapters/sql/connections.py @@ -30,30 +30,31 @@ def cancel(self, connection): def cancel_open(self): names = [] - this_connection = self.get_if_exists() with self.lock: - for connection in self.thread_connections.values(): - if connection is this_connection: + for name, connection in self.in_use.items(): + if name == 'master': continue self.cancel(connection) - names.append(connection.name) + names.append(name) return names - def add_query(self, sql, auto_begin=True, bindings=None, + def add_query(self, sql, name=None, auto_begin=True, bindings=None, abridge_sql_log=False): - connection = self.get_thread_connection() + connection = self.get(name) + connection_name = connection.name + if auto_begin and connection.transaction_open is False: - self.begin() + self.begin(connection_name) logger.debug('Using {} connection "{}".' 
- .format(self.TYPE, connection.name)) + .format(self.TYPE, connection_name)) - with self.exception_handler(sql): + with self.exception_handler(sql, connection_name): if abridge_sql_log: - logger.debug('On %s: %s....', connection.name, sql[0:512]) + logger.debug('On %s: %s....', connection_name, sql[0:512]) else: - logger.debug('On %s: %s', connection.name, sql) + logger.debug('On %s: %s', connection_name, sql) pre = time.time() cursor = connection.handle.cursor() @@ -89,8 +90,9 @@ def get_result_from_cursor(cls, cursor): return dbt.clients.agate_helper.table_from_data(data, column_names) - def execute(self, sql, auto_begin=False, fetch=False): - _, cursor = self.add_query(sql, auto_begin) + def execute(self, sql, name=None, auto_begin=False, fetch=False): + self.get(name) + _, cursor = self.add_query(sql, name, auto_begin) status = self.get_status(cursor) if fetch: table = self.get_result_from_cursor(cursor) @@ -98,14 +100,14 @@ def execute(self, sql, auto_begin=False, fetch=False): table = dbt.clients.agate_helper.empty_table() return status, table - def add_begin_query(self): - return self.add_query('BEGIN', auto_begin=False) + def add_begin_query(self, name): + return self.add_query('BEGIN', name, auto_begin=False) - def add_commit_query(self): - return self.add_query('COMMIT', auto_begin=False) + def add_commit_query(self, name): + return self.add_query('COMMIT', name, auto_begin=False) - def begin(self): - connection = self.get_thread_connection() + def begin(self, name): + connection = self.get(name) if dbt.flags.STRICT_MODE: assert isinstance(connection, Connection) @@ -115,24 +117,29 @@ def begin(self): 'Tried to begin a new transaction on connection "{}", but ' 'it already had one open!'.format(connection.get('name'))) - self.add_begin_query() + self.add_begin_query(name) connection.transaction_open = True + self.in_use[name] = connection + return connection - def commit(self): - connection = self.get_thread_connection() + def commit(self, connection): + if dbt.flags.STRICT_MODE: assert isinstance(connection, Connection) + connection = self.get(connection.name) + if connection.transaction_open is False: raise dbt.exceptions.InternalException( 'Tried to commit transaction on connection "{}", but ' 'it does not have one open!'.format(connection.name)) logger.debug('On {}: COMMIT'.format(connection.name)) - self.add_commit_query() + self.add_commit_query(connection.name) connection.transaction_open = False + self.in_use[connection.name] = connection return connection diff --git a/core/dbt/adapters/sql/impl.py b/core/dbt/adapters/sql/impl.py index 245b812def1..c7fa6d79b7f 100644 --- a/core/dbt/adapters/sql/impl.py +++ b/core/dbt/adapters/sql/impl.py @@ -1,10 +1,15 @@ +import abc +import time + import agate +import six import dbt.clients.agate_helper import dbt.exceptions import dbt.flags from dbt.adapters.base import BaseAdapter, available from dbt.logger import GLOBAL_LOGGER as logger +from dbt.compat import abstractclassmethod LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching' @@ -36,12 +41,14 @@ class SQLAdapter(BaseAdapter): - get_columns_in_relation """ @available - def add_query(self, sql, auto_begin=True, bindings=None, + def add_query(self, sql, model_name=None, auto_begin=True, bindings=None, abridge_sql_log=False): """Add a query to the current transaction. A thin wrapper around ConnectionManager.add_query. 
:param str sql: The SQL query to add + :param Optional[str] model_name: The name of the connection the + transaction is on :param bool auto_begin: If set and there is no transaction in progress, begin a new one. :param Optional[List[object]]: An optional list of bindings for the @@ -49,8 +56,8 @@ def add_query(self, sql, auto_begin=True, bindings=None, :param bool abridge_sql_log: If set, limit the raw sql logged to 512 characters """ - return self.connections.add_query(sql, auto_begin, bindings, - abridge_sql_log) + return self.connections.add_query(sql, model_name, auto_begin, + bindings, abridge_sql_log) @classmethod def convert_text_type(cls, agate_table, col_idx): @@ -81,15 +88,15 @@ def convert_time_type(cls, agate_table, col_idx): def is_cancelable(cls): return True - def expand_column_types(self, goal, current): + def expand_column_types(self, goal, current, model_name=None): reference_columns = { c.name: c for c in - self.get_columns_in_relation(goal) + self.get_columns_in_relation(goal, model_name=model_name) } target_columns = { c.name: c for c - in self.get_columns_in_relation(current) + in self.get_columns_in_relation(current, model_name=model_name) } for column_name, reference_column in reference_columns.items(): @@ -102,9 +109,14 @@ def expand_column_types(self, goal, current): logger.debug("Changing col type from %s to %s in table %s", target_column.data_type, new_type, current) - self.alter_column_type(current, column_name, new_type) + self.alter_column_type(current, column_name, new_type, + model_name=model_name) + + if model_name is None: + self.release_connection('master') - def alter_column_type(self, relation, column_name, new_column_type): + def alter_column_type(self, relation, column_name, new_column_type, + model_name=None): """ 1. Create a new column (w/ temp name and correct type) 2. 
Copy data over to it @@ -118,10 +130,11 @@ def alter_column_type(self, relation, column_name, new_column_type): } self.execute_macro( ALTER_COLUMN_TYPE_MACRO_NAME, - kwargs=kwargs + kwargs=kwargs, + connection_name=model_name ) - def drop_relation(self, relation): + def drop_relation(self, relation, model_name=None): if dbt.flags.USE_CACHE: self.cache.drop(relation) if relation.type is None: @@ -131,54 +144,65 @@ def drop_relation(self, relation): self.execute_macro( DROP_RELATION_MACRO_NAME, - kwargs={'relation': relation} + kwargs={'relation': relation}, + connection_name=model_name ) - def truncate_relation(self, relation): + def truncate_relation(self, relation, model_name=None): self.execute_macro( TRUNCATE_RELATION_MACRO_NAME, - kwargs={'relation': relation} + kwargs={'relation': relation}, + connection_name=model_name ) - def rename_relation(self, from_relation, to_relation): + def rename_relation(self, from_relation, to_relation, model_name=None): if dbt.flags.USE_CACHE: self.cache.rename(from_relation, to_relation) kwargs = {'from_relation': from_relation, 'to_relation': to_relation} self.execute_macro( RENAME_RELATION_MACRO_NAME, - kwargs=kwargs + kwargs=kwargs, + connection_name=model_name ) - def get_columns_in_relation(self, relation): + def get_columns_in_relation(self, relation, model_name=None): return self.execute_macro( GET_COLUMNS_IN_RELATION_MACRO_NAME, - kwargs={'relation': relation} + kwargs={'relation': relation}, + connection_name=model_name ) - def create_schema(self, database, schema): + def create_schema(self, database, schema, model_name=None): logger.debug('Creating schema "%s"."%s".', database, schema) + if model_name is None: + model_name = 'master' kwargs = { 'database_name': self.quote_as_configured(database, 'database'), 'schema_name': self.quote_as_configured(schema, 'schema'), } - self.execute_macro(CREATE_SCHEMA_MACRO_NAME, kwargs=kwargs) - self.commit_if_has_connection() + self.execute_macro(CREATE_SCHEMA_MACRO_NAME, + kwargs=kwargs, + connection_name=model_name) + self.commit_if_has_connection(model_name) - def drop_schema(self, database, schema): + def drop_schema(self, database, schema, model_name=None): logger.debug('Dropping schema "%s"."%s".', database, schema) kwargs = { 'database_name': self.quote_as_configured(database, 'database'), 'schema_name': self.quote_as_configured(schema, 'schema'), } self.execute_macro(DROP_SCHEMA_MACRO_NAME, - kwargs=kwargs) + kwargs=kwargs, + connection_name=model_name) - def list_relations_without_caching(self, information_schema, schema): - kwargs = {'information_schema': information_schema, 'schema': schema} + def list_relations_without_caching(self, database, schema, + model_name=None): results = self.execute_macro( LIST_RELATIONS_MACRO_NAME, - kwargs=kwargs + kwargs={'database': database, 'schema': schema}, + connection_name=model_name, + release=True ) relations = [] @@ -199,22 +223,22 @@ def list_relations_without_caching(self, information_schema, schema): def quote(cls, identifier): return '"{}"'.format(identifier) - def list_schemas(self, database): + def list_schemas(self, database, model_name=None): results = self.execute_macro( LIST_SCHEMAS_MACRO_NAME, - kwargs={'database': database} + kwargs={'database': database}, + connection_name=model_name, + # release when the model_name is none, as that implies we were + # called by node_runners.py. 
+ release=(model_name is None) ) return [row[0] for row in results] - def check_schema_exists(self, database, schema): - information_schema = self.Relation.create( - database=database, schema=schema - ).information_schema() - - kwargs = {'information_schema': information_schema, 'schema': schema} + def check_schema_exists(self, database, schema, model_name=None): results = self.execute_macro( CHECK_SCHEMA_EXISTS_MACRO_NAME, - kwargs=kwargs + kwargs={'database': database, 'schema': schema}, + connection_name=model_name ) return results[0][0] > 0 diff --git a/core/dbt/api/object.py b/core/dbt/api/object.py index 771d13e9919..b12e37a3652 100644 --- a/core/dbt/api/object.py +++ b/core/dbt/api/object.py @@ -1,6 +1,6 @@ import copy from collections import Mapping -from jsonschema import Draft7Validator +from jsonschema import Draft4Validator from dbt.exceptions import JSONValidationException from dbt.utils import deep_merge @@ -79,7 +79,7 @@ def validate(self): of this instance. If any attributes are missing or invalid, raise a ValidationException. """ - validator = Draft7Validator(self.SCHEMA) + validator = Draft4Validator(self.SCHEMA) errors = set() # make errors a set to avoid duplicates diff --git a/core/dbt/clients/_jinja_blocks.py b/core/dbt/clients/_jinja_blocks.py deleted file mode 100644 index 6207ab4956c..00000000000 --- a/core/dbt/clients/_jinja_blocks.py +++ /dev/null @@ -1,481 +0,0 @@ -import re - -import dbt.exceptions - - -def regex(pat): - return re.compile(pat, re.DOTALL | re.MULTILINE) - - -class BlockData(object): - """raw plaintext data from the top level of the file.""" - def __init__(self, contents): - self.block_type_name = '__dbt__data' - self.contents = contents - self.full_block = contents - - -class BlockTag(object): - def __init__(self, block_type_name, block_name, contents=None, - full_block=None, **kw): - self.block_type_name = block_type_name - self.block_name = block_name - self.contents = contents - self.full_block = full_block - - def __str__(self): - return 'BlockTag({!r}, {!r})'.format(self.block_type_name, - self.block_name) - - def __repr__(self): - return str(self) - - @property - def end_block_type_name(self): - return 'end{}'.format(self.block_type_name) - - def end_pat(self): - # we don't want to use string formatting here because jinja uses most - # of the string formatting operators in its syntax... - pattern = ''.join(( - r'(?P((?:\s*\{\%\-|\{\%)\s*', - self.end_block_type_name, - r'\s*(?:\-\%\}\s*|\%\})))', - )) - return regex(pattern) - - -_NAME_PATTERN = r'[A-Za-z_][A-Za-z_0-9]*' - -COMMENT_START_PATTERN = regex(r'(?:(?P(\s*\{\#)))') -COMMENT_END_PATTERN = regex(r'(.*?)(\s*\#\})') -RAW_START_PATTERN = regex( - r'(?:\s*\{\%\-|\{\%)\s*(?P(raw))\s*(?:\-\%\}\s*|\%\})' -) - -BLOCK_START_PATTERN = regex(''.join(( - r'(?:\s*\{\%\-|\{\%)\s*', - r'(?P({}))'.format(_NAME_PATTERN), - # some blocks have a 'block name'. - r'(?:\s+(?P({})))?'.format(_NAME_PATTERN), -))) - -TAG_CLOSE_PATTERN = regex(r'(?:(?P(\-\%\}\s*|\%\})))') -# if you do {% materialization foo, adapter="myadapter' %} and end up with -# mismatched quotes this will still match, but jinja will fail somewhere -# since the adapter= argument has to be an adapter name, and none have quotes -# or anything else in them. So this should be fine. -MATERIALIZATION_ARGS_PATTERN = regex( - r'\s*,\s*' - r'''(?P(adapter=(?:['"]{}['"])|default))''' - .format(_NAME_PATTERN) -) -# macros an stuff like macros get open parents, followed by a very complicated -# argument spec! 
In fact, it's easiest to parse it in tiny little chunks -# because we have to handle awful stuff like string parsing ;_; -MACRO_ARGS_START_PATTERN = regex(r'\s*(?P\()\s*') -MACRO_ARGS_END_PATTERN = regex(r'\s*(?P(\)))\s*') - -# macros can be like {% macro foo(bar) %} or {% macro foo(bar, baz) %} or -# {% macro foo(bar, baz="quux") %} or ... -# I think jinja disallows default values after required (like Python), but we -# can ignore that and let jinja deal -MACRO_ARG_PATTERN = regex(''.join(( - r'\s*(?P({}))\s*', - r'((?P=)|(?P,)?)\s*'.format(_NAME_PATTERN), -))) - -# stolen from jinja's lexer. Note that we've consumed all prefix whitespace by -# the time we want to use this. -STRING_PATTERN = regex( - r"(?P('([^'\\]*(?:\\.[^'\\]*)*)'|" - r'"([^"\\]*(?:\\.[^"\\]*)*)"))' -) - -# any number of non-quote characters, followed by: -# - quote: a quote mark indicating start of a string (you'll want to backtrack -# the regex end on quotes and then match with the string pattern) -# - a comma (so there will be another full argument) -# - a closing parenthesis (you can now expect a closing tag) -NON_STRING_MACRO_ARGS_PATTERN = regex( - # anything, followed by a quote, open/close paren, or comma - r'''(.*?)''' - r'''((?P(['"]))|(?P(\())|(?P(\)))|(?P(\,)))''' -) - - -NON_STRING_DO_BLOCK_MEMBER_PATTERN = regex( - # anything, followed by a quote, paren, or a tag end - r'''(.*?)''' - r'''((?P(['"]))|(?P(\())|(?P(\))))''' -) - - -class BlockIterator(object): - def __init__(self, data): - self.data = data - self.blocks = [] - self._block_contents = None - self._parenthesis_stack = [] - self.pos = 0 - - def advance(self, new_position): - blk = self.data[self.pos:new_position] - - if self._block_contents is not None: - self._block_contents += blk - - self.pos = new_position - - def rewind(self, amount=1): - if self._block_contents is not None: - self._block_contents = self._block_contents[:-amount] - - self.pos -= amount - - def _search(self, pattern): - return pattern.search(self.data, self.pos) - - def _match(self, pattern): - return pattern.match(self.data, self.pos) - - def expect_comment_end(self): - """Expect a comment end and return the match object. - """ - match = self._expect_match('#}', COMMENT_END_PATTERN) - self.advance(match.end()) - - def expect_raw_end(self): - end_pat = BlockTag('raw', None).end_pat() - match = self._search(end_pat) - if match is None: - dbt.exceptions.raise_compiler_error( - 'unexpected EOF, expected {% endraw %}' - ) - self.advance(match.end()) - - def _first_match(self, *patterns, **kwargs): - matches = [] - for pattern in patterns: - # default to 'search', but sometimes we want to 'match'. - if kwargs.get('method', 'search') == 'search': - match = self._search(pattern) - else: - match = self._match(pattern) - if match: - matches.append(match) - if not matches: - return None - # if there are multiple matches, pick the least greedy match - # TODO: do I need to account for m.start(), or is this ok? - return min(matches, key=lambda m: m.end()) - - def _expect_match(self, expected_name, *patterns, **kwargs): - match = self._first_match(*patterns, **kwargs) - if match is None: - msg = 'unexpected EOF, expected {}, got "{}"'.format( - expected_name, self.data[self.pos:] - ) - dbt.exceptions.raise_compiler_error(msg) - return match - - def handle_block(self, match, block_start=None): - """Handle a block. 
The current state of the parser should be after the - open block is completed: - {% blk foo %}my data {% endblk %} - ^ right here - """ - # we have to handle comments inside blocks because you could do this: - # {% blk foo %}asdf {# {% endblk %} #} {%endblk%} - # they still end up in the data/raw_data of the block itself, but we - # have to know to ignore stuff until the end comment marker! - found = BlockTag(**match.groupdict()) - # the full block started at the given match start, which may include - # prefixed whitespace! we'll strip it later - if block_start is None: - block_start = match.start() - - self._block_contents = '' - - # you can have as many comments in your block as you'd like! - while True: - match = self._expect_match( - '"{}"'.format(found.end_block_type_name), - found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN, - regex('''(?P(['"]))''') - ) - groups = match.groupdict() - if groups.get('endblock') is not None: - break - - self.advance(match.end()) - - if groups.get('comment_start') is not None: - self.expect_comment_end() - elif groups.get('raw_start') is not None: - self.expect_raw_end() - elif groups.get('quote') is not None: - self.rewind() - match = self._expect_match('any string', STRING_PATTERN) - self.advance(match.end()) - else: - raise dbt.exceptions.InternalException( - 'unhandled regex in handle_block, no match: {}' - .format(groups) - ) - - # we want to advance to just the end tag at first, to extract the - # contents - self.advance(match.start()) - found.contents = self._block_contents - self._block_contents = None - # now advance to the end - self.advance(match.end()) - found.full_block = self.data[block_start:self.pos] - return found - - def handle_materialization(self, match): - self._expect_match('materialization args', - MATERIALIZATION_ARGS_PATTERN) - endtag = self._expect_match('%}', TAG_CLOSE_PATTERN) - self.advance(endtag.end()) - # handle the block we started with! - self.blocks.append(self.handle_block(match)) - - def handle_do(self, match, expect_block): - if expect_block: - # we might be wrong to expect a block ({% do (...) %}, for example) - # so see if there's more data before the tag closes. if there - # isn't, we expect a block. - close_match = self._expect_match('%}', TAG_CLOSE_PATTERN) - unprocessed = self.data[match.end():close_match.start()].strip() - expect_block = not unprocessed - - if expect_block: - # if we're here, expect_block is True and we must have set - # close_match - self.advance(close_match.end()) - block = self.handle_block(match) - else: - # we have a do-statement like {% do thing() %}, so no {% enddo %} - # also, we don't want to advance to the end of the match, as it - # might be inside a string or something! 
So go back and figure out - self._process_rval_components() - block = BlockTag('do', None, - full_block=self.data[match.start():self.pos]) - self.blocks.append(block) - - def handle_set(self, match): - equal_or_close = self._expect_match('%} or =', - TAG_CLOSE_PATTERN, regex(r'=')) - self.advance(equal_or_close.end()) - if equal_or_close.groupdict().get('tag_close') is None: - # it's an equals sign, must be like {% set x = 1 %} - self._process_rval_components() - # watch out, order matters here on python 2 - block = BlockTag(full_block=self.data[match.start():self.pos], - **match.groupdict()) - else: - # it's a tag close, must be like {% set x %}...{% endset %} - block = self.handle_block(match) - self.blocks.append(block) - - def find_block(self): - open_block = ( - r'(?:\s*\{\%\-|\{\%)\s*' - r'(?P([A-Za-z_][A-Za-z_0-9]*))' - # some blocks have a 'block name'. - r'(?:\s+(?P([A-Za-z_][A-Za-z_0-9]*)))?' - ) - - match = self._first_match(regex(open_block), COMMENT_START_PATTERN) - if match is None: - return False - - raw_toplevel = self.data[self.pos:match.start()] - if len(raw_toplevel) > 0: - self.blocks.append(BlockData(raw_toplevel)) - - matchgroups = match.groupdict() - - # comments are easy - if matchgroups.get('comment_start') is not None: - start = match.start() - self.advance(match.end()) - self.expect_comment_end() - self.blocks.append(BlockData(self.data[start:self.pos])) - return True - - block_type_name = matchgroups.get('block_type_name') - - if block_type_name == 'raw': - start = match.start() - self.expect_raw_end() - self.blocks.append(BlockData(self.data[start:self.pos])) - return True - - if block_type_name == 'materialization': - self.advance(match.end()) - self.handle_materialization(match) - return True - - if block_type_name == 'do': - # if there is a "block_name" in the match groups, we don't expect a - # block as the "block name" is actually part of the do-statement. - # we need to do this to handle the (weird and probably wrong!) case - # of a do-statement that is only a single identifier - techincally - # allowed in jinja. (for example, {% do thing %}) - expect_block = matchgroups.get('block_name') is None - self.handle_do(match, expect_block=expect_block) - return True - - if block_type_name == 'set': - self.advance(match.end()) - self.handle_set(match) - return True - - # we're somewhere like this {% block_type_name block_type - # we've either got arguments, a close of tag (%}), or bad input. - # we've handled materializations already (they're weird!) - # thankfully, comments aren't allowed *inside* a block def... - block_end_match = self._expect_match('%} or (...)', - TAG_CLOSE_PATTERN, - MACRO_ARGS_START_PATTERN) - self.advance(block_end_match.end()) - if block_end_match.groupdict().get('macro_start') is not None: - # we've hit our first parenthesis! - self._parenthesis_stack = [True] - self._process_macro_args() - self.advance(self._expect_match('%}', TAG_CLOSE_PATTERN).end()) - - # tag close time! - self.blocks.append(self.handle_block(match)) - return True - - def _process_rval_components(self): - """This is suspiciously similar to _process_macro_default_arg, probably - want to figure out how to merge the two. 
- - Process the rval of an assignment statement or a do-block - """ - while True: - match = self._expect_match( - 'do block component', - # you could have a string, though that would be weird - STRING_PATTERN, - # a quote or an open/close parenthesis - NON_STRING_DO_BLOCK_MEMBER_PATTERN, - # a tag close - TAG_CLOSE_PATTERN - ) - matchgroups = match.groupdict() - self.advance(match.end()) - if matchgroups.get('string') is not None: - continue - elif matchgroups.get('quote') is not None: - self.rewind() - # now look for a string - match = self._expect_match('any string', STRING_PATTERN) - self.advance(match.end()) - elif matchgroups.get('open'): - self._parenthesis_stack.append(True) - elif matchgroups.get('close'): - self._parenthesis_stack.pop() - elif matchgroups.get('tag_close'): - if self._parenthesis_stack: - msg = ('Found "%}", expected ")"') - dbt.exceptions.raise_compiler_error(msg) - return - # else whitespace - - def _process_macro_default_arg(self): - """Handle the bit after an '=' in a macro default argument. This is - probably the trickiest thing. The goal here is to accept all strings - jinja would accept and always handle block start/end correctly: It's - fine to have false positives, jinja can fail later. - - Return True if there are more arguments expected. - """ - while self._parenthesis_stack: - match = self._expect_match( - 'macro argument', - # you could have a string - STRING_PATTERN, - # a quote, a comma, or a open/close parenthesis - NON_STRING_MACRO_ARGS_PATTERN, - # we want to "match", not "search" - method='match' - ) - matchgroups = match.groupdict() - self.advance(match.end()) - if matchgroups.get('string') is not None: - # we got a string value. There could be more data. - continue - elif matchgroups.get('quote') is not None: - # we got a bunch of data and then a string opening value. - # put the quote back on the menu - self.rewind() - # now look for a string - match = self._expect_match('any string', STRING_PATTERN) - self.advance(match.end()) - elif matchgroups.get('comma') is not None: - # small hack: if we hit a comma and there is one parenthesis - # left, return to look for a new name. otherwise we're still - # looking for the parameter close. - if len(self._parenthesis_stack) == 1: - return - elif matchgroups.get('open'): - self._parenthesis_stack.append(True) - elif matchgroups.get('close'): - self._parenthesis_stack.pop() - else: - raise dbt.exceptions.InternalException( - 'unhandled regex in _process_macro_default_arg(), no match' - ': {}'.format(matchgroups) - ) - - def _process_macro_args(self): - """Macro args are pretty tricky! Arg names themselves are simple, but - you can set arbitrary default values, including doing stuff like: - {% macro my_macro(arg="x" + ("}% {# {% endmacro %}" * 2)) %} - - Which makes you a jerk, but is valid jinja. - """ - # we are currently after the first parenthesis (+ any whitespace) after - # the macro args started. You can either have the close paren, or a - # name. - while self._parenthesis_stack: - match = self._expect_match('macro arguments', - MACRO_ARGS_END_PATTERN, - MACRO_ARG_PATTERN) - self.advance(match.end()) - matchgroups = match.groupdict() - if matchgroups.get('macro_end') is not None: - self._parenthesis_stack.pop() - # we got an argument. let's see what it has - elif matchgroups.get('value') is not None: - # we have to process a single macro argument. This mutates - # the parenthesis stack! If it finds a comma, it will continue - # the loop. 
- self._process_macro_default_arg() - elif matchgroups.get('more_args') is not None: - continue - else: - raise dbt.exceptions.InternalException( - 'unhandled regex in _process_macro_args(), no match: {}' - .format(matchgroups) - ) - # if there are more arguments or a macro arg end we'll catch them - # on the next loop around - - def lex_for_blocks(self): - while self.data[self.pos:]: - found = self.find_block() - if not found: - break - - raw_toplevel = self.data[self.pos:] - if len(raw_toplevel) > 0: - self.blocks.append(BlockData(raw_toplevel)) - - return self.blocks diff --git a/core/dbt/clients/jinja.py b/core/dbt/clients/jinja.py index 156017a38cb..f83076a1709 100644 --- a/core/dbt/clients/jinja.py +++ b/core/dbt/clients/jinja.py @@ -11,9 +11,9 @@ import dbt.compat import dbt.exceptions -import dbt.utils -from dbt.clients._jinja_blocks import BlockIterator +from dbt.node_types import NodeType +from dbt.utils import AttrDict from dbt.logger import GLOBAL_LOGGER as logger # noqa @@ -56,7 +56,7 @@ def _compile(self, source, filename): linecache.cache[filename] = ( len(source), None, - [line + '\n' for line in source.splitlines()], + [line+'\n' for line in source.splitlines()], filename ) @@ -281,7 +281,3 @@ def get_rendered(string, ctx, node=None, def undefined_error(msg): raise jinja2.exceptions.UndefinedError(msg) - - -def extract_toplevel_blocks(data): - return BlockIterator(data).lex_for_blocks() diff --git a/core/dbt/clients/registry.py b/core/dbt/clients/registry.py index 4ba817d0fcf..0873cc509ad 100644 --- a/core/dbt/clients/registry.py +++ b/core/dbt/clients/registry.py @@ -3,7 +3,6 @@ import requests from dbt.exceptions import RegistryException from dbt.utils import memoized -from dbt.logger import GLOBAL_LOGGER as logger import os if os.getenv('DBT_PACKAGE_HUB_URL'): @@ -33,10 +32,7 @@ def wrapper(*args, **kwargs): @_wrap_exceptions def _get(path, registry_base_url=None): url = _get_url(path, registry_base_url) - logger.debug('Making package registry request: GET {}'.format(url)) resp = requests.get(url) - logger.debug('Response from registry: GET {} {}'.format(url, - resp.status_code)) resp.raise_for_status() return resp.json() diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py index 5a3b8353b95..e3733189f8f 100644 --- a/core/dbt/clients/system.py +++ b/core/dbt/clients/system.py @@ -35,7 +35,6 @@ def find_matching(root_path, 'searched_path': 'models' } ] """ matching = [] - root_path = os.path.normpath(root_path) for relative_path_to_search in relative_paths_to_search: absolute_path_to_search = os.path.join( @@ -241,17 +240,12 @@ def _handle_windows_error(exc, cwd, cmd): message = ("Could not find command, ensure it is in the user's PATH " "and that the user has permissions to run it") cls = dbt.exceptions.ExecutableError - elif exc.errno == errno.ENOEXEC: - message = ('Command was not executable, ensure it is valid') - cls = dbt.exceptions.ExecutableError elif exc.errno == errno.ENOTDIR: message = ('Unable to cd: path does not exist, user does not have' ' permissions, or not a directory') cls = dbt.exceptions.WorkingDirectoryError else: - message = 'Unknown error: {} (errno={}: "{}")'.format( - str(exc), exc.errno, errno.errorcode.get(exc.errno, '') - ) + message = 'Unknown error: {}'.format(str(exc)) raise cls(cwd, cmd, message) @@ -312,7 +306,7 @@ def run_cmd(cwd, cmd, env=None): def download(url, path): response = requests.get(url) with open(path, 'wb') as handle: - for block in response.iter_content(1024 * 64): + for block in 
response.iter_content(1024*64): handle.write(block) @@ -382,7 +376,7 @@ def move(src, dst): except OSError: # probably different drives if os.path.isdir(src): - if _absnorm(dst + '\\').startswith(_absnorm(src + '\\')): + if _absnorm(dst+'\\').startswith(_absnorm(src+'\\')): # dst is inside src raise EnvironmentError( "Cannot move a directory '{}' into itself '{}'" diff --git a/core/dbt/compat.py b/core/dbt/compat.py index 50f9c217914..a3fe87d273f 100644 --- a/core/dbt/compat.py +++ b/core/dbt/compat.py @@ -1,7 +1,6 @@ -# flake8: noqa - import abc import codecs +import json import warnings import decimal @@ -34,13 +33,11 @@ if WHICH_PYTHON == 2: from SimpleHTTPServer import SimpleHTTPRequestHandler from SocketServer import TCPServer - from Queue import PriorityQueue, Empty as QueueEmpty - from thread import get_ident + from Queue import PriorityQueue else: from http.server import SimpleHTTPRequestHandler from socketserver import TCPServer - from queue import PriorityQueue, Empty as QueueEmpty - from threading import get_ident + from queue import PriorityQueue def to_unicode(s): diff --git a/core/dbt/compilation.py b/core/dbt/compilation.py index 86d2fc23fae..9f101c39222 100644 --- a/core/dbt/compilation.py +++ b/core/dbt/compilation.py @@ -1,11 +1,14 @@ import itertools import os -from collections import defaultdict +import json +from collections import OrderedDict, defaultdict +import sqlparse import dbt.utils import dbt.include import dbt.tracking +from dbt import deprecations from dbt.utils import get_materialization, NodeType, is_type from dbt.linker import Linker @@ -16,8 +19,9 @@ import dbt.flags import dbt.loader import dbt.config -from dbt.contracts.graph.compiled import CompiledNode +from dbt.contracts.graph.compiled import CompiledNode, CompiledGraph +from dbt.clients.system import write_json from dbt.logger import GLOBAL_LOGGER as logger graph_file_name = 'graph.gpickle' @@ -32,7 +36,6 @@ def print_compile_stats(stats): NodeType.Macro: 'macros', NodeType.Operation: 'operations', NodeType.Seed: 'seed files', - NodeType.Source: 'sources', } results = {k: 0 for k in names.keys()} @@ -41,7 +44,7 @@ def print_compile_stats(stats): stat_line = ", ".join( ["{} {}".format(ct, names.get(t)) for t, ct in results.items()]) - logger.notice("Found {}".format(stat_line)) + logger.info("Found {}".format(stat_line)) def _add_prepended_cte(prepended_ctes, new_cte): @@ -180,7 +183,7 @@ def link_graph(self, linker, manifest): if cycle: raise RuntimeError("Found a cycle: {}".format(cycle)) - def compile(self, manifest, write=True): + def compile(self, manifest): linker = Linker() self.link_graph(linker, manifest) @@ -192,35 +195,25 @@ def compile(self, manifest, write=True): manifest.macros.items()): stats[node.resource_type] += 1 - if write: - self.write_graph_file(linker, manifest) + self.write_graph_file(linker, manifest) print_compile_stats(stats) return linker -def compile_manifest(config, manifest, write=True): +def compile_manifest(config, manifest): compiler = Compiler(config) compiler.initialize() - return compiler.compile(manifest, write=write) + return compiler.compile(manifest) -def _is_writable(node): - if not node.injected_sql: - return False - - if dbt.utils.is_type(node, NodeType.Archive): - return False - - return True - - -def compile_node(adapter, config, node, manifest, extra_context, write=True): +def compile_node(adapter, config, node, manifest, extra_context): compiler = Compiler(config) node = compiler.compile_node(node, manifest, extra_context) node = 
_inject_runtime_config(adapter, node, extra_context) - if write and _is_writable(node): + if(node.injected_sql is not None and + not (dbt.utils.is_type(node, NodeType.Archive))): logger.debug('Writing injected SQL for node "{}"'.format( node.unique_id)) diff --git a/core/dbt/config/__init__.py b/core/dbt/config/__init__.py index d20916525ee..b5280511ef7 100644 --- a/core/dbt/config/__init__.py +++ b/core/dbt/config/__init__.py @@ -1,5 +1,22 @@ -# all these are just exports, they need "noqa" so flake8 will not complain. -from .renderer import ConfigRenderer # noqa -from .profile import Profile, UserConfig, PROFILES_DIR # noqa -from .project import Project # noqa -from .runtime import RuntimeConfig # noqa + +from .renderer import ConfigRenderer +from .profile import Profile, UserConfig +from .project import Project +from .profile import read_profile +from .profile import PROFILES_DIR +from .runtime import RuntimeConfig + + +def read_profiles(profiles_dir=None): + """This is only used in main, for some error handling""" + if profiles_dir is None: + profiles_dir = PROFILES_DIR + + raw_profiles = read_profile(profiles_dir) + + if raw_profiles is None: + profiles = {} + else: + profiles = {k: v for (k, v) in raw_profiles.items() if k != 'config'} + + return profiles diff --git a/core/dbt/config/profile.py b/core/dbt/config/profile.py index 835718ea25a..bb5c91cc246 100644 --- a/core/dbt/config/profile.py +++ b/core/dbt/config/profile.py @@ -335,12 +335,14 @@ def from_raw_profiles(cls, raw_profiles, profile_name, cli_vars, ) @classmethod - def from_args(cls, args, project_profile_name=None): + def from_args(cls, args, project_profile_name=None, cli_vars=None): """Given the raw profiles as read from disk and the name of the desired profile if specified, return the profile component of the runtime config. :param args argparse.Namespace: The arguments as parsed from the cli. + :param cli_vars dict: The command-line variables passed as arguments, + as a dict. :param project_profile_name Optional[str]: The profile name, if specified in a project. :raises DbtProjectError: If there is no profile name specified in the @@ -350,7 +352,9 @@ def from_args(cls, args, project_profile_name=None): target could not be found. :returns Profile: The new Profile object. 
""" - cli_vars = parse_cli_vars(getattr(args, 'vars', '{}')) + if cli_vars is None: + cli_vars = parse_cli_vars(getattr(args, 'vars', '{}')) + threads_override = getattr(args, 'threads', None) target_override = getattr(args, 'target', None) raw_profiles = read_profile(args.profiles_dir) diff --git a/core/dbt/config/project.py b/core/dbt/config/project.py index 17729196cd3..13d01599143 100644 --- a/core/dbt/config/project.py +++ b/core/dbt/config/project.py @@ -1,3 +1,4 @@ + from copy import deepcopy import hashlib import os @@ -13,6 +14,7 @@ from dbt.exceptions import SemverException from dbt.exceptions import ValidationException from dbt.exceptions import warn_or_error +from dbt.logger import GLOBAL_LOGGER as logger from dbt.semver import VersionSpecifier from dbt.semver import versions_compatible from dbt.version import get_installed_version @@ -143,10 +145,9 @@ def _parse_versions(versions): class Project(object): def __init__(self, project_name, version, project_root, profile_name, source_paths, macro_paths, data_paths, test_paths, - analysis_paths, docs_paths, target_path, archive_paths, - clean_targets, log_path, modules_path, quoting, models, - on_run_start, on_run_end, archive, seeds, dbt_version, - packages): + analysis_paths, docs_paths, target_path, clean_targets, + log_path, modules_path, quoting, models, on_run_start, + on_run_end, archive, seeds, dbt_version, packages): self.project_name = project_name self.version = version self.project_root = project_root @@ -158,7 +159,6 @@ def __init__(self, project_name, version, project_root, profile_name, self.analysis_paths = analysis_paths self.docs_paths = docs_paths self.target_path = target_path - self.archive_paths = archive_paths self.clean_targets = clean_targets self.log_path = log_path self.modules_path = modules_path @@ -241,7 +241,6 @@ def from_project_config(cls, project_dict, packages_dict=None): analysis_paths = project_dict.get('analysis-paths', []) docs_paths = project_dict.get('docs-paths', source_paths[:]) target_path = project_dict.get('target-path', 'target') - archive_paths = project_dict.get('archive-paths', ['archives']) # should this also include the modules path by default? 
clean_targets = project_dict.get('clean-targets', [target_path]) log_path = project_dict.get('log-path', 'logs') @@ -275,7 +274,6 @@ def from_project_config(cls, project_dict, packages_dict=None): analysis_paths=analysis_paths, docs_paths=docs_paths, target_path=target_path, - archive_paths=archive_paths, clean_targets=clean_targets, log_path=log_path, modules_path=modules_path, @@ -323,7 +321,6 @@ def to_project_config(self, with_packages=False): 'analysis-paths': self.analysis_paths, 'docs-paths': self.docs_paths, 'target-path': self.target_path, - 'archive-paths': self.archive_paths, 'clean-targets': self.clean_targets, 'log-path': self.log_path, 'quoting': self.quoting, @@ -380,10 +377,6 @@ def from_project_root(cls, project_root, cli_vars): def from_current_directory(cls, cli_vars): return cls.from_project_root(os.getcwd(), cli_vars) - @classmethod - def from_args(cls, args): - return cls.from_current_directory(getattr(args, 'vars', '{}')) - def hashed_name(self): return hashlib.md5(self.project_name.encode('utf-8')).hexdigest() diff --git a/core/dbt/config/runtime.py b/core/dbt/config/runtime.py index 23a0e4c81d5..ee654474a5b 100644 --- a/core/dbt/config/runtime.py +++ b/core/dbt/config/runtime.py @@ -18,10 +18,10 @@ class RuntimeConfig(Project, Profile): """ def __init__(self, project_name, version, project_root, source_paths, macro_paths, data_paths, test_paths, analysis_paths, - docs_paths, target_path, archive_paths, clean_targets, - log_path, modules_path, quoting, models, on_run_start, - on_run_end, archive, seeds, dbt_version, profile_name, - target_name, config, threads, credentials, packages, args): + docs_paths, target_path, clean_targets, log_path, + modules_path, quoting, models, on_run_start, on_run_end, + archive, seeds, dbt_version, profile_name, target_name, + config, threads, credentials, packages, args): # 'vars' self.args = args self.cli_vars = parse_cli_vars(getattr(args, 'vars', '{}')) @@ -39,7 +39,6 @@ def __init__(self, project_name, version, project_root, source_paths, analysis_paths=analysis_paths, docs_paths=docs_paths, target_path=target_path, - archive_paths=archive_paths, clean_targets=clean_targets, log_path=log_path, modules_path=modules_path, @@ -88,7 +87,6 @@ def from_parts(cls, project, profile, args): analysis_paths=project.analysis_paths, docs_paths=project.docs_paths, target_path=project.target_path, - archive_paths=project.archive_paths, clean_targets=project.clean_targets, log_path=project.log_path, modules_path=project.modules_path, @@ -173,13 +171,16 @@ def from_args(cls, args): :raises DbtProfileError: If the profile is invalid or missing. :raises ValidationException: If the cli variables are invalid. 
""" + cli_vars = parse_cli_vars(getattr(args, 'vars', '{}')) + # build the project and read in packages.yml - project = Project.from_args(args) + project = Project.from_current_directory(cli_vars) # build the profile profile = Profile.from_args( args=args, - project_profile_name=project.profile_name + project_profile_name=project.profile_name, + cli_vars=cli_vars ) return cls.from_parts( diff --git a/core/dbt/context/common.py b/core/dbt/context/common.py index 6361d675ad4..f528af89ad3 100644 --- a/core/dbt/context/common.py +++ b/core/dbt/context/common.py @@ -1,3 +1,5 @@ +import copy +import functools import json import os @@ -11,10 +13,12 @@ import dbt.clients.jinja import dbt.clients.agate_helper import dbt.flags +import dbt.schema import dbt.tracking -import dbt.writer import dbt.utils +import dbt.hooks + from dbt.logger import GLOBAL_LOGGER as logger # noqa @@ -32,11 +36,6 @@ def __init__(self, adapter): def __getattr__(self, key): return getattr(self.relation_type, key) - def create_from_source(self, *args, **kwargs): - # bypass our create when creating from source so as not to mess up - # the source quoting - return self.relation_type.create_from_source(*args, **kwargs) - def create(self, *args, **kwargs): kwargs['quote_policy'] = dbt.utils.merge( self.quoting_config, @@ -47,15 +46,27 @@ def create(self, *args, **kwargs): class DatabaseWrapper(object): """ - Wrapper for runtime database interaction. Applies the runtime quote policy - via a relation proxy. + Wrapper for runtime database interaction. Mostly a compatibility layer now. """ - def __init__(self, adapter): + def __init__(self, connection_name, adapter): + self.connection_name = connection_name self.adapter = adapter self.Relation = RelationProxy(adapter) + def wrap(self, name): + func = getattr(self.adapter, name) + + @functools.wraps(func) + def wrapped(*args, **kwargs): + kwargs['model_name'] = self.connection_name + return func(*args, **kwargs) + + return wrapped + def __getattr__(self, name): - if name in self.adapter._available_: + if name in self.adapter._available_model_: + return self.wrap(name) + elif name in self.adapter._available_raw_: return getattr(self.adapter, name) else: raise AttributeError( @@ -72,7 +83,7 @@ def type(self): return self.adapter.type() def commit(self): - return self.adapter.commit_if_has_connection() + return self.adapter.commit_if_has_connection(self.connection_name) def _add_macro_map(context, package_name, macro_map): @@ -297,14 +308,14 @@ def fn(string): def fromjson(string, default=None): try: return json.loads(string) - except ValueError: + except ValueError as e: return default def tojson(value, default=None): try: return json.dumps(value) - except ValueError: + except ValueError as e: return default @@ -312,7 +323,7 @@ def try_or_compiler_error(model): def impl(message_if_exception, func, *args, **kwargs): try: return func(*args, **kwargs) - except Exception: + except Exception as e: dbt.exceptions.raise_compiler_error(message_if_exception, model) return impl @@ -348,7 +359,7 @@ def get_datetime_module_context(): def generate_base(model, model_dict, config, manifest, source_config, - provider, adapter=None): + provider, connection_name): """Generate the common aspects of the config dict.""" if provider is None: raise dbt.exceptions.InternalException( @@ -361,7 +372,6 @@ def generate_base(model, model_dict, config, manifest, source_config, target['type'] = config.credentials.type target.pop('pass', None) target['name'] = target_name - adapter = get_adapter(config) context = 
{'env': target} @@ -369,7 +379,7 @@ def generate_base(model, model_dict, config, manifest, source_config, pre_hooks = None post_hooks = None - db_wrapper = DatabaseWrapper(adapter) + db_wrapper = DatabaseWrapper(connection_name, adapter) context = dbt.utils.merge(context, { "adapter": db_wrapper, @@ -381,7 +391,7 @@ def generate_base(model, model_dict, config, manifest, source_config, "config": provider.Config(model_dict, source_config), "database": config.credentials.database, "env_var": env_var, - "exceptions": dbt.exceptions.wrapped_exports(model), + "exceptions": dbt.exceptions.CONTEXT_EXPORTS, "execute": provider.execute, "flags": dbt.flags, # TODO: Do we have to leave this in? @@ -428,7 +438,7 @@ def modify_generated_context(context, model, model_dict, config, manifest): return context -def generate_execute_macro(model, config, manifest, provider): +def generate_execute_macro(model, config, manifest, provider, connection_name): """Internally, macros can be executed like nodes, with some restrictions: - they don't have have all values available that nodes do: @@ -437,8 +447,8 @@ def generate_execute_macro(model, config, manifest, provider): - they can't be configured with config() directives """ model_dict = model.serialize() - context = generate_base(model, model_dict, config, manifest, None, - provider) + context = generate_base(model, model_dict, config, manifest, + None, provider, connection_name) return modify_generated_context(context, model, model_dict, config, manifest) @@ -447,7 +457,7 @@ def generate_execute_macro(model, config, manifest, provider): def generate_model(model, config, manifest, source_config, provider): model_dict = model.to_dict() context = generate_base(model, model_dict, config, manifest, - source_config, provider) + source_config, provider, model.get('name')) # operations (hooks) don't get a 'this' if model.resource_type != NodeType.Operation: this = get_this_relation(context['adapter'], config, model_dict) @@ -472,4 +482,5 @@ def generate(model, config, manifest, source_config=None, provider=None): or dbt.context.runtime.generate """ - return generate_model(model, config, manifest, source_config, provider) + return generate_model(model, config, manifest, source_config, + provider) diff --git a/core/dbt/context/parser.py b/core/dbt/context/parser.py index 3d2a8da5d78..2a9d2a87881 100644 --- a/core/dbt/context/parser.py +++ b/core/dbt/context/parser.py @@ -1,7 +1,6 @@ import dbt.exceptions import dbt.context.common -from dbt.adapters.factory import get_adapter execute = False @@ -49,7 +48,7 @@ def do_docs(*args): def source(db_wrapper, model, config, manifest): def do_source(source_name, table_name): model.sources.append([source_name, table_name]) - return db_wrapper.adapter.Relation.create_from_node(config, model) + return '' return do_source @@ -98,17 +97,12 @@ def get(self, name, validator=None, default=None): def generate(model, runtime_config, manifest, source_config): - # during parsing, we don't have a connection, but we might need one, so we - # have to acquire it. 
- # In the future, it would be nice to lazily open the connection, as in some - # projects it would be possible to parse without connecting to the db - with get_adapter(runtime_config).connection_named(model.get('name')): - return dbt.context.common.generate( - model, runtime_config, manifest, source_config, dbt.context.parser - ) + return dbt.context.common.generate( + model, runtime_config, manifest, source_config, dbt.context.parser) -def generate_macro(model, runtime_config, manifest): +def generate_macro(model, runtime_config, manifest, connection_name): return dbt.context.common.generate_execute_macro( - model, runtime_config, manifest, dbt.context.parser + model, runtime_config, manifest, dbt.context.parser, + connection_name ) diff --git a/core/dbt/context/runtime.py b/core/dbt/context/runtime.py index 2fc7b32cddb..40dcb77e73f 100644 --- a/core/dbt/context/runtime.py +++ b/core/dbt/context/runtime.py @@ -123,7 +123,8 @@ def generate(model, runtime_config, manifest): model, runtime_config, manifest, None, dbt.context.runtime) -def generate_macro(model, runtime_config, manifest): +def generate_macro(model, runtime_config, manifest, connection_name): return dbt.context.common.generate_execute_macro( - model, runtime_config, manifest, dbt.context.runtime + model, runtime_config, manifest, dbt.context.runtime, + connection_name ) diff --git a/core/dbt/contracts/connection.py b/core/dbt/contracts/connection.py index 0a79186e6c8..84572c23ab6 100644 --- a/core/dbt/contracts/connection.py +++ b/core/dbt/contracts/connection.py @@ -1,5 +1,7 @@ +import dbt.exceptions from dbt.api.object import APIObject from dbt.contracts.common import named_property +from dbt.logger import GLOBAL_LOGGER as logger # noqa CONNECTION_CONTRACT = { diff --git a/core/dbt/contracts/graph/compiled.py b/core/dbt/contracts/graph/compiled.py index f4e44040872..7030bdf9e5f 100644 --- a/core/dbt/contracts/graph/compiled.py +++ b/core/dbt/contracts/graph/compiled.py @@ -1,4 +1,7 @@ +from copy import copy, deepcopy + from dbt.api import APIObject +from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import deep_merge from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \ PARSED_MACRO_CONTRACT, ParsedNode diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 7002b5af512..99866cd8ddb 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -1,9 +1,10 @@ from dbt.api import APIObject +from dbt.contracts.graph.unparsed import UNPARSED_NODE_CONTRACT from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \ PARSED_MACRO_CONTRACT, PARSED_DOCUMENTATION_CONTRACT, \ PARSED_SOURCE_DEFINITION_CONTRACT from dbt.contracts.graph.compiled import COMPILED_NODE_CONTRACT, CompiledNode -from dbt.exceptions import raise_duplicate_resource_name +from dbt.exceptions import ValidationException from dbt.node_types import NodeType from dbt.logger import GLOBAL_LOGGER as logger from dbt import tracking @@ -400,22 +401,11 @@ def __getattr__(self, name): type(self).__name__, name) ) - def get_used_schemas(self, resource_types=None): + def get_used_schemas(self): return frozenset({ (node.database, node.schema) for node in self.nodes.values() - if not resource_types or node.resource_type in resource_types }) def get_used_databases(self): return frozenset(node.database for node in self.nodes.values()) - - def deepcopy(self, config=None): - return Manifest( - nodes={k: v.incorporate() for k, v in self.nodes.items()}, - macros={k: v.incorporate() for k, v 
in self.macros.items()}, - docs={k: v.incorporate() for k, v in self.docs.items()}, - generated_at=self.generated_at, - disabled=[n.incorporate() for n in self.disabled], - config=config - ) diff --git a/core/dbt/contracts/graph/parsed.py b/core/dbt/contracts/graph/parsed.py index d0e77c20ab5..966ba0f90f4 100644 --- a/core/dbt/contracts/graph/parsed.py +++ b/core/dbt/contracts/graph/parsed.py @@ -1,6 +1,8 @@ from dbt.api import APIObject from dbt.utils import deep_merge from dbt.node_types import NodeType +from dbt.exceptions import raise_duplicate_resource_name, \ + raise_patch_targets_not_found import dbt.clients.jinja @@ -441,79 +443,6 @@ def config(self, value): self._contents['config'] = value -ARCHIVE_CONFIG_CONTRACT = { - 'properties': { - 'target_database': { - 'type': 'string', - }, - 'target_schema': { - 'type': 'string', - }, - 'unique_key': { - 'type': 'string', - }, - 'anyOf': [ - { - 'properties': { - 'strategy': { - 'enum': ['timestamp'], - }, - 'updated_at': { - 'type': 'string', - 'description': ( - 'The column name with the timestamp to compare' - ), - }, - }, - 'required': ['updated_at'], - }, - { - 'properties': { - 'strategy': { - 'enum': ['check'], - }, - 'check_cols': { - 'oneOf': [ - { - 'type': 'array', - 'items': {'type': 'string'}, - 'description': 'The columns to check', - 'minLength': 1, - }, - { - 'enum': ['all'], - 'description': 'Check all columns', - }, - ], - }, - }, - 'required': ['check_cols'], - } - ] - }, - 'required': [ - 'target_database', 'target_schema', 'unique_key', 'strategy', - ], -} - - -PARSED_ARCHIVE_NODE_CONTRACT = deep_merge( - PARSED_NODE_CONTRACT, - { - 'properties': { - 'config': ARCHIVE_CONFIG_CONTRACT, - 'resource_type': { - 'enum': [NodeType.Archive], - }, - }, - } -) - - -class ParsedArchiveNode(ParsedNode): - SCHEMA = PARSED_ARCHIVE_NODE_CONTRACT - - # The parsed node update is only the 'patch', not the test. The test became a # regular parsed node. Note that description and columns must be present, but # may be empty. @@ -629,7 +558,6 @@ def generator(self): # available in this class. should we just generate this here? return dbt.clients.jinja.macro_generator(self._contents) - # This is just the file + its ID PARSED_DOCUMENTATION_CONTRACT = deep_merge( UNPARSED_DOCUMENTATION_FILE_CONTRACT, @@ -706,26 +634,9 @@ class Hook(APIObject): } -QUOTING_CONTRACT = { - 'properties': { - 'quoting': { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'database': {'type': 'boolean'}, - 'schema': {'type': 'boolean'}, - 'identifier': {'type': 'boolean'}, - }, - }, - }, - 'required': ['quoting'], -} - - PARSED_SOURCE_DEFINITION_CONTRACT = deep_merge( UNPARSED_BASE_CONTRACT, FRESHNESS_CONTRACT, - QUOTING_CONTRACT, HAS_DESCRIPTION_CONTRACT, HAS_UNIQUE_ID_CONTRACT, HAS_DOCREFS_CONTRACT, @@ -765,7 +676,7 @@ class Hook(APIObject): # the manifest search stuff really requires this, sadly 'resource_type': { 'enum': [NodeType.Source], - }, + } }, # note that while required, loaded_at_field and freshness may be null 'required': [ diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index c6e6bbbd75b..30de42ef695 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -75,7 +75,6 @@ NodeType.Seed, # we need this if parse_node is going to handle archives. 
NodeType.Archive, - NodeType.RPCCall, ] }, }, @@ -219,29 +218,6 @@ class UnparsedNodeUpdate(APIObject): } -_QUOTING_CONTRACT = { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'database': {'type': 'boolean'}, - 'schema': {'type': 'boolean'}, - 'identifier': {'type': 'boolean'}, - }, -} - - -QUOTING_CONTRACT = { - 'properties': { - 'quoting': { - 'anyOf': [ - {'type': 'null'}, - _QUOTING_CONTRACT, - ], - }, - }, -} - - FRESHNESS_CONTRACT = { 'properties': { 'loaded_at_field': { @@ -262,7 +238,6 @@ class UnparsedNodeUpdate(APIObject): UNPARSED_NODE_DESCRIPTION_CONTRACT, UNPARSED_COLUMN_DESCRIPTION_CONTRACT, FRESHNESS_CONTRACT, - QUOTING_CONTRACT, { 'description': ( 'A source table definition, as provided in the "tables" ' @@ -281,7 +256,6 @@ class UnparsedNodeUpdate(APIObject): UNPARSED_SOURCE_DEFINITION_CONTRACT = deep_merge( FRESHNESS_CONTRACT, - QUOTING_CONTRACT, { 'type': 'object', 'additionalProperties': False, @@ -361,7 +335,7 @@ def tables(self): 'type': 'string', 'description': ( 'Relative path to the originating file from the project root.' - ), + ), }, 'file_contents': { 'type': 'string', diff --git a/core/dbt/contracts/project.py b/core/dbt/contracts/project.py index 9e79101f52a..58e884abcea 100644 --- a/core/dbt/contracts/project.py +++ b/core/dbt/contracts/project.py @@ -91,10 +91,6 @@ 'target-path': { 'type': 'string', }, - 'archive-paths': { - 'type': 'array', - 'items': {'type': 'string'}, - }, 'clean-targets': { 'type': 'array', 'items': {'type': 'string'}, diff --git a/core/dbt/contracts/results.py b/core/dbt/contracts/results.py index e43cc8eb56e..0a991c89417 100644 --- a/core/dbt/contracts/results.py +++ b/core/dbt/contracts/results.py @@ -3,7 +3,10 @@ from dbt.contracts.common import named_property from dbt.contracts.graph.manifest import COMPILE_RESULT_NODE_CONTRACT from dbt.contracts.graph.unparsed import TIME_CONTRACT -from dbt.contracts.graph.parsed import PARSED_SOURCE_DEFINITION_CONTRACT +from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \ + PARSED_SOURCE_DEFINITION_CONTRACT +from dbt.contracts.graph.compiled import COMPILED_NODE_CONTRACT +from dbt.contracts.graph.manifest import PARSED_MANIFEST_CONTRACT TIMING_INFO_CONTRACT = { @@ -194,8 +197,8 @@ def skipped(self): 'type': 'array', 'items': { 'anyOf': [ - RUN_MODEL_RESULT_CONTRACT, - PARTIAL_RESULT_CONTRACT, + RUN_MODEL_RESULT_CONTRACT, + PARTIAL_RESULT_CONTRACT, ] }, 'description': 'An array of results, one per model', @@ -258,8 +261,8 @@ class SourceFreshnessResult(NodeSerializable): def __init__(self, node, max_loaded_at, snapshotted_at, age, status, thread_id, error=None, timing=None, execution_time=0): - max_loaded_at = max_loaded_at.isoformat() - snapshotted_at = snapshotted_at.isoformat() + max_loaded_at = max_loaded_at.isoformat() + 'Z' + snapshotted_at = snapshotted_at.isoformat() + 'Z' if timing is None: timing = [] super(SourceFreshnessResult, self).__init__( @@ -455,78 +458,3 @@ class FreshnessRunOutput(APIObject): def __init__(self, meta, sources): super(FreshnessRunOutput, self).__init__(meta=meta, sources=sources) - - -REMOTE_COMPILE_RESULT_CONTRACT = { - 'type': 'object', - 'additionalProperties': False, - 'properties': { - 'raw_sql': { - 'type': 'string', - }, - 'compiled_sql': { - 'type': 'string', - }, - 'timing': { - 'type': 'array', - 'items': TIMING_INFO_CONTRACT, - }, - }, - 'required': ['raw_sql', 'compiled_sql', 'timing'] -} - - -class RemoteCompileResult(APIObject): - SCHEMA = REMOTE_COMPILE_RESULT_CONTRACT - - def __init__(self, raw_sql, 
compiled_sql, node, timing=None, **kwargs): - if timing is None: - timing = [] - # this should not show up in the serialized output. - self.node = node - super(RemoteCompileResult, self).__init__( - raw_sql=raw_sql, - compiled_sql=compiled_sql, - timing=timing, - **kwargs - ) - - @property - def error(self): - return None - - -REMOTE_RUN_RESULT_CONTRACT = deep_merge(REMOTE_COMPILE_RESULT_CONTRACT, { - 'properties': { - 'table': { - 'type': 'object', - 'properties': { - 'column_names': { - 'type': 'array', - 'items': {'type': 'string'}, - }, - 'rows': { - 'type': 'array', - # any item type is ok - }, - }, - 'required': ['rows', 'column_names'], - }, - }, - 'required': ['table'], -}) - - -class RemoteRunResult(RemoteCompileResult): - SCHEMA = REMOTE_RUN_RESULT_CONTRACT - - def __init__(self, raw_sql, compiled_sql, node, timing=None, table=None): - if table is None: - table = [] - super(RemoteRunResult, self).__init__( - raw_sql=raw_sql, - compiled_sql=compiled_sql, - timing=timing, - table=table, - node=node - ) diff --git a/core/dbt/deprecations.py b/core/dbt/deprecations.py index baf92043cf5..25e27d3ab8a 100644 --- a/core/dbt/deprecations.py +++ b/core/dbt/deprecations.py @@ -1,3 +1,4 @@ +from dbt.logger import GLOBAL_LOGGER as logger import dbt.links import dbt.flags @@ -29,6 +30,16 @@ class DBTRepositoriesDeprecation(DBTDeprecation): """ +class SqlWhereDeprecation(DBTDeprecation): + name = "sql_where" + description = """\ +The `sql_where` option for incremental models is deprecated and will be + removed in a future release. Check the docs for more information + + {} + """.format(dbt.links.IncrementalDocs) + + class SeedDropExistingDeprecation(DBTDeprecation): name = 'drop-existing' description = """The --drop-existing argument to `dbt seed` has been @@ -72,6 +83,7 @@ def warn(name, *args, **kwargs): deprecations_list = [ DBTRepositoriesDeprecation(), SeedDropExistingDeprecation(), + SqlWhereDeprecation(), ] deprecations = {d.name: d for d in deprecations_list} diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 2319e748cc2..10e80c6422a 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -1,22 +1,11 @@ -import sys -import six -import functools - -from dbt.compat import builtins +from dbt.compat import basestring, builtins from dbt.logger import GLOBAL_LOGGER as logger import dbt.flags +import re class Exception(builtins.Exception): - CODE = -32000 - MESSAGE = "Server Error" - - def data(self): - # if overriding, make sure the result is json-serializable. - return { - 'type': self.__class__.__name__, - 'message': str(self), - } + pass class MacroReturn(builtins.BaseException): @@ -33,9 +22,6 @@ class InternalException(Exception): class RuntimeException(RuntimeError, Exception): - CODE = 10001 - MESSAGE = "Runtime error" - def __init__(self, msg, node=None): self.stack = [] self.node = node @@ -95,59 +81,8 @@ def __str__(self, prefix="! 
"): return lines[0] + "\n" + "\n".join( [" " + line for line in lines[1:]]) - def data(self): - result = Exception.data(self) - if self.node is None: - return result - - result.update({ - 'raw_sql': self.node.get('raw_sql'), - 'compiled_sql': self.node.get('injected_sql'), - }) - return result - - -class RPCFailureResult(RuntimeException): - CODE = 10002 - MESSAGE = "RPC execution error" - - -class RPCTimeoutException(RuntimeException): - CODE = 10008 - MESSAGE = 'RPC timeout error' - - def __init__(self, timeout): - super(RPCTimeoutException, self).__init__(self.MESSAGE) - self.timeout = timeout - - def data(self): - result = super(RPCTimeoutException, self).data() - result.update({ - 'timeout': self.timeout, - 'message': 'RPC timed out after {}s'.format(self.timeout), - }) - return result - - -class RPCKilledException(RuntimeException): - CODE = 10009 - MESSAGE = 'RPC process killed' - - def __init__(self, signum): - self.signum = signum - self.message = 'RPC process killed by signal {}'.format(self.signum) - super(RPCKilledException, self).__init__(self.message) - - def data(self): - return { - 'signum': self.signum, - 'message': self.message, - } - class DatabaseException(RuntimeException): - CODE = 10003 - MESSAGE = "Database Error" def process_stack(self): lines = [] @@ -164,9 +99,6 @@ def type(self): class CompilationException(RuntimeException): - CODE = 10004 - MESSAGE = "Compilation Error" - @property def type(self): return 'Compilation' @@ -177,8 +109,7 @@ class RecursionException(RuntimeException): class ValidationException(RuntimeException): - CODE = 10005 - MESSAGE = "Validation Error" + pass class JSONValidationException(ValidationException): @@ -186,9 +117,8 @@ def __init__(self, typename, errors): self.typename = typename self.errors = errors self.errors_message = ', '.join(errors) - msg = 'Invalid arguments passed to "{}" instance: {}'.format( - self.typename, self.errors_message - ) + msg = ('Invalid arguments passed to "{}" instance: {}'.format( + self.typename, self.errors_message)) super(JSONValidationException, self).__init__(msg) def __reduce__(self): @@ -200,16 +130,15 @@ class AliasException(ValidationException): pass +class ParsingException(Exception): + pass + + class DependencyException(Exception): - # this can happen due to raise_dependency_error and its callers - CODE = 10006 - MESSAGE = "Dependency Error" + pass class DbtConfigError(RuntimeException): - CODE = 10007 - MESSAGE = "DBT Configuration Error" - def __init__(self, message, project=None, result_type='invalid_project'): self.project = project super(DbtConfigError, self).__init__(message) @@ -593,8 +522,8 @@ def raise_ambiguous_catalog_match(unique_id, match_1, match_2): def get_match_string(match): return "{}.{}".format( - match.get('metadata', {}).get('schema'), - match.get('metadata', {}).get('name')) + match.get('metadata', {}).get('schema'), + match.get('metadata', {}).get('name')) raise_compiler_error( 'dbt found two relations in your warehouse with similar database ' @@ -683,26 +612,3 @@ def warn_or_error(msg, node=None, log_fmt=None): relation_wrong_type, ] } - - -def wrapper(model): - def wrap(func): - @functools.wraps(func) - def inner(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception: - exc_type, exc, exc_tb = sys.exc_info() - if hasattr(exc, 'node') and exc.node is None: - exc.node = model - six.reraise(exc_type, exc, exc_tb) - - return inner - return wrap - - -def wrapped_exports(model): - wrap = wrapper(model) - return { - name: wrap(export) for name, export in 
CONTEXT_EXPORTS.items() - } diff --git a/core/dbt/flags.py b/core/dbt/flags.py index 0d905598447..8bf43049a49 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -3,16 +3,13 @@ FULL_REFRESH = False USE_CACHE = True WARN_ERROR = False -TEST_NEW_PARSER = False def reset(): - global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR, \ - TEST_NEW_PARSER + global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR STRICT_MODE = False NON_DESTRUCTIVE = False FULL_REFRESH = False USE_CACHE = True WARN_ERROR = False - TEST_NEW_PARSER = False diff --git a/core/dbt/graph/selector.py b/core/dbt/graph/selector.py index 6282d47d4aa..2b5fc24a838 100644 --- a/core/dbt/graph/selector.py +++ b/core/dbt/graph/selector.py @@ -3,6 +3,7 @@ from dbt.utils import is_enabled, get_materialization, coalesce from dbt.node_types import NodeType +from dbt.contracts.graph.parsed import ParsedNode import dbt.exceptions SELECTOR_PARENTS = '+' diff --git a/core/dbt/hooks.py b/core/dbt/hooks.py index 2434f762ca3..c673b59a8c5 100644 --- a/core/dbt/hooks.py +++ b/core/dbt/hooks.py @@ -12,7 +12,7 @@ class ModelHookType: def _parse_hook_to_dict(hook_string): try: hook_dict = json.loads(hook_string) - except ValueError: + except ValueError as e: hook_dict = {"sql": hook_string} if 'transaction' not in hook_dict: diff --git a/core/dbt/include/global_project/macros/adapters/common.sql b/core/dbt/include/global_project/macros/adapters/common.sql index a24e707f850..254cc5ba46b 100644 --- a/core/dbt/include/global_project/macros/adapters/common.sql +++ b/core/dbt/include/global_project/macros/adapters/common.sql @@ -94,11 +94,11 @@ {% endmacro %} -{% macro get_catalog(information_schemas) -%} - {{ return(adapter_macro('get_catalog', information_schemas)) }} +{% macro get_catalog() -%} + {{ return(adapter_macro('get_catalog')) }} {%- endmacro %} -{% macro default__get_catalog(information_schemas) -%} +{% macro default__get_catalog() -%} {% set typename = adapter.type() %} {% set msg -%} @@ -210,27 +210,27 @@ {% endmacro %} -{% macro check_schema_exists(information_schema, schema) -%} - {{ return(adapter_macro('check_schema_exists', information_schema, schema)) }} +{% macro check_schema_exists(database, schema) -%} + {{ return(adapter_macro('check_schema_exists', database, schema)) }} {% endmacro %} -{% macro default__check_schema_exists(information_schema, schema) -%} +{% macro default__check_schema_exists(database, schema) -%} {% call statement('check_schema_exists', fetch_result=True, auto_begin=False) -%} select count(*) - from {{ information_schema }}.schemata - where catalog_name='{{ information_schema.database }}' + from {{ information_schema_name(database) }}.schemata + where catalog_name='{{ database }}' and schema_name='{{ schema }}' {%- endcall %} {{ return(load_result('check_schema_exists').table) }} {% endmacro %} -{% macro list_relations_without_caching(information_schema, schema) %} - {{ return(adapter_macro('list_relations_without_caching', information_schema, schema)) }} +{% macro list_relations_without_caching(database, schema) %} + {{ return(adapter_macro('list_relations_without_caching', database, schema)) }} {% endmacro %} -{% macro default__list_relations_without_caching(information_schema, schema) %} +{% macro default__list_relations_without_caching(database, schema) %} {{ dbt.exceptions.raise_not_implemented( 'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }} {% endmacro %} diff --git 
a/core/dbt/include/global_project/macros/etc/get_custom_alias.sql b/core/dbt/include/global_project/macros/etc/get_custom_alias.sql deleted file mode 100644 index 7a382a58310..00000000000 --- a/core/dbt/include/global_project/macros/etc/get_custom_alias.sql +++ /dev/null @@ -1,26 +0,0 @@ - -{# - Renders a alias name given a custom alias name. If the custom - alias name is none, then the resulting alias is just the filename of the - model. If a alias override is specified, then that is used. - - This macro can be overriden in projects to define different semantics - for rendering a alias name. - - Arguments: - custom_alias_name: The custom alias name specified for a model, or none - -#} -{% macro generate_alias_name(node, custom_alias_name=none) -%} - - {%- if custom_alias_name is none -%} - - {{ node.name }} - - {%- else -%} - - {{ custom_alias_name | trim }} - - {%- endif -%} - -{%- endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/archive/archive.sql b/core/dbt/include/global_project/macros/materializations/archive/archive.sql index ead07b657d7..604b6492e03 100644 --- a/core/dbt/include/global_project/macros/materializations/archive/archive.sql +++ b/core/dbt/include/global_project/macros/materializations/archive/archive.sql @@ -2,12 +2,12 @@ Create SCD Hash SQL fields cross-db #} -{% macro archive_hash_arguments(args) %} - {{ adapter_macro('archive_hash_arguments', args) }} +{% macro archive_scd_hash() %} + {{ adapter_macro('archive_scd_hash') }} {% endmacro %} -{% macro default__archive_hash_arguments(args) %} - md5({% for arg in args %}coalesce(cast({{ arg }} as varchar ), '') {% if not loop.last %} || '|' || {% endif %}{% endfor %}) +{% macro default__archive_scd_hash() %} + md5("dbt_pk" || '|' || "dbt_updated_at") {% endmacro %} {% macro create_temporary_table(sql, relation) %} @@ -48,74 +48,44 @@ {% macro default__archive_update(target_relation, tmp_relation) %} update {{ target_relation }} - set dbt_valid_to = tmp.dbt_valid_to + set {{ adapter.quote('valid_to') }} = tmp.{{ adapter.quote('valid_to') }} from {{ tmp_relation }} as tmp - where tmp.dbt_scd_id = {{ target_relation }}.dbt_scd_id - and change_type = 'update'; + where tmp.{{ adapter.quote('scd_id') }} = {{ target_relation }}.{{ adapter.quote('scd_id') }} + and {{ adapter.quote('change_type') }} = 'update'; {% endmacro %} -{% macro archive_get_time() -%} - {{ adapter_macro('archive_get_time') }} -{%- endmacro %} - -{% macro default__archive_get_time() -%} - {{ current_timestamp() }} -{%- endmacro %} - -{% macro snowflake__archive_get_time() -%} - to_timestamp_ntz({{ current_timestamp() }}) -{%- endmacro %} - - -{% macro archive_select_generic(source_sql, target_relation, transforms, scd_hash) -%} - with source as ( - {{ source_sql }} - ), - {{ transforms }} - merged as ( - - select *, 'update' as change_type from updates - union all - select *, 'insert' as change_type from insertions - - ) - - select *, - {{ scd_hash }} as dbt_scd_id - from merged - -{%- endmacro %} - {# Cross-db compatible archival implementation #} -{% macro archive_select_timestamp(source_sql, target_relation, source_columns, unique_key, updated_at) -%} +{% macro archive_select(source_relation, target_relation, source_columns, unique_key, updated_at) %} + {% set timestamp_column = api.Column.create('_', 'timestamp') %} - {% set transforms -%} - current_data as ( + + with current_data as ( select {% for col in source_columns %} - {{ col.name }} {% if not loop.last %},{% endif %} + {{ adapter.quote(col.name) }} {% if not 
loop.last %},{% endif %} {% endfor %}, - {{ updated_at }} as dbt_updated_at, - {{ unique_key }} as dbt_pk, - {{ updated_at }} as dbt_valid_from, - {{ timestamp_column.literal('null') }} as tmp_valid_to - from source + {{ updated_at }} as {{ adapter.quote('dbt_updated_at') }}, + {{ unique_key }} as {{ adapter.quote('dbt_pk') }}, + {{ updated_at }} as {{ adapter.quote('valid_from') }}, + {{ timestamp_column.literal('null') }} as {{ adapter.quote('tmp_valid_to') }} + from {{ source_relation }} + ), archived_data as ( select {% for col in source_columns %} - {{ col.name }}, + {{ adapter.quote(col.name) }}, {% endfor %} - {{ updated_at }} as dbt_updated_at, - {{ unique_key }} as dbt_pk, - dbt_valid_from, - dbt_valid_to as tmp_valid_to + {{ updated_at }} as {{ adapter.quote('dbt_updated_at') }}, + {{ unique_key }} as {{ adapter.quote('dbt_pk') }}, + {{ adapter.quote('valid_from') }}, + {{ adapter.quote('valid_to') }} as {{ adapter.quote('tmp_valid_to') }} from {{ target_relation }} ), @@ -124,16 +94,14 @@ select current_data.*, - {{ timestamp_column.literal('null') }} as dbt_valid_to + {{ timestamp_column.literal('null') }} as {{ adapter.quote('valid_to') }} from current_data left outer join archived_data - on archived_data.dbt_pk = current_data.dbt_pk - where - archived_data.dbt_pk is null - or ( - archived_data.dbt_pk is not null - and archived_data.dbt_updated_at < current_data.dbt_updated_at - and archived_data.tmp_valid_to is null + on archived_data.{{ adapter.quote('dbt_pk') }} = current_data.{{ adapter.quote('dbt_pk') }} + where archived_data.{{ adapter.quote('dbt_pk') }} is null or ( + archived_data.{{ adapter.quote('dbt_pk') }} is not null and + current_data.{{ adapter.quote('dbt_updated_at') }} > archived_data.{{ adapter.quote('dbt_updated_at') }} and + archived_data.{{ adapter.quote('tmp_valid_to') }} is null ) ), @@ -141,132 +109,56 @@ select archived_data.*, - current_data.dbt_updated_at as dbt_valid_to + current_data.{{ adapter.quote('dbt_updated_at') }} as {{ adapter.quote('valid_to') }} from current_data left outer join archived_data - on archived_data.dbt_pk = current_data.dbt_pk - where archived_data.dbt_pk is not null - and archived_data.dbt_updated_at < current_data.dbt_updated_at - and archived_data.tmp_valid_to is null + on archived_data.{{ adapter.quote('dbt_pk') }} = current_data.{{ adapter.quote('dbt_pk') }} + where archived_data.{{ adapter.quote('dbt_pk') }} is not null + and archived_data.{{ adapter.quote('dbt_updated_at') }} < current_data.{{ adapter.quote('dbt_updated_at') }} + and archived_data.{{ adapter.quote('tmp_valid_to') }} is null ), - {%- endset %} - {%- set scd_hash = archive_hash_arguments(['dbt_pk', 'dbt_updated_at']) -%} - {{ archive_select_generic(source_sql, target_relation, transforms, scd_hash) }} -{%- endmacro %} - - -{% macro archive_select_check_cols(source_sql, target_relation, source_columns, unique_key, check_cols) -%} - {%- set timestamp_column = api.Column.create('_', 'timestamp') -%} - - {# if we recognize the primary key, it's the newest record, and anything we care about has changed, it's an update candidate #} - {%- set update_candidate -%} - archived_data.dbt_pk is not null - and ( - {%- for col in check_cols %} - current_data.{{ col }} <> archived_data.{{ col }} - {%- if not loop.last %} or {% endif %} - {% endfor -%} - ) - and archived_data.tmp_valid_to is null - {%- endset %} - {% set transforms -%} - current_data as ( - - select - {% for col in source_columns %} - {{ col.name }} {% if not loop.last %},{% endif %} - {% endfor %}, - {{ 
archive_get_time() }} as dbt_updated_at, - {{ unique_key }} as dbt_pk, - {{ archive_get_time() }} as dbt_valid_from, - {{ timestamp_column.literal('null') }} as tmp_valid_to - from source - ), - - archived_data as ( - - select - {% for col in source_columns %} - {{ col.name }}, - {% endfor %} - dbt_updated_at, - {{ unique_key }} as dbt_pk, - dbt_valid_from, - dbt_valid_to as tmp_valid_to - from {{ target_relation }} - - ), - - insertions as ( - - select - current_data.*, - {{ timestamp_column.literal('null') }} as dbt_valid_to - from current_data - left outer join archived_data - on archived_data.dbt_pk = current_data.dbt_pk - where - archived_data.dbt_pk is null - or ( {{ update_candidate }} ) - ), + merged as ( - updates as ( + select *, 'update' as {{ adapter.quote('change_type') }} from updates + union all + select *, 'insert' as {{ adapter.quote('change_type') }} from insertions - select - archived_data.*, - {{ archive_get_time() }} as dbt_valid_to - from current_data - left outer join archived_data - on archived_data.dbt_pk = current_data.dbt_pk - where {{ update_candidate }} - ), - {%- endset %} - - {%- set hash_components = ['dbt_pk'] %} - {%- do hash_components.extend(check_cols) -%} - {%- set scd_hash = archive_hash_arguments(hash_components) -%} - {{ archive_select_generic(source_sql, target_relation, transforms, scd_hash) }} -{%- endmacro %} - -{# this is gross #} -{% macro create_empty_table_as(sql) %} - {% set tmp_relation = api.Relation.create(identifier=model['name']+'_dbt_archival_view_tmp', type='view') %} - {% set limited_sql -%} - with cte as ( - {{ sql }} ) - select * from cte limit 0 - {%- endset %} - {%- set tmp_relation = create_temporary_table(limited_sql, tmp_relation) -%} - {{ return(tmp_relation) }} + select *, + {{ archive_scd_hash() }} as {{ adapter.quote('scd_id') }} + from merged {% endmacro %} - {% materialization archive, default %} {%- set config = model['config'] -%} {%- set target_database = config.get('target_database') -%} {%- set target_schema = config.get('target_schema') -%} - {%- set target_table = model.get('alias', model.get('name')) -%} - {%- set strategy = config.get('strategy') -%} + {%- set target_table = config.get('target_table') -%} + + {%- set source_database = config.get('source_database') -%} + {%- set source_schema = config.get('source_schema') -%} + {%- set source_table = config.get('source_table') -%} - {% set information_schema = api.Relation.create( - database=target_database, - schema=target_schema, - identifier=target_table).information_schema() %} + {{ create_schema(target_database, target_schema) }} - {% if not check_schema_exists(information_schema, target_schema) %} - {{ create_schema(target_database, target_schema) }} - {% endif %} + {%- set source_relation = adapter.get_relation( + database=source_database, + schema=source_schema, + identifier=source_table) -%} {%- set target_relation = adapter.get_relation( database=target_database, schema=target_schema, identifier=target_table) -%} + {%- if source_relation is none -%} + {{ exceptions.missing_relation('.'.join([source_database, source_schema, source_table])) }} + {%- endif -%} + {%- if target_relation is none -%} {%- set target_relation = api.Relation.create( database=target_database, @@ -276,15 +168,13 @@ {{ exceptions.relation_wrong_type(target_relation, 'table') }} {%- endif -%} - {% set source_info_model = create_empty_table_as(model['injected_sql']) %} - - {%- set source_columns = adapter.get_columns_in_relation(source_info_model) -%} - + {%- set source_columns = 
adapter.get_columns_in_relation(source_relation) -%} {%- set unique_key = config.get('unique_key') -%} + {%- set updated_at = config.get('updated_at') -%} {%- set dest_columns = source_columns + [ - api.Column.create('dbt_valid_from', 'timestamp'), - api.Column.create('dbt_valid_to', 'timestamp'), - api.Column.create('dbt_scd_id', 'string'), + api.Column.create('valid_from', 'timestamp'), + api.Column.create('valid_to', 'timestamp'), + api.Column.create('scd_id', 'string'), api.Column.create('dbt_updated_at', 'timestamp'), ] -%} @@ -292,31 +182,18 @@ {{ create_archive_table(target_relation, dest_columns) }} {% endcall %} - {% set missing_columns = adapter.get_missing_columns(source_info_model, target_relation) %} + {% set missing_columns = adapter.get_missing_columns(source_relation, target_relation) %} {{ create_columns(target_relation, missing_columns) }} - {{ adapter.valid_archive_target(target_relation) }} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = model['name'] + '__dbt_archival_tmp' -%} + {%- set tmp_identifier = identifier + '__dbt_archival_tmp' -%} {% set tmp_table_sql -%} with dbt_archive_sbq as ( - - {% if strategy == 'timestamp' %} - {%- set updated_at = config.get('updated_at') -%} - {{ archive_select_timestamp(model['injected_sql'], target_relation, source_columns, unique_key, updated_at) }} - {% elif strategy == 'check' %} - {%- set check_cols = config.get('check_cols') -%} - {% if check_cols == 'all' %} - {% set check_cols = source_columns | map(attribute='name') | list %} - {% endif %} - {{ archive_select_check_cols(model['injected_sql'], target_relation, source_columns, unique_key, check_cols)}} - {% else %} - {{ exceptions.raise_compiler_error('Got invalid strategy "{}"'.format(strategy)) }} - {% endif %} + {{ archive_select(source_relation, target_relation, source_columns, unique_key, updated_at) }} ) select * from dbt_archive_sbq @@ -338,7 +215,7 @@ {{ column_list(dest_columns) }} ) select {{ column_list(dest_columns) }} from {{ tmp_relation }} - where change_type = 'insert'; + where {{ adapter.quote('change_type') }} = 'insert'; {% endcall %} {{ adapter.commit() }} diff --git a/core/dbt/include/global_project/macros/materializations/helpers.sql b/core/dbt/include/global_project/macros/materializations/helpers.sql index da78eb93506..c2eadfdab39 100644 --- a/core/dbt/include/global_project/macros/materializations/helpers.sql +++ b/core/dbt/include/global_project/macros/materializations/helpers.sql @@ -14,14 +14,14 @@ {% macro column_list(columns) %} {%- for col in columns %} - {{ col.name }} {% if not loop.last %},{% endif %} + {{ adapter.quote(col.name) }} {% if not loop.last %},{% endif %} {% endfor -%} {% endmacro %} {% macro column_list_for_create_table(columns) %} {%- for col in columns %} - {{ col.name }} {{ col.data_type }} {%- if not loop.last %},{% endif %} + {{ adapter.quote(col.name) }} {{ col.data_type }} {%- if not loop.last %},{% endif %} {% endfor -%} {% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql b/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql index f53df57cd4e..438b3a066c3 100644 --- a/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql +++ b/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql @@ -12,10 +12,11 @@ {%- endmacro %} {% materialization incremental, default -%} + {%- set sql_where = config.get('sql_where') -%} {%- set unique_key = config.get('unique_key') 
-%} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = model['name'] + '__dbt_incremental_tmp' -%} + {%- set tmp_identifier = identifier + '__dbt_incremental_tmp' -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%} @@ -56,7 +57,19 @@ {%- else -%} {%- call statement() -%} - {{ dbt.create_table_as(True, tmp_relation, sql) }} + {% set tmp_table_sql -%} + {# We are using a subselect instead of a CTE here to allow PostgreSQL to use indexes. -#} + select * from ( + {{ sql }} + ) as dbt_incr_sbq + + {% if sql_where %} + where ({{ sql_where }}) + or ({{ sql_where }}) is null + {% endif %} + {%- endset %} + + {{ dbt.create_table_as(True, tmp_relation, tmp_table_sql) }} {%- endcall -%} diff --git a/core/dbt/include/global_project/macros/materializations/table/table.sql b/core/dbt/include/global_project/macros/materializations/table/table.sql index d12062c50a9..2e76017fbb3 100644 --- a/core/dbt/include/global_project/macros/materializations/table/table.sql +++ b/core/dbt/include/global_project/macros/materializations/table/table.sql @@ -1,7 +1,7 @@ {% materialization table, default %} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%} - {%- set backup_identifier = model['name'] + '__dbt_backup' -%} + {%- set tmp_identifier = identifier + '__dbt_tmp' -%} + {%- set backup_identifier = identifier + '__dbt_backup' -%} {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} diff --git a/core/dbt/include/global_project/macros/materializations/view/view.sql b/core/dbt/include/global_project/macros/materializations/view/view.sql index 2fa2a672678..f5c68963444 100644 --- a/core/dbt/include/global_project/macros/materializations/view/view.sql +++ b/core/dbt/include/global_project/macros/materializations/view/view.sql @@ -1,8 +1,8 @@ {%- materialization view, default -%} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%} - {%- set backup_identifier = model['name'] + '__dbt_backup' -%} + {%- set tmp_identifier = identifier + '__dbt_tmp' -%} + {%- set backup_identifier = identifier + '__dbt_backup' -%} {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} diff --git a/core/dbt/linker.py b/core/dbt/linker.py index 1a17c791af1..ad1a3bc9e6f 100644 --- a/core/dbt/linker.py +++ b/core/dbt/linker.py @@ -1,6 +1,8 @@ import networkx as nx +from collections import defaultdict import threading +import dbt.utils from dbt.compat import PriorityQueue from dbt.node_types import NodeType @@ -172,28 +174,6 @@ def join(self): self.inner.join() -def _subset_graph(graph, include_nodes): - """Create and return a new graph that is a shallow copy of graph but with - only the nodes in include_nodes. Transitive edges across removed nodes are - preserved as explicit new edges. 
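The linker.py hunk in this patch switches between two ways of pruning the dependency graph down to the selected nodes: taking the transitive closure first, so that dependencies passing through excluded nodes survive as explicit edges, versus copying the graph and deleting the excluded nodes outright. A minimal standalone sketch of the difference, assuming only networkx and illustrative node names:

# Not code from the patch; a self-contained comparison of the two pruning strategies.
import networkx as nx

graph = nx.DiGraph()
graph.add_edges_from([('model_a', 'model_b'), ('model_b', 'model_c')])
selected = {'model_a', 'model_c'}    # 'model_b' is excluded from the selection

# Plain removal: the implied model_a -> model_c ordering is lost with model_b.
pruned = nx.DiGraph(graph)
for node in list(pruned.nodes()):
    if node not in selected:
        pruned.remove_node(node)
print(list(pruned.edges()))          # []

# Transitive closure first: the implied edge is preserved explicitly.
closed = nx.transitive_closure(graph)
for node in list(closed.nodes()):
    if node not in selected:
        closed.remove_node(node)
print(list(closed.edges()))          # [('model_a', 'model_c')]

Without the closure step, model_c loses its ordering constraint on model_a whenever model_b is excluded from the selection.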
- """ - new_graph = nx.algorithms.transitive_closure(graph) - - include_nodes = set(include_nodes) - - for node in graph.nodes(): - if node not in include_nodes: - new_graph.remove_node(node) - - for node in include_nodes: - if node not in new_graph: - raise RuntimeError( - "Couldn't find model '{}' -- does it exist or is " - "it disabled?".format(node) - ) - return new_graph - - class Linker(object): def __init__(self, data=None): if data is None: @@ -229,7 +209,23 @@ def as_graph_queue(self, manifest, limit_to=None): else: graph_nodes = limit_to - new_graph = _subset_graph(self.graph, graph_nodes) + new_graph = nx.DiGraph(self.graph) + + to_remove = [] + graph_nodes_lookup = set(graph_nodes) + for node in new_graph.nodes(): + if node not in graph_nodes_lookup: + to_remove.append(node) + + for node in to_remove: + new_graph.remove_node(node) + + for node in graph_nodes: + if node not in new_graph: + raise RuntimeError( + "Couldn't find model '{}' -- does it exist or is " + "it disabled?".format(node) + ) return GraphQueue(new_graph, manifest) def get_dependent_nodes(self, node): diff --git a/core/dbt/loader.py b/core/dbt/loader.py index 7e98d0b29ac..daa74d510c6 100644 --- a/core/dbt/loader.py +++ b/core/dbt/loader.py @@ -1,6 +1,7 @@ import os import itertools +from dbt import deprecations from dbt.include.global_project import PACKAGES import dbt.exceptions import dbt.flags @@ -11,7 +12,7 @@ from dbt.parser import MacroParser, ModelParser, SeedParser, AnalysisParser, \ DocumentationParser, DataTestParser, HookParser, ArchiveParser, \ - SchemaParser, ParserUtils, ArchiveBlockParser + SchemaParser, ParserUtils from dbt.contracts.project import ProjectList @@ -34,15 +35,15 @@ def _load_sql_nodes(self, parser_type, resource_type, relative_dirs_attr, self.macro_manifest) for project_name, project in self.all_projects.items(): - parse_results = parser.load_and_parse( + nodes, disabled = parser.load_and_parse( package_name=project_name, root_dir=project.project_root, relative_dirs=getattr(project, relative_dirs_attr), resource_type=resource_type, **kwargs ) - self.nodes.update(parse_results.parsed) - self.disabled.extend(parse_results.disabled) + self.nodes.update(nodes) + self.disabled.extend(disabled) def _load_macros(self, internal_manifest=None): # skip any projects in the internal manifest @@ -75,8 +76,6 @@ def _load_seeds(self): def _load_nodes(self): self._load_sql_nodes(ModelParser, NodeType.Model, 'source_paths') - self._load_sql_nodes(ArchiveBlockParser, NodeType.Archive, - 'archive_paths') self._load_sql_nodes(AnalysisParser, NodeType.Analysis, 'analysis_paths') self._load_sql_nodes(DataTestParser, NodeType.Test, 'test_paths', @@ -195,14 +194,12 @@ def _check_resource_uniqueness(manifest): existing_node = names_resources.get(name) if existing_node is not None: dbt.exceptions.raise_duplicate_resource_name( - existing_node, node - ) + existing_node, node) existing_alias = alias_resources.get(alias) if existing_alias is not None: dbt.exceptions.raise_ambiguous_alias( - existing_alias, node - ) + existing_alias, node) names_resources[name] = node alias_resources[alias] = node @@ -214,9 +211,17 @@ def _warn_for_unused_resource_config_paths(manifest, config): config.warn_for_unused_resource_config_paths(resource_fqns, disabled_fqns) +def _warn_for_deprecated_configs(manifest): + for unique_id, node in manifest.nodes.items(): + is_model = node.resource_type == NodeType.Model + if is_model and 'sql_where' in node.config: + deprecations.warn('sql_where') + + def _check_manifest(manifest, 
config): _check_resource_uniqueness(manifest) _warn_for_unused_resource_config_paths(manifest, config) + _warn_for_deprecated_configs(manifest) def internal_project_names(): diff --git a/core/dbt/logger.py b/core/dbt/logger.py index f658769172f..6b2ab24b391 100644 --- a/core/dbt/logger.py +++ b/core/dbt/logger.py @@ -4,9 +4,11 @@ import logging.handlers import os import sys +import warnings import colorama + # Colorama needs some help on windows because we're using logger.info # intead of print(). If the Windows env doesn't have a TERM var set, # then we should override the logging stream to use the colorama @@ -15,27 +17,6 @@ colorama_stdout = sys.stdout colorama_wrap = True -colorama.init(wrap=colorama_wrap) - -DEBUG = logging.DEBUG -NOTICE = 15 -INFO = logging.INFO -WARNING = logging.WARNING -ERROR = logging.ERROR -CRITICAL = logging.CRITICAL - -logging.addLevelName(NOTICE, 'NOTICE') - - -class Logger(logging.Logger): - def notice(self, msg, *args, **kwargs): - if self.isEnabledFor(NOTICE): - self._log(NOTICE, msg, args, **kwargs) - - -logging.setLoggerClass(Logger) - - if sys.platform == 'win32' and not os.environ.get('TERM'): colorama_wrap = False colorama_stdout = colorama.AnsiToWin32(sys.stdout).stream @@ -48,28 +29,23 @@ def notice(self, msg, *args, **kwargs): # create a global console logger for dbt stdout_handler = logging.StreamHandler(colorama_stdout) stdout_handler.setFormatter(logging.Formatter('%(message)s')) -stdout_handler.setLevel(NOTICE) +stdout_handler.setLevel(logging.INFO) logger = logging.getLogger('dbt') logger.addHandler(stdout_handler) -logger.setLevel(DEBUG) -logging.getLogger().setLevel(CRITICAL) +logger.setLevel(logging.DEBUG) +logging.getLogger().setLevel(logging.CRITICAL) # Quiet these down in the logs -logging.getLogger('botocore').setLevel(INFO) -logging.getLogger('requests').setLevel(INFO) -logging.getLogger('urllib3').setLevel(INFO) -logging.getLogger('google').setLevel(INFO) -logging.getLogger('snowflake.connector').setLevel(INFO) -logging.getLogger('parsedatetime').setLevel(INFO) -# we never want to seek werkzeug logs -logging.getLogger('werkzeug').setLevel(CRITICAL) +logging.getLogger('botocore').setLevel(logging.INFO) +logging.getLogger('requests').setLevel(logging.INFO) +logging.getLogger('urllib3').setLevel(logging.INFO) +logging.getLogger('google').setLevel(logging.INFO) +logging.getLogger('snowflake.connector').setLevel(logging.INFO) +logging.getLogger('parsedatetime').setLevel(logging.INFO) # provide this for the cache. 
CACHE_LOGGER = logging.getLogger('dbt.cache') -# provide this for RPC connection logging -RPC_LOGGER = logging.getLogger('dbt.rpc') - # Redirect warnings through our logging setup # They will be logged to a file below @@ -94,10 +70,6 @@ def filter(self, record): return True -def default_formatter(): - return logging.Formatter('%(asctime)-18s (%(threadName)s): %(message)s') - - def initialize_logger(debug_mode=False, path=None): global initialized, logger, stdout_handler @@ -105,8 +77,9 @@ def initialize_logger(debug_mode=False, path=None): return if debug_mode: - stdout_handler.setFormatter(default_formatter()) - stdout_handler.setLevel(DEBUG) + stdout_handler.setFormatter( + logging.Formatter('%(asctime)-18s (%(threadName)s): %(message)s')) + stdout_handler.setLevel(logging.DEBUG) if path is not None: make_log_dir_if_missing(path) @@ -123,15 +96,16 @@ def initialize_logger(debug_mode=False, path=None): color_filter = ColorFilter() logdir_handler.addFilter(color_filter) - logdir_handler.setFormatter(default_formatter()) - logdir_handler.setLevel(DEBUG) + logdir_handler.setFormatter( + logging.Formatter('%(asctime)-18s (%(threadName)s): %(message)s')) + logdir_handler.setLevel(logging.DEBUG) logger.addHandler(logdir_handler) # Log Python warnings to file warning_logger = logging.getLogger('py.warnings') warning_logger.addHandler(logdir_handler) - warning_logger.setLevel(DEBUG) + warning_logger.setLevel(logging.DEBUG) initialized = True @@ -147,56 +121,3 @@ def log_cache_events(flag): GLOBAL_LOGGER = logger - - -class QueueFormatter(logging.Formatter): - def formatMessage(self, record): - superself = super(QueueFormatter, self) - if hasattr(superself, 'formatMessage'): - # python 3.x - return superself.formatMessage(record) - - # python 2.x, handling weird unicode things - try: - return self._fmt % record.__dict__ - except UnicodeDecodeError: - try: - record.name = record.name.decode('utf-8') - return self._fmt % record.__dict__ - except UnicodeDecodeError as e: - raise e - - def format(self, record): - record.message = record.getMessage() - record.asctime = self.formatTime(record, self.datefmt) - formatted = self.formatMessage(record) - - output = { - 'message': formatted, - 'timestamp': record.asctime, - 'levelname': record.levelname, - 'level': record.levelno, - } - if record.exc_info: - if not record.exc_text: - record.exc_text = self.formatException(record.exc_info) - output['exc_info'] = record.exc_text - return output - - -class QueueLogHandler(logging.Handler): - def __init__(self, queue): - super(QueueLogHandler, self).__init__() - self.queue = queue - - def emit(self, record): - msg = self.format(record) - self.queue.put_nowait(['log', msg]) - - -def add_queue_handler(queue): - """Add a queue log handler to the global logger.""" - handler = QueueLogHandler(queue) - handler.setFormatter(QueueFormatter()) - handler.setLevel(DEBUG) - GLOBAL_LOGGER.addHandler(handler) diff --git a/core/dbt/main.py b/core/dbt/main.py index 9d96d9c91d9..8a8a001c1a5 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -5,7 +5,6 @@ import os.path import sys import traceback -from contextlib import contextmanager import dbt.version import dbt.flags as flags @@ -21,8 +20,6 @@ import dbt.task.generate as generate_task import dbt.task.serve as serve_task import dbt.task.freshness as freshness_task -import dbt.task.run_operation as run_operation_task -from dbt.task.rpc_server import RPCServerTask from dbt.adapters.factory import reset_adapters import dbt.tracking @@ -32,8 +29,9 @@ import dbt.profiler from 
dbt.utils import ExitCodes -from dbt.config import UserConfig, PROFILES_DIR -from dbt.exceptions import RuntimeException +from dbt.config import Project, UserConfig, RuntimeConfig, PROFILES_DIR, \ + read_profiles +from dbt.exceptions import DbtProjectError, DbtProfileError, RuntimeException PROFILES_HELP_MESSAGE = """ @@ -83,7 +81,7 @@ def main(args=None): else: exit_code = ExitCodes.ModelError - except KeyboardInterrupt: + except KeyboardInterrupt as e: logger.info("ctrl-c") exit_code = ExitCodes.UnhandledError @@ -150,60 +148,138 @@ def handle_and_check(args): reset_adapters() - task, res = run_from_args(parsed) + try: + task, res = run_from_args(parsed) + finally: + dbt.tracking.flush() + success = task.interpret_results(res) return res, success -@contextmanager -def track_run(task): - dbt.tracking.track_invocation_start(config=task.config, args=task.args) +def get_nearest_project_dir(): + root_path = os.path.abspath(os.sep) + cwd = os.getcwd() + + while cwd != root_path: + project_file = os.path.join(cwd, "dbt_project.yml") + if os.path.exists(project_file): + return cwd + cwd = os.path.dirname(cwd) + + return None + + +def run_from_args(parsed): + task = None + cfg = None + + if parsed.which in ('init', 'debug'): + # bypass looking for a project file if we're running `dbt init` or + # `dbt debug` + task = parsed.cls(args=parsed) + else: + nearest_project_dir = get_nearest_project_dir() + if nearest_project_dir is None: + raise RuntimeException( + "fatal: Not a dbt project (or any of the parent directories). " + "Missing dbt_project.yml file" + ) + + os.chdir(nearest_project_dir) + + res = invoke_dbt(parsed) + if res is None: + raise RuntimeException("Could not run dbt") + else: + task, cfg = res + + log_path = None + + if cfg is not None: + log_path = cfg.log_path + + initialize_logger(parsed.debug, log_path) + logger.debug("Tracking: {}".format(dbt.tracking.active_user.state())) + + dbt.tracking.track_invocation_start(config=cfg, args=parsed) + + results = run_from_task(task, cfg, parsed) + + return task, results + + +def run_from_task(task, cfg, parsed_args): + result = None try: - yield + result = task.run() dbt.tracking.track_invocation_end( - config=task.config, args=task.args, result_type="ok" + config=cfg, args=parsed_args, result_type="ok" ) except (dbt.exceptions.NotImplementedException, dbt.exceptions.FailedToConnectException) as e: - logger.error('ERROR: {}'.format(e)) + logger.info('ERROR: {}'.format(e)) dbt.tracking.track_invocation_end( - config=task.config, args=task.args, result_type="error" + config=cfg, args=parsed_args, result_type="error" ) - except Exception: + except Exception as e: dbt.tracking.track_invocation_end( - config=task.config, args=task.args, result_type="error" + config=cfg, args=parsed_args, result_type="error" ) raise - finally: - dbt.tracking.flush() + return result -def run_from_args(parsed): - log_cache_events(getattr(parsed, 'log_cache_events', False)) - update_flags(parsed) - logger.info("Running with dbt{}".format(dbt.version.installed)) +def invoke_dbt(parsed): + task = None + cfg = None - # this will convert DbtConfigErrors into RuntimeExceptions - task = parsed.cls.from_args(args=parsed) - logger.debug("running dbt with arguments %s", parsed) - - log_path = None - if task.config is not None: - log_path = getattr(task.config, 'log_path', None) - initialize_logger(parsed.debug, log_path) - logger.debug("Tracking: {}".format(dbt.tracking.active_user.state())) + log_cache_events(getattr(parsed, 'log_cache_events', False)) + 
logger.info("Running with dbt{}".format(dbt.version.installed)) - results = None + try: + if parsed.which in {'deps', 'clean'}: + # deps doesn't need a profile, so don't require one. + cfg = Project.from_current_directory(getattr(parsed, 'vars', '{}')) + elif parsed.which != 'debug': + # for debug, we will attempt to load the various configurations as + # part of the task, so just leave cfg=None. + cfg = RuntimeConfig.from_args(parsed) + except DbtProjectError as e: + logger.info("Encountered an error while reading the project:") + logger.info(dbt.compat.to_string(e)) + + dbt.tracking.track_invalid_invocation( + config=cfg, + args=parsed, + result_type=e.result_type) + + return None + except DbtProfileError as e: + logger.info("Encountered an error while reading profiles:") + logger.info(" ERROR {}".format(str(e))) + + all_profiles = read_profiles(parsed.profiles_dir).keys() + + if len(all_profiles) > 0: + logger.info("Defined profiles:") + for profile in all_profiles: + logger.info(" - {}".format(profile)) + else: + logger.info("There are no profiles defined in your " + "profiles.yml file") - with track_run(task): - results = task.run() + logger.info(PROFILES_HELP_MESSAGE) - return task, results + dbt.tracking.track_invalid_invocation( + config=cfg, + args=parsed, + result_type=e.result_type) + return None -def update_flags(parsed): flags.NON_DESTRUCTIVE = getattr(parsed, 'non_destructive', False) flags.USE_CACHE = getattr(parsed, 'use_cache', True) @@ -221,7 +297,11 @@ def update_flags(parsed): elif arg_full_refresh: flags.FULL_REFRESH = True - flags.TEST_NEW_PARSER = getattr(parsed, 'test_new_parser', False) + logger.debug("running dbt with arguments %s", parsed) + + task = parsed.cls(args=parsed, config=cfg) + + return task, cfg def _build_base_subparser(): @@ -297,9 +377,9 @@ def _build_source_subparser(subparsers, base_subparser): def _build_init_subparser(subparsers, base_subparser): sub = subparsers.add_parser( - 'init', - parents=[base_subparser], - help="Initialize a new DBT project.") + 'init', + parents=[base_subparser], + help="Initialize a new DBT project.") sub.add_argument('project_name', type=str, help='Name of the new project') sub.set_defaults(cls=init_task.InitTask, which='init') return sub @@ -398,7 +478,7 @@ def _build_docs_generate_subparser(subparsers, base_subparser): return generate_sub -def _add_selection_arguments(*subparsers): +def _add_common_arguments(*subparsers): for sub in subparsers: sub.add_argument( '-m', @@ -417,10 +497,15 @@ def _add_selection_arguments(*subparsers): Specify the models to exclude. """ ) - - -def _add_table_mutability_arguments(*subparsers): - for sub in subparsers: + sub.add_argument( + '--threads', + type=int, + required=False, + help=""" + Specify number of threads to use while executing models. Overrides + settings in profiles.yml. + """ + ) sub.add_argument( '--non-destructive', action='store_true', @@ -436,19 +521,6 @@ def _add_table_mutability_arguments(*subparsers): If specified, DBT will drop incremental models and fully-recalculate the incremental table from the model definition. """) - - -def _add_common_arguments(*subparsers): - for sub in subparsers: - sub.add_argument( - '--threads', - type=int, - required=False, - help=""" - Specify number of threads to use while executing models. Overrides - settings in profiles.yml. 
- """ - ) sub.add_argument( '--no-version-check', dest='version_check', @@ -511,6 +583,32 @@ def _build_test_subparser(subparsers, base_subparser): action='store_true', help='Run constraint validations from schema.yml files' ) + sub.add_argument( + '--threads', + type=int, + required=False, + help=""" + Specify number of threads to use while executing tests. Overrides + settings in profiles.yml + """ + ) + sub.add_argument( + '-m', + '--models', + required=False, + nargs='+', + help=""" + Specify the models to test. + """ + ) + sub.add_argument( + '--exclude', + required=False, + nargs='+', + help=""" + Specify the models to exclude from testing. + """ + ) sub.set_defaults(cls=test_task.TestTask, which='test') return sub @@ -541,43 +639,11 @@ def _build_source_snapshot_freshness_subparser(subparsers, base_subparser): target/sources.json """ ) - sub.add_argument( - '--threads', - type=int, - required=False, - help=""" - Specify number of threads to use. Overrides settings in profiles.yml - """ - ) sub.set_defaults(cls=freshness_task.FreshnessTask, which='snapshot-freshness') return sub -def _build_rpc_subparser(subparsers, base_subparser): - sub = subparsers.add_parser( - 'rpc', - parents=[base_subparser], - help='Start a json-rpc server', - ) - sub.add_argument( - '--host', - default='0.0.0.0', - help='Specify the host to listen on for the rpc server.' - ) - sub.add_argument( - '--port', - default=8580, - type=int, - help='Specify the port number for the rpc server.' - ) - sub.set_defaults(cls=RPCServerTask, which='rpc') - # the rpc task does a 'compile', so we need these attributes to exist, but - # we don't want users to be allowed to set them. - sub.set_defaults(models=None, exclude=None) - return sub - - def parse_args(args): p = DBTArgumentParser( prog='dbt: data build tool', @@ -620,14 +686,6 @@ def parse_args(args): help='''Run schema validations at runtime. This will surface bugs in dbt, but may incur a performance penalty.''') - p.add_argument( - '--warn-error', - action='store_true', - help='''If dbt would normally warn, instead raise an exception. - Examples include --models that selects nothing, deprecations, - configurations with no associated models, invalid test configurations, - and missing sources/refs in tests''') - # if set, run dbt in single-threaded mode: thread count is ignored, and # calls go through `map` instead of the thread pool. This is useful for # getting performance information about aspects of dbt that normally run in @@ -639,15 +697,6 @@ def parse_args(args): help=argparse.SUPPRESS, ) - # if set, extract all models and blocks with the jinja block extractor, and - # verify that we don't fail anywhere the actual jinja parser passes. The - # reverse (passing files that ends up failing jinja) is fine. 
- p.add_argument( - '--test-new-parser', - action='store_true', - help=argparse.SUPPRESS - ) - subs = p.add_subparsers(title="Available sub-commands") base_subparser = _build_base_subparser() @@ -662,53 +711,18 @@ def parse_args(args): _build_clean_subparser(subs, base_subparser) _build_debug_subparser(subs, base_subparser) _build_deps_subparser(subs, base_subparser) + _build_archive_subparser(subs, base_subparser) - archive_sub = _build_archive_subparser(subs, base_subparser) - rpc_sub = _build_rpc_subparser(subs, base_subparser) run_sub = _build_run_subparser(subs, base_subparser) compile_sub = _build_compile_subparser(subs, base_subparser) generate_sub = _build_docs_generate_subparser(docs_subs, base_subparser) - test_sub = _build_test_subparser(subs, base_subparser) - # --threads, --no-version-check - _add_common_arguments(run_sub, compile_sub, generate_sub, test_sub, - rpc_sub) - # --models, --exclude - _add_selection_arguments(run_sub, compile_sub, generate_sub, test_sub, - archive_sub) - # --full-refresh, --non-destructive - _add_table_mutability_arguments(run_sub, compile_sub) + _add_common_arguments(run_sub, compile_sub, generate_sub) _build_seed_subparser(subs, base_subparser) _build_docs_serve_subparser(docs_subs, base_subparser) + _build_test_subparser(subs, base_subparser) _build_source_snapshot_freshness_subparser(source_subs, base_subparser) - sub = subs.add_parser( - 'run-operation', - parents=[base_subparser], - help=""" - (beta) Run the named macro with any supplied arguments. This - subcommand is unstable and subject to change in a future release - of dbt. Please use it with caution""" - ) - sub.add_argument( - '--macro', - required=True, - help=""" - Specify the macro to invoke. dbt will call this macro with the - supplied arguments and then exit""" - ) - sub.add_argument( - '--args', - type=str, - default='{}', - help=""" - Supply arguments to the macro. This dictionary will be mapped - to the keyword arguments defined in the selected macro. This - argument should be a YAML string, eg. '{my_variable: my_value}'""" - ) - sub.set_defaults(cls=run_operation_task.RunOperationTask, - which='run-operation') - if len(args) == 0: p.print_help() sys.exit(1) diff --git a/core/dbt/node_runners.py b/core/dbt/node_runners.py index bba860c9356..669b93bae22 100644 --- a/core/dbt/node_runners.py +++ b/core/dbt/node_runners.py @@ -1,18 +1,25 @@ from dbt.logger import GLOBAL_LOGGER as logger from dbt.exceptions import NotImplementedException, CompilationException, \ RuntimeException, InternalException, missing_materialization -from dbt.node_types import NodeType +from dbt.utils import get_nodes_by_tags +from dbt.node_types import NodeType, RunHookType +from dbt.adapters.factory import get_adapter from dbt.contracts.results import RunModelResult, collect_timing_info, \ - SourceFreshnessResult, PartialResult, RemoteCompileResult, RemoteRunResult + SourceFreshnessResult, PartialResult from dbt.compilation import compile_node +import dbt.clients.jinja import dbt.context.runtime import dbt.exceptions import dbt.utils import dbt.tracking import dbt.ui.printer -from dbt import rpc +import dbt.flags +import dbt.schema +import dbt.writer +import six +import sys import threading import time import traceback @@ -41,15 +48,6 @@ def track_model_run(index, num_nodes, run_model_result): }) -class ExecutionContext(object): - """During execution and error handling, dbt makes use of mutable state: - timing information and the newest (compiled vs executed) form of the node. 
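safe_run wraps each phase in collect_timing_info so that compile and execute timings are captured even when a later step raises. A rough, self-contained sketch of that pattern, using a plain dict as a stand-in for dbt's actual TimingInfo objects:

# Illustrative only; dbt's collect_timing_info in dbt.contracts.results differs in detail.
import time
from contextlib import contextmanager

@contextmanager
def collect_timing_info(name):
    info = {'name': name, 'started_at': time.time(), 'completed_at': None}
    try:
        yield info
    finally:
        info['completed_at'] = time.time()

timing = []
with collect_timing_info('compile') as timing_info:
    time.sleep(0.01)    # stand-in for compiling the node
timing.append(timing_info)

with collect_timing_info('execute') as timing_info:
    time.sleep(0.01)    # stand-in for running the compiled node
timing.append(timing_info)

print([(t['name'], round(t['completed_at'] - t['started_at'], 3)) for t in timing])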
- """ - def __init__(self, node): - self.timing = [] - self.node = node - - class BaseRunner(object): def __init__(self, config, adapter, node, node_index, num_nodes): self.config = config @@ -121,78 +119,67 @@ def from_run_result(self, result, start_time, timing_info): timing_info=timing_info ) - def compile_and_execute(self, manifest, ctx): + def safe_run(self, manifest): + catchable_errors = (CompilationException, RuntimeException) + + # result = self.DefaultResult(self.node) + started = time.time() + timing = [] + error = None + node = self.node result = None - self.adapter.acquire_connection(self.node.get('name')) - with collect_timing_info('compile') as timing_info: - # if we fail here, we still have a compiled node to return - # this has the benefit of showing a build path for the errant - # model - ctx.node = self.compile(manifest) - ctx.timing.append(timing_info) - - # for ephemeral nodes, we only want to compile, not run - if not ctx.node.is_ephemeral_model: - with collect_timing_info('execute') as timing_info: - result = self.run(ctx.node, manifest) - ctx.node = result.node - - ctx.timing.append(timing_info) - return result + try: + with collect_timing_info('compile') as timing_info: + # if we fail here, we still have a compiled node to return + # this has the benefit of showing a build path for the errant + # model + node = self.compile(manifest) - def _handle_catchable_exception(self, e, ctx): - if e.node is None: - e.node = ctx.node + timing.append(timing_info) - return dbt.compat.to_string(e) + # for ephemeral nodes, we only want to compile, not run + if not node.is_ephemeral_model: + with collect_timing_info('execute') as timing_info: + result = self.run(node, manifest) + node = result.node - def _handle_internal_exception(self, e, ctx): - build_path = self.node.build_path - prefix = 'Internal error executing {}'.format(build_path) + timing.append(timing_info) - error = "{prefix}\n{error}\n\n{note}".format( - prefix=dbt.ui.printer.red(prefix), - error=str(e).strip(), - note=INTERNAL_ERROR_STRING - ) - logger.debug(error) - return dbt.compat.to_string(e) - - def _handle_generic_exception(self, e, ctx): - node_description = self.node.get('build_path') - if node_description is None: - node_description = self.node.unique_id - prefix = "Unhandled error while executing {}".format(node_description) - error = "{prefix}\n{error}".format( - prefix=dbt.ui.printer.red(prefix), - error=str(e).strip() - ) + # result.extend(item.serialize() for item in timing) - logger.error(error) - logger.debug('', exc_info=True) - return dbt.compat.to_string(e) + except catchable_errors as e: + if e.node is None: + e.node = node - def handle_exception(self, e, ctx): - catchable_errors = (CompilationException, RuntimeException) - if isinstance(e, catchable_errors): - error = self._handle_catchable_exception(e, ctx) - elif isinstance(e, InternalException): - error = self._handle_internal_exception(e, ctx) - else: - error = self._handle_generic_exception(e, ctx) - return error + error = dbt.compat.to_string(e) - def safe_run(self, manifest): - started = time.time() - ctx = ExecutionContext(self.node) - error = None - result = None + except InternalException as e: + build_path = self.node.build_path + prefix = 'Internal error executing {}'.format(build_path) + + error = "{prefix}\n{error}\n\n{note}".format( + prefix=dbt.ui.printer.red(prefix), + error=str(e).strip(), + note=INTERNAL_ERROR_STRING) + logger.debug(error) + error = dbt.compat.to_string(e) - try: - result = self.compile_and_execute(manifest, 
ctx) except Exception as e: - error = self.handle_exception(e, ctx) + node_description = self.node.get('build_path') + if node_description is None: + node_description = self.node.unique_id + prefix = "Unhandled error while executing {description}".format( + description=node_description) + + error = "{prefix}\n{error}".format( + prefix=dbt.ui.printer.red(prefix), + error=str(e).strip()) + + logger.error(error) + logger.debug('', exc_info=True) + error = dbt.compat.to_string(e) + finally: exc_str = self._safe_release_connection() @@ -203,23 +190,24 @@ def safe_run(self, manifest): if error is not None: # we could include compile time for runtime errors here - result = self.error_result(ctx.node, error, started, []) + result = self.error_result(node, error, started, []) elif result is not None: - result = self.from_run_result(result, started, ctx.timing) + result = self.from_run_result(result, started, timing) else: - result = self.ephemeral_result(ctx.node, started, ctx.timing) + result = self.ephemeral_result(node, started, timing) return result def _safe_release_connection(self): """Try to release a connection. If an exception is hit, log and return the error string. """ + node_name = self.node.name try: - self.adapter.release_connection() + self.adapter.release_connection(node_name) except Exception as exc: logger.debug( 'Error releasing connection for node {}: {!s}\n{}' - .format(self.node.name, exc, traceback.format_exc()) + .format(node_name, exc, traceback.format_exc()) ) return dbt.compat.to_string(exc) @@ -298,17 +286,11 @@ def compile(self, manifest): class ModelRunner(CompileRunner): - def get_node_representation(self): - if self.config.credentials.database == self.node.database: - template = "{0.schema}.{0.alias}" - else: - template = "{0.database}.{0.schema}.{0.alias}" - - return template.format(self.node) - def describe_node(self): - return "{} model {}".format(self.node.get_materialization(), - self.get_node_representation()) + materialization = dbt.utils.get_materialization(self.node) + return "{0} model {1.database}.{1.schema}.{1.alias}".format( + materialization, self.node + ) def print_start_line(self): description = self.describe_node() @@ -316,9 +298,9 @@ def print_start_line(self): self.num_nodes) def print_result_line(self, result): - description = self.describe_node() + schema_name = self.node.schema dbt.ui.printer.print_model_result_line(result, - description, + schema_name, self.node_index, self.num_nodes) @@ -383,8 +365,7 @@ def _calculate_status(self, target_freshness, freshness): continue target = target_freshness[fullkey] - kwname = target['period'] + 's' - kwargs = {kwname: target['count']} + kwargs = {target['period']+'s': target['count']} if freshness > timedelta(**kwargs).total_seconds(): return key return 'pass' @@ -413,14 +394,11 @@ def from_run_result(self, result, start_time, timing_info): def execute(self, compiled_node, manifest): relation = self.adapter.Relation.create_from_source(compiled_node) # given a Source, calculate its fresnhess. 
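The freshness logic in _calculate_status expands a threshold expressed as a count plus a period name into timedelta keyword arguments and compares it with the source's age in seconds. A standalone sketch with made-up threshold values:

# Sketch of the count/period threshold comparison; thresholds below are invented.
from datetime import timedelta

def calculate_status(age_in_seconds, target_freshness):
    # In this sketch, 'error_after' wins over 'warn_after' when both are breached.
    for key in ('error_after', 'warn_after'):
        target = target_freshness.get(key)
        if target is None:
            continue
        kwargs = {target['period'] + 's': target['count']}
        if age_in_seconds > timedelta(**kwargs).total_seconds():
            return key
    return 'pass'

thresholds = {
    'warn_after': {'count': 6, 'period': 'hour'},
    'error_after': {'count': 1, 'period': 'day'},
}
print(calculate_status(3 * 3600, thresholds))     # pass
print(calculate_status(12 * 3600, thresholds))    # warn_after
print(calculate_status(2 * 86400, thresholds))    # error_after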
- with self.adapter.connection_named(compiled_node.unique_id): - self.adapter.clear_transaction() - freshness = self.adapter.calculate_freshness( - relation, - compiled_node.loaded_at_field, - manifest=manifest - ) - + freshness = self.adapter.calculate_freshness( + relation, + compiled_node.loaded_at_field, + manifest=manifest + ) status = self._calculate_status( compiled_node.freshness, freshness['age'] @@ -461,6 +439,7 @@ def print_start_line(self): def execute_test(self, test): res, table = self.adapter.execute( test.wrapped_sql, + model_name=test.name, auto_begin=True, fetch=True) @@ -487,10 +466,8 @@ def after_execute(self, result): class ArchiveRunner(ModelRunner): def describe_node(self): cfg = self.node.get('config', {}) - return ( - "archive {name} --> {target_database}.{target_schema}.{name}" - .format(name=self.node.name, **cfg) - ) + return "archive {source_database}.{source_schema}.{source_table} --> "\ + "{target_database}.{target_schema}.{target_table}".format(**cfg) def print_result_line(self, result): dbt.ui.printer.print_archive_result_line(result, self.node_index, @@ -499,7 +476,7 @@ def print_result_line(self, result): class SeedRunner(ModelRunner): def describe_node(self): - return "seed file {}".format(self.get_node_representation()) + return "seed file {0.database}.{0.schema}.{0.alias}".format(self.node) def before_execute(self): description = self.describe_node() @@ -515,80 +492,3 @@ def print_result_line(self, result): schema_name, self.node_index, self.num_nodes) - - -class RPCCompileRunner(CompileRunner): - def __init__(self, config, adapter, node, node_index, num_nodes): - super(RPCCompileRunner, self).__init__(config, adapter, node, - node_index, num_nodes) - - def handle_exception(self, e, ctx): - if isinstance(e, dbt.exceptions.Exception): - if isinstance(e, dbt.exceptions.RuntimeException): - e.node = ctx.node - return rpc.dbt_error(e) - elif isinstance(e, rpc.RPCException): - return e - else: - return rpc.server_error(e) - - def before_execute(self): - pass - - def after_execute(self, result): - pass - - def compile(self, manifest): - return compile_node(self.adapter, self.config, self.node, manifest, {}, - write=False) - - def execute(self, compiled_node, manifest): - return RemoteCompileResult( - raw_sql=compiled_node.raw_sql, - compiled_sql=compiled_node.injected_sql, - node=compiled_node - ) - - def error_result(self, node, error, start_time, timing_info): - raise error - - def ephemeral_result(self, node, start_time, timing_info): - raise NotImplementedException( - 'cannot execute ephemeral nodes remotely!' 
- ) - - def from_run_result(self, result, start_time, timing_info): - timing = [t.serialize() for t in timing_info] - return RemoteCompileResult( - raw_sql=result.raw_sql, - compiled_sql=result.compiled_sql, - node=result.node, - timing=timing - ) - - -class RPCExecuteRunner(RPCCompileRunner): - def from_run_result(self, result, start_time, timing_info): - timing = [t.serialize() for t in timing_info] - return RemoteRunResult( - raw_sql=result.raw_sql, - compiled_sql=result.compiled_sql, - node=result.node, - table=result.table, - timing=timing - ) - - def execute(self, compiled_node, manifest): - status, table = self.adapter.execute(compiled_node.injected_sql, - fetch=True) - table = { - 'column_names': list(table.column_names), - 'rows': [list(row) for row in table] - } - - return RemoteRunResult( - raw_sql=compiled_node.raw_sql, - compiled_sql=compiled_node.injected_sql, - node=compiled_node, - table=table - ) diff --git a/core/dbt/node_types.py b/core/dbt/node_types.py index d0a94404ae0..4f097ab1070 100644 --- a/core/dbt/node_types.py +++ b/core/dbt/node_types.py @@ -10,7 +10,6 @@ class NodeType(object): Seed = 'seed' Documentation = 'documentation' Source = 'source' - RPCCall = 'rpc' @classmethod def executable(cls): @@ -22,7 +21,6 @@ def executable(cls): cls.Operation, cls.Seed, cls.Documentation, - cls.RPCCall, ] @classmethod @@ -30,7 +28,6 @@ def refable(cls): return [ cls.Model, cls.Seed, - cls.Archive, ] diff --git a/core/dbt/parser/__init__.py b/core/dbt/parser/__init__.py index 5363aa29400..78ca99c3eda 100644 --- a/core/dbt/parser/__init__.py +++ b/core/dbt/parser/__init__.py @@ -1,7 +1,6 @@ from .analysis import AnalysisParser from .archives import ArchiveParser -from .archives import ArchiveBlockParser from .data_test import DataTestParser from .docs import DocumentationParser from .hooks import HookParser @@ -15,7 +14,6 @@ __all__ = [ 'AnalysisParser', 'ArchiveParser', - 'ArchiveBlockParser', 'DataTestParser', 'DocumentationParser', 'HookParser', diff --git a/core/dbt/parser/analysis.py b/core/dbt/parser/analysis.py index c466ead1cfe..5d218544983 100644 --- a/core/dbt/parser/analysis.py +++ b/core/dbt/parser/analysis.py @@ -7,8 +7,3 @@ class AnalysisParser(BaseSqlParser): @classmethod def get_compiled_path(cls, name, relative_path): return os.path.join('analysis', relative_path) - - -class RPCCallParser(AnalysisParser): - def get_compiled_path(cls, name, relative_path): - return os.path.join('rpc', relative_path) diff --git a/core/dbt/parser/archives.py b/core/dbt/parser/archives.py index 981570a48da..ad4de342ebd 100644 --- a/core/dbt/parser/archives.py +++ b/core/dbt/parser/archives.py @@ -1,12 +1,7 @@ + from dbt.contracts.graph.unparsed import UnparsedNode -from dbt.contracts.graph.parsed import ParsedArchiveNode from dbt.node_types import NodeType from dbt.parser.base import MacrosKnownParser -from dbt.parser.base_sql import BaseSqlParser, SQLParseResult -from dbt.adapters.factory import get_adapter -import dbt.clients.jinja -import dbt.exceptions -import dbt.utils import os @@ -25,7 +20,7 @@ def parse_archives_from_project(cls, config): for table in tables: cfg = table.copy() - source_database = archive_config.get( + cfg['source_database'] = archive_config.get( 'source_database', config.credentials.database ) @@ -34,24 +29,11 @@ def parse_archives_from_project(cls, config): config.credentials.database ) - source_schema = archive_config['source_schema'] + cfg['source_schema'] = archive_config.get('source_schema') cfg['target_schema'] = archive_config.get('target_schema') 
- # project-defined archives always use the 'timestamp' strategy. - cfg['strategy'] = 'timestamp' fake_path = [cfg['target_database'], cfg['target_schema'], cfg['target_table']] - - relation = get_adapter(config).Relation.create( - database=source_database, - schema=source_schema, - identifier=table['source_table'], - type='table' - ) - - raw_sql = '{{ config(materialized="archive") }}' + \ - 'select * from {!s}'.format(relation) - archives.append({ 'name': table.get('target_table'), 'root_path': config.project_root, @@ -60,7 +42,7 @@ def parse_archives_from_project(cls, config): 'original_file_path': 'dbt_project.yml', 'package_name': config.project_name, 'config': cfg, - 'raw_sql': raw_sql + 'raw_sql': '{{config(materialized="archive")}} -- noop' }) return archives @@ -94,74 +76,3 @@ def load_and_parse(self): archive_config=archive_config) return to_return - - -class ArchiveBlockParser(BaseSqlParser): - def parse_archives_from_file(self, file_node, tags=None): - # the file node has a 'raw_sql' field that contains the jinja data with - # (we hope!) `archive` blocks - try: - blocks = dbt.clients.jinja.extract_toplevel_blocks( - file_node['raw_sql'] - ) - except dbt.exceptions.CompilationException as exc: - if exc.node is None: - exc.node = file_node - raise - for block in blocks: - if block.block_type_name != NodeType.Archive: - # non-archive blocks are just ignored - continue - name = block.block_name - raw_sql = block.contents - updates = { - 'raw_sql': raw_sql, - 'name': name, - } - yield dbt.utils.deep_merge(file_node, updates) - - @classmethod - def get_compiled_path(cls, name, relative_path): - return relative_path - - @classmethod - def get_fqn(cls, node, package_project_config, extra=[]): - parts = dbt.utils.split_path(node.path) - fqn = [package_project_config.project_name] - fqn.extend(parts[:-1]) - fqn.extend(extra) - fqn.append(node.name) - - return fqn - - @staticmethod - def validate_archives(node): - if node.resource_type == NodeType.Archive: - try: - return ParsedArchiveNode(**node.to_shallow_dict()) - except dbt.exceptions.JSONValidationException as exc: - raise dbt.exceptions.CompilationException(str(exc), node) - else: - return node - - def parse_sql_nodes(self, nodes, tags=None): - if tags is None: - tags = [] - - results = SQLParseResult() - - # in archives, we have stuff in blocks. - for file_node in nodes: - archive_nodes = list( - self.parse_archives_from_file(file_node, tags=tags) - ) - found = super(ArchiveBlockParser, self).parse_sql_nodes( - nodes=archive_nodes, tags=tags - ) - # make sure our blocks are going to work when we try to archive - # them! 
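parse_archives_from_file leans on dbt.clients.jinja.extract_toplevel_blocks to pull named archive blocks out of a model file's raw SQL. The toy extractor below is only a simplified regex stand-in for that call, with an invented example block:

# Simplified stand-in for extract_toplevel_blocks; not dbt's implementation.
import re

RAW_SQL = """
{% archive my_orders_archive %}
    select * from raw.orders
{% endarchive %}
"""

BLOCK_RE = re.compile(
    r'{%\s*archive\s+(?P<name>\w+)\s*%}(?P<contents>.*?){%\s*endarchive\s*%}',
    re.DOTALL,
)

for match in BLOCK_RE.finditer(RAW_SQL):
    print(match.group('name'))              # block_name
    print(match.group('contents').strip())  # contents that become raw_sql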
- found.parsed = {k: self.validate_archives(v) for - k, v in found.parsed.items()} - - results.update(found) - return results diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 3c709f3e459..d21b48cb1ea 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -13,6 +13,7 @@ from dbt.logger import GLOBAL_LOGGER as logger from dbt.contracts.graph.parsed import ParsedNode from dbt.parser.source_config import SourceConfig +from dbt.node_types import NodeType class BaseParser(object): @@ -39,8 +40,8 @@ def get_path(cls, resource_type, package_name, resource_name): return "{}.{}.{}".format(resource_type, package_name, resource_name) @classmethod - def get_fqn(cls, node, package_project_config, extra=[]): - parts = dbt.utils.split_path(node.path) + def get_fqn(cls, path, package_project_config, extra=[]): + parts = dbt.utils.split_path(path) name, _ = os.path.splitext(parts[-1]) fqn = ([package_project_config.project_name] + parts[:-1] + @@ -58,7 +59,6 @@ def __init__(self, root_project_config, all_projects, macro_manifest): ) self.macro_manifest = macro_manifest self._get_schema_func = None - self._get_alias_func = None def get_schema_func(self): """The get_schema function is set by a few different things: @@ -88,51 +88,13 @@ def get_schema(_): else: root_context = dbt.context.parser.generate_macro( get_schema_macro, self.root_project_config, - self.macro_manifest + self.macro_manifest, 'generate_schema_name' ) get_schema = get_schema_macro.generator(root_context) self._get_schema_func = get_schema return self._get_schema_func - def get_alias_func(self): - """The get_alias function is set by a few different things: - - if there is a 'generate_alias_name' macro in the root project, - it will be used. - - if that does not exist but there is a 'generate_alias_name' - macro in the 'dbt' internal project, that will be used - - if neither of those exist (unit tests?), a function that returns - the 'default alias' as set in the model's filename or alias - configuration. 
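get_schema_func above resolves 'generate_schema_name' by preferring a macro in the root project, then one in the internal dbt project, then a plain Python default. A condensed sketch of that fallback chain with deliberately simplified signatures; find_macro_by_name is passed in rather than taken from a macro manifest, and the default's behaviour is a placeholder.

```python
GLOBAL_PROJECT_NAME = 'dbt'  # stand-in for dbt's internal project name


def resolve_generate_schema_name(find_macro_by_name, root_project_name, default_schema):
    """Return a get_schema callable, preferring user macros over the Python default."""
    macro = find_macro_by_name('generate_schema_name', root_project_name)
    if macro is None:
        macro = find_macro_by_name('generate_schema_name', GLOBAL_PROJECT_NAME)
    if macro is None:
        def get_schema(custom_schema_name=None):
            return default_schema if custom_schema_name is None else custom_schema_name
        return get_schema
    # in dbt the macro would be wrapped in a rendering context via macro.generator(...)
    return macro


if __name__ == '__main__':
    no_macros_found = lambda name, project: None
    get_schema = resolve_generate_schema_name(no_macros_found, 'my_project', 'analytics')
    print(get_schema())           # analytics
    print(get_schema('staging'))  # staging
```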
- """ - if self._get_alias_func is not None: - return self._get_alias_func - - get_alias_macro = self.macro_manifest.find_macro_by_name( - 'generate_alias_name', - self.root_project_config.project_name - ) - if get_alias_macro is None: - get_alias_macro = self.macro_manifest.find_macro_by_name( - 'generate_alias_name', - GLOBAL_PROJECT_NAME - ) - if get_alias_macro is None: - def get_alias(node, custom_alias_name=None): - if custom_alias_name is None: - return node.name - else: - return custom_alias_name - else: - root_context = dbt.context.parser.generate_macro( - get_alias_macro, self.root_project_config, - self.macro_manifest - ) - get_alias = get_alias_macro.generator(root_context) - - self._get_alias_func = get_alias - return self._get_alias_func - def _build_intermediate_node_dict(self, config, node_dict, node_path, package_project_config, tags, fqn, agate_table, archive_config, @@ -197,6 +159,10 @@ def _render_with_context(self, parsed_node, config): parsed_node.raw_sql, context, parsed_node.to_shallow_dict(), capture_macros=True) + # Clean up any open conns opened by adapter functions that hit the db + db_wrapper = context['adapter'] + db_wrapper.adapter.release_connection(parsed_node.name) + def _update_parsed_node_info(self, parsed_node, config): """Given the SourceConfig used for parsing and the parsed node, generate and set the true values to use, overriding the temporary parse @@ -207,11 +173,7 @@ def _update_parsed_node_info(self, parsed_node, config): schema_override = config.config.get('schema') get_schema = self.get_schema_func() parsed_node.schema = get_schema(schema_override).strip() - - alias_override = config.config.get('alias') - get_alias = self.get_alias_func() - parsed_node.alias = get_alias(parsed_node, alias_override).strip() - + parsed_node.alias = config.config.get('alias', parsed_node.get('name')) parsed_node.database = config.config.get( 'database', self.default_database ).strip() @@ -245,7 +207,7 @@ def parse_node(self, node, node_path, package_project_config, tags=None, fqn_extra = coalesce(fqn_extra, []) if fqn is None: - fqn = self.get_fqn(node, package_project_config, fqn_extra) + fqn = self.get_fqn(node.path, package_project_config, fqn_extra) config = SourceConfig( self.root_project_config, @@ -265,16 +227,3 @@ def parse_node(self, node, node_path, package_project_config, tags=None, parsed_node.validate() return parsed_node - - def check_block_parsing(self, name, path, contents): - """Check if we were able to extract toplevel blocks from the given - contents. Return True if extraction was successful (no exceptions), - False if it fails. 
- """ - if not dbt.flags.TEST_NEW_PARSER: - return True - try: - dbt.clients.jinja.extract_toplevel_blocks(contents) - except Exception: - return False - return True diff --git a/core/dbt/parser/base_sql.py b/core/dbt/parser/base_sql.py index 2a576ffaaf9..d6d7322a423 100644 --- a/core/dbt/parser/base_sql.py +++ b/core/dbt/parser/base_sql.py @@ -9,7 +9,6 @@ from dbt.contracts.graph.unparsed import UnparsedNode from dbt.parser.base import MacrosKnownParser -from dbt.node_types import NodeType class BaseSqlParser(MacrosKnownParser): @@ -63,77 +62,36 @@ def load_and_parse(self, package_name, root_dir, relative_dirs, return self.parse_sql_nodes(result, tags) - def parse_sql_node(self, node_dict, tags=None): - if tags is None: - tags = [] - - node = UnparsedNode(**node_dict) - package_name = node.package_name - - unique_id = self.get_path(node.resource_type, - package_name, - node.name) - - project = self.all_projects.get(package_name) - - parse_ok = True - if node.resource_type == NodeType.Model: - parse_ok = self.check_block_parsing( - node.name, node.original_file_path, node.raw_sql - ) - - node_parsed = self.parse_node(node, unique_id, project, tags=tags) - if not parse_ok: - # if we had a parse error in parse_node, we would not get here. So - # this means we rejected a good file :( - raise dbt.exceptions.InternalException( - 'the block parser rejected a good node: {} was marked invalid ' - 'but is actually valid!'.format(node.original_file_path) - ) - return unique_id, node_parsed - def parse_sql_nodes(self, nodes, tags=None): + if tags is None: tags = [] - results = SQLParseResult() + to_return = {} + disabled = [] for n in nodes: - node_path, node_parsed = self.parse_sql_node(n, tags) + node = UnparsedNode(**n) + package_name = node.package_name - # Ignore disabled nodes - if not node_parsed.config['enabled']: - results.disable(node_parsed) - continue - - results.keep(node_path, node_parsed) - - return results + node_path = self.get_path(node.resource_type, + package_name, + node.name) + project = self.all_projects.get(package_name) + node_parsed = self.parse_node(node, node_path, project, tags=tags) -class SQLParseResult(object): - def __init__(self): - self.parsed = {} - self.disabled = [] - - def result(self, unique_id, node): - if node.config['enabled']: - self.keep(unique_id, node) - else: - self.disable(node) - - def disable(self, node): - self.disabled.append(node) + # Ignore disabled nodes + if not node_parsed['config']['enabled']: + disabled.append(node_parsed) + continue - def keep(self, unique_id, node): - if unique_id in self.parsed: - dbt.exceptions.raise_duplicate_resource_name( - self.parsed[unique_id], node - ) + # Check for duplicate model names + existing_node = to_return.get(node_path) + if existing_node is not None: + dbt.exceptions.raise_duplicate_resource_name( + existing_node, node_parsed) - self.parsed[unique_id] = node + to_return[node_path] = node_parsed - def update(self, other): - self.disabled.extend(other.disabled) - for unique_id, node in other.parsed.items(): - self.keep(unique_id, node) + return to_return, disabled diff --git a/core/dbt/parser/docs.py b/core/dbt/parser/docs.py index 840ecdb1a9f..9f9e403894c 100644 --- a/core/dbt/parser/docs.py +++ b/core/dbt/parser/docs.py @@ -62,6 +62,8 @@ def parse(self, docfile): # because docs are in their own graph namespace, node type doesn't # need to be part of the unique ID. 
unique_id = '{}.{}'.format(docfile.package_name, name) + fqn = self.get_fqn(docfile.path, + self.all_projects[docfile.package_name]) merged = dbt.utils.deep_merge( docfile.serialize(), @@ -76,10 +78,10 @@ def parse(self, docfile): def load_and_parse(self, package_name, root_dir, relative_dirs): to_return = {} for docfile in self.load_file(package_name, root_dir, relative_dirs): - for parsed in self.parse(docfile): - if parsed.unique_id in to_return: - dbt.exceptions.raise_duplicate_resource_name( - to_return[parsed.unique_id], parsed - ) - to_return[parsed.unique_id] = parsed + for parsed in self.parse(docfile): + if parsed.unique_id in to_return: + dbt.exceptions.raise_duplicate_resource_name( + to_return[parsed.unique_id], parsed + ) + to_return[parsed.unique_id] = parsed return to_return diff --git a/core/dbt/parser/hooks.py b/core/dbt/parser/hooks.py index e12ece3ad69..19d6a80b2ee 100644 --- a/core/dbt/parser/hooks.py +++ b/core/dbt/parser/hooks.py @@ -56,8 +56,8 @@ def load_and_parse_run_hook_type(self, hook_type): }) tags = [hook_type] - results = self.parse_sql_nodes(result, tags=tags) - return results.parsed + hooks, _ = self.parse_sql_nodes(result, tags=tags) + return hooks def load_and_parse(self): if dbt.flags.STRICT_MODE: diff --git a/core/dbt/parser/macros.py b/core/dbt/parser/macros.py index 3ad2978dee7..c03713dae03 100644 --- a/core/dbt/parser/macros.py +++ b/core/dbt/parser/macros.py @@ -28,6 +28,8 @@ def parse_macro_file(self, macro_file_path, macro_file_contents, root_path, if tags is None: tags = [] + context = {} + # change these to actual kwargs base_node = UnparsedMacro( path=macro_file_path, diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index fc8bc943528..82e92973536 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -14,7 +14,7 @@ from dbt.clients.jinja import get_rendered from dbt.node_types import NodeType -from dbt.compat import basestring, to_string +from dbt.compat import basestring, to_string, to_native_string from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import get_pseudo_test_path from dbt.contracts.graph.unparsed import UnparsedNode, UnparsedNodeUpdate, \ @@ -194,7 +194,7 @@ def _filter_validate(filepath, location, values, validate): except dbt.exceptions.JSONValidationException as exc: # we don't want to fail the full run, but we do want to fail # parsing this file - warn_invalid(filepath, location, value, '- ' + exc.msg) + warn_invalid(filepath, location, value, '- '+exc.msg) continue @@ -243,10 +243,6 @@ def _generate_test_name(self, target, test_type, test_args): """Returns a hashed_name, full_name pair.""" raise NotImplementedError - @staticmethod - def _describe_test_target(test_target): - raise NotImplementedError - def build_test_node(self, test_target, package_name, test, root_dir, path, column_name=None): """Build a test node against the given target (a model or a source). 
@@ -261,9 +257,8 @@ def build_test_node(self, test_target, package_name, test, root_dir, path, source_package = self.all_projects.get(package_name) if source_package is None: - desc = '"{}" test on {}'.format( - test_type, self._describe_test_target(test_target) - ) + desc = '"{}" test on model "{}"'.format(test_type, + model_name) dbt.exceptions.raise_dep_not_found(None, desc, test_namespace) test_path = os.path.basename(path) @@ -278,7 +273,6 @@ def build_test_node(self, test_target, package_name, test, root_dir, path, full_path = get_pseudo_test_path(full_name, test_path, 'schema_test') raw_sql = self._build_raw_sql(test_namespace, test_target, test_type, test_args) - unparsed = UnparsedNode( name=full_name, resource_type=NodeType.Test, @@ -291,29 +285,18 @@ def build_test_node(self, test_target, package_name, test, root_dir, path, # supply our own fqn which overrides the hashed version from the path # TODO: is this necessary even a little bit for tests? - fqn_override = self.get_fqn(unparsed.incorporate(path=full_path), - source_package) + fqn_override = self.get_fqn(full_path, source_package) node_path = self.get_path(NodeType.Test, unparsed.package_name, unparsed.name) - result = self.parse_node(unparsed, - node_path, - source_package, - tags=['schema'], - fqn_extra=None, - fqn=fqn_override, - column_name=column_name) - - parse_ok = self.check_block_parsing(full_name, test_path, raw_sql) - if not parse_ok: - # if we had a parse error in parse_node, we would not get here. So - # this means we rejected a good file :( - raise dbt.exceptions.InternalException( - 'the block parser rejected a good node: {} was marked invalid ' - 'but is actually valid!'.format(test_path) - ) - return result + return self.parse_node(unparsed, + node_path, + source_package, + tags=['schema'], + fqn_extra=None, + fqn=fqn_override, + column_name=column_name) class SchemaModelParser(SchemaBaseTestParser): @@ -323,10 +306,6 @@ def _build_raw_sql(self, test_namespace, target, test_type, test_args): def _generate_test_name(self, target, test_type, test_args): return get_nice_schema_test_name(test_type, target['name'], test_args) - @staticmethod - def _describe_test_target(test_target): - return 'model "{}"'.format(test_target) - def parse_models_entry(self, model_dict, path, package_name, root_dir): model_name = model_dict['name'] refs = ParserRef() @@ -394,15 +373,11 @@ def _build_raw_sql(self, test_namespace, target, test_type, test_args): def _generate_test_name(self, target, test_type, test_args): return get_nice_schema_test_name( - 'source_' + test_type, + 'source_'+test_type, '{}_{}'.format(target['source']['name'], target['table']['name']), test_args ) - @staticmethod - def _describe_test_target(test_target): - return 'source "{0[source]}.{0[table]}"'.format(test_target) - def get_path(self, *parts): return '.'.join(str(s) for s in parts) @@ -417,17 +392,12 @@ def generate_source_node(self, source, table, path, package_name, root_dir, get_rendered(description, context) get_rendered(source_description, context) + # we'll fill columns in later. 
freshness = dbt.utils.deep_merge(source.get('freshness', {}), table.get('freshness', {})) loaded_at_field = table.get('loaded_at_field', source.get('loaded_at_field')) - - # use 'or {}' to allow quoting: null - source_quoting = source.get('quoting') or {} - table_quoting = table.get('quoting') or {} - quoting = dbt.utils.deep_merge(source_quoting, table_quoting) - default_database = self.root_project_config.credentials.database return ParsedSourceDefinition( package_name=package_name, @@ -447,7 +417,6 @@ def generate_source_node(self, source, table, path, package_name, root_dir, docrefs=refs.docrefs, loaded_at_field=loaded_at_field, freshness=freshness, - quoting=quoting, resource_type=NodeType.Source ) diff --git a/core/dbt/parser/source_config.py b/core/dbt/parser/source_config.py index 65f3e6b93c3..fd5482a3c4c 100644 --- a/core/dbt/parser/source_config.py +++ b/core/dbt/parser/source_config.py @@ -13,6 +13,7 @@ class SourceConfig(object): 'schema', 'enabled', 'materialized', + 'sql_where', 'unique_key', 'database', } @@ -65,8 +66,6 @@ def config(self): if self.node_type == NodeType.Seed: defaults['materialized'] = 'seed' - elif self.node_type == NodeType.Archive: - defaults['materialized'] = 'archive' active_config = self.load_config_from_active_project() @@ -151,8 +150,6 @@ def get_project_config(self, runtime_config): if self.node_type == NodeType.Seed: model_configs = runtime_config.seeds - elif self.node_type == NodeType.Archive: - model_configs = {} else: model_configs = runtime_config.models diff --git a/core/dbt/parser/util.py b/core/dbt/parser/util.py index c5b4a46ae67..c4c2245746d 100644 --- a/core/dbt/parser/util.py +++ b/core/dbt/parser/util.py @@ -27,6 +27,8 @@ def do_docs(*args): dbt.exceptions.doc_target_not_found(node, doc_name, doc_package_name) + target_doc_id = target_doc.unique_id + return target_doc.block_contents return do_docs @@ -122,125 +124,96 @@ def _get_node_column(cls, node, column_name): return column - @classmethod - def process_docs_for_node(cls, manifest, current_project, node): - for docref in node.get('docrefs', []): - column_name = docref.get('column_name') - if column_name is None: - description = node.get('description', '') - else: - column = cls._get_node_column(node, column_name) - description = column.get('description', '') - context = { - 'doc': docs(node, manifest, current_project, column_name), - } - - # At this point, target_doc is a ParsedDocumentation, and we - # know that our documentation string has a 'docs("...")' - # pointing at it. We want to render it. - description = dbt.clients.jinja.get_rendered(description, - context) - # now put it back. 
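generate_source_node above merges source-level and table-level freshness with dbt.utils.deep_merge, the table's settings winning key by key. A small stand-in implementation to show the effect; this is not dbt's actual deep_merge.

```python
def deep_merge(base, override):
    """Merge nested dicts, with override's values taking precedence."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_merge(merged[key], value)
        else:
            merged[key] = value
    return merged


if __name__ == '__main__':
    source_freshness = {'warn_after': {'count': 12, 'period': 'hour'}}
    table_freshness = {'warn_after': {'count': 6, 'period': 'hour'}}
    print(deep_merge(source_freshness, table_freshness))
    # {'warn_after': {'count': 6, 'period': 'hour'}}
```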
- if column_name is None: - node.set('description', description) - else: - column['description'] = description - @classmethod def process_docs(cls, manifest, current_project): - for node in manifest.nodes.values(): - cls.process_docs_for_node(manifest, current_project, node) + for _, node in manifest.nodes.items(): + target_doc = None + target_doc_name = None + target_doc_package = None + for docref in node.get('docrefs', []): + column_name = docref.get('column_name') + if column_name is None: + description = node.get('description', '') + else: + column = cls._get_node_column(node, column_name) + description = column.get('description', '') + target_doc_name = docref['documentation_name'] + target_doc_package = docref['documentation_package'] + context = { + 'doc': docs(node, manifest, current_project, column_name), + } + + # At this point, target_doc is a ParsedDocumentation, and we + # know that our documentation string has a 'docs("...")' + # pointing at it. We want to render it. + description = dbt.clients.jinja.get_rendered(description, + context) + # now put it back. + if column_name is None: + node.set('description', description) + else: + column['description'] = description return manifest - @classmethod - def process_refs_for_node(cls, manifest, current_project, node): - """Given a manifest and a node in that manifest, process its refs""" - target_model = None - target_model_name = None - target_model_package = None - - for ref in node.refs: - if len(ref) == 1: - target_model_name = ref[0] - elif len(ref) == 2: - target_model_package, target_model_name = ref - - target_model = cls.resolve_ref( - manifest, - target_model_name, - target_model_package, - current_project, - node.get('package_name')) - - if target_model is None or target_model is cls.DISABLED: - # This may raise. Even if it doesn't, we don't want to add - # this node to the graph b/c there is no destination node - node.config['enabled'] = False - dbt.utils.invalid_ref_fail_unless_test( - node, target_model_name, target_model_package, - disabled=(target_model is cls.DISABLED) - ) - - continue - - target_model_id = target_model.get('unique_id') - - node.depends_on['nodes'].append(target_model_id) - manifest.nodes[node['unique_id']] = node - @classmethod def process_refs(cls, manifest, current_project): - for node in manifest.nodes.values(): - cls.process_refs_for_node(manifest, current_project, node) - return manifest - - @classmethod - def process_sources_for_node(cls, manifest, current_project, node): - target_source = None - for source_name, table_name in node.sources: - target_source = cls.resolve_source( - manifest, - source_name, - table_name, - current_project, - node.get('package_name')) - - if target_source is None: - # this folows the same pattern as refs - node.config['enabled'] = False - dbt.utils.invalid_source_fail_unless_test( - node, - source_name, - table_name) - continue - target_source_id = target_source.unique_id - node.depends_on['nodes'].append(target_source_id) - manifest.nodes[node['unique_id']] = node + for _, node in manifest.nodes.items(): + target_model = None + target_model_name = None + target_model_package = None + + for ref in node.refs: + if len(ref) == 1: + target_model_name = ref[0] + elif len(ref) == 2: + target_model_package, target_model_name = ref + + target_model = cls.resolve_ref( + manifest, + target_model_name, + target_model_package, + current_project, + node.get('package_name')) + + if target_model is None or target_model is cls.DISABLED: + # This may raise. 
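The docref handling above renders each description with a context that exposes a doc() callable. A rough illustration of that pattern using the standalone jinja2 package; the DOC_BLOCKS lookup table is invented for the example and is not how dbt stores docs blocks.

```python
from jinja2 import Template

DOC_BLOCKS = {'orders_status': 'One of placed, shipped, completed, returned.'}


def render_description(description):
    def doc(name, package=None):
        # look the docs block up by name; package is accepted but unused here
        return DOC_BLOCKS[name]

    return Template(description).render(doc=doc)


if __name__ == '__main__':
    print(render_description('Status of the order. {{ doc("orders_status") }}'))
```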
Even if it doesn't, we don't want to add + # this node to the graph b/c there is no destination node + node.config['enabled'] = False + dbt.utils.invalid_ref_fail_unless_test( + node, target_model_name, target_model_package, + disabled=(target_model is cls.DISABLED) + ) + + continue + + target_model_id = target_model.get('unique_id') + + node.depends_on['nodes'].append(target_model_id) + manifest.nodes[node['unique_id']] = node - @classmethod - def process_sources(cls, manifest, current_project): - for node in manifest.nodes.values(): - cls.process_sources_for_node(manifest, current_project, node) return manifest @classmethod - def add_new_refs(cls, manifest, current_project, node, macros): - """Given a new node that is not in the manifest, copy the manifest and - insert the new node into it as if it were part of regular ref - processing - """ - manifest = manifest.deepcopy(config=current_project) - # it's ok for macros to silently override a local project macro name - manifest.macros.update(macros) - - if node.unique_id in manifest.nodes: - # this should be _impossible_ due to the fact that rpc calls get - # a unique ID that starts with 'rpc'! - raise dbt.exceptions.raise_duplicate_resource_name( - manifest.nodes[node.unique_id], node - ) - manifest.nodes[node.unique_id] = node - cls.process_sources_for_node(manifest, current_project, node) - cls.process_refs_for_node(manifest, current_project, node) - cls.process_docs_for_node(manifest, current_project, node) + def process_sources(cls, manifest, current_project): + for _, node in manifest.nodes.items(): + target_source = None + for source_name, table_name in node.sources: + target_source = cls.resolve_source( + manifest, + source_name, + table_name, + current_project, + node.get('package_name')) + + if target_source is None: + # this folows the same pattern as refs + node.config['enabled'] = False + dbt.utils.invalid_source_fail_unless_test( + node, + source_name, + table_name) + continue + target_source_id = target_source.unique_id + node.depends_on['nodes'].append(target_source_id) + manifest.nodes[node['unique_id']] = node return manifest diff --git a/core/dbt/rpc.py b/core/dbt/rpc.py deleted file mode 100644 index 9335fc3051e..00000000000 --- a/core/dbt/rpc.py +++ /dev/null @@ -1,395 +0,0 @@ -from jsonrpc.exceptions import \ - JSONRPCDispatchException, \ - JSONRPCInvalidParams, \ - JSONRPCParseError, \ - JSONRPCInvalidRequestException, \ - JSONRPCInvalidRequest -from jsonrpc import JSONRPCResponseManager -from jsonrpc.jsonrpc import JSONRPCRequest -from jsonrpc.jsonrpc2 import JSONRPC20Response - -import json -import uuid -import multiprocessing -import os -import signal -import time -from collections import namedtuple - -from dbt.logger import RPC_LOGGER as logger -from dbt.logger import add_queue_handler -from dbt.compat import QueueEmpty -import dbt.exceptions - - -class RPCException(JSONRPCDispatchException): - def __init__(self, code=None, message=None, data=None, logs=None): - if code is None: - code = -32000 - if message is None: - message = 'Server error' - if data is None: - data = {} - - super(RPCException, self).__init__(code=code, message=message, - data=data) - self.logs = logs - - def __str__(self): - return ( - 'RPCException({0.code}, {0.message}, {0.data}, {1.logs})' - .format(self.error, self) - ) - - @property - def logs(self): - return self.error.data.get('logs') - - @logs.setter - def logs(self, value): - if value is None: - return - self.error.data['logs'] = value - - @classmethod - def from_error(cls, err): - 
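The RPCException defaults above (code -32000, message 'Server error', logs tucked into error.data) follow the JSON-RPC 2.0 error shape. A plain-dict sketch of what such an error response might look like on the wire; jsonrpc_error_response is an illustrative helper, not part of dbt or the jsonrpc library.

```python
import json

JSONRPC_SERVER_ERROR = -32000  # generic "Server error" code from the JSON-RPC 2.0 spec


def jsonrpc_error_response(request_id, message='Server error', data=None, logs=None):
    """Assemble a JSON-RPC 2.0 error response as a plain dict (illustrative only)."""
    error = {'code': JSONRPC_SERVER_ERROR, 'message': message, 'data': dict(data or {})}
    if logs is not None:
        error['data']['logs'] = logs
    return {'jsonrpc': '2.0', 'id': request_id, 'error': error}


if __name__ == '__main__':
    response = jsonrpc_error_response(
        1,
        data={'type': 'RuntimeException'},
        logs=[{'levelname': 'ERROR', 'message': 'compilation failed'}],
    )
    print(json.dumps(response, indent=2))
```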
return cls(err.code, err.message, err.data, err.data.get('logs')) - - -def invalid_params(data): - return RPCException( - code=JSONRPCInvalidParams.CODE, - message=JSONRPCInvalidParams.MESSAGE, - data=data - ) - - -def server_error(err, logs=None): - exc = dbt.exceptions.Exception(str(err)) - return dbt_error(exc, logs) - - -def timeout_error(timeout_value, logs=None): - exc = dbt.exceptions.RPCTimeoutException(timeout_value) - return dbt_error(exc, logs) - - -def dbt_error(exc, logs=None): - exc = RPCException(code=exc.CODE, message=exc.MESSAGE, data=exc.data(), - logs=logs) - return exc - - -class QueueMessageType(object): - Error = 'error' - Result = 'result' - Log = 'log' - - @classmethod - def terminating(cls): - return [ - cls.Error, - cls.Result - ] - - -def sigterm_handler(signum, frame): - raise dbt.exceptions.RPCKilledException(signum) - - -class RequestDispatcher(object): - """A special dispatcher that knows about requests.""" - def __init__(self, http_request, json_rpc_request, manager): - self.http_request = http_request - self.json_rpc_request = json_rpc_request - self.manager = manager - self.task = None - - def rpc_factory(self, task): - request_handler = RequestTaskHandler(task, - self.http_request, - self.json_rpc_request) - - def rpc_func(**kwargs): - try: - self.manager.add_request(request_handler) - return request_handler.handle(kwargs) - finally: - self.manager.mark_done(request_handler) - - return rpc_func - - def __getitem__(self, key): - # the dispatcher's keys are method names and its values are functions - # that implement the RPC calls - func = self.manager.rpc_builtin(key) - if func is not None: - return func - - task = self.manager.rpc_task(key) - return self.rpc_factory(task) - - -class RequestTaskHandler(object): - def __init__(self, task, http_request, json_rpc_request): - self.task = task - self.http_request = http_request - self.json_rpc_request = json_rpc_request - self.queue = None - self.process = None - self.started = None - self.timeout = None - self.logs = [] - self.task_id = uuid.uuid4() - - @property - def request_source(self): - return self.http_request.remote_addr - - @property - def request_id(self): - return self.json_rpc_request._id - - @property - def method(self): - return self.task.METHOD_NAME - - def _next_timeout(self): - if self.timeout is None: - return None - end = self.started + self.timeout - timeout = end - time.time() - if timeout < 0: - raise dbt.exceptions.RPCTimeoutException(self.timeout) - return timeout - - def _wait_for_results(self): - """Wait for results off the queue. If there is a timeout set, and it is - exceeded, raise an RPCTimeoutException. 
- """ - while True: - get_timeout = self._next_timeout() - try: - msgtype, value = self.queue.get(timeout=get_timeout) - except QueueEmpty: - raise dbt.exceptions.RPCTimeoutException(self.timeout) - - if msgtype == QueueMessageType.Log: - self.logs.append(value) - elif msgtype in QueueMessageType.terminating(): - return msgtype, value - else: - raise dbt.exceptions.InternalException( - 'Got invalid queue message type {}'.format(msgtype) - ) - - def _join_process(self): - try: - msgtype, result = self._wait_for_results() - except dbt.exceptions.RPCTimeoutException: - self.process.terminate() - raise timeout_error(self.timeout) - except dbt.exceptions.Exception as exc: - raise dbt_error(exc) - except Exception as exc: - raise server_error(exc) - finally: - self.process.join() - - if msgtype == QueueMessageType.Error: - raise RPCException.from_error(result) - - return result - - def get_result(self): - try: - result = self._join_process() - except RPCException as exc: - exc.logs = self.logs - raise - - result['logs'] = self.logs - return result - - def task_bootstrap(self, kwargs): - signal.signal(signal.SIGTERM, sigterm_handler) - # the first thing we do in a new process: start logging - add_queue_handler(self.queue) - - error = None - result = None - try: - result = self.task.handle_request(**kwargs) - except RPCException as exc: - error = exc - except dbt.exceptions.RPCKilledException as exc: - # do NOT log anything here, you risk triggering a deadlock on the - # queue handler we inserted above - error = dbt_error(exc) - except dbt.exceptions.Exception as exc: - logger.debug('dbt runtime exception', exc_info=True) - error = dbt_error(exc) - except Exception as exc: - logger.debug('uncaught python exception', exc_info=True) - error = server_error(exc) - - # put whatever result we got onto the queue as well. 
- if error is not None: - self.queue.put([QueueMessageType.Error, error.error]) - else: - self.queue.put([QueueMessageType.Result, result]) - - def handle(self, kwargs): - self.started = time.time() - self.timeout = kwargs.pop('timeout', None) - self.queue = multiprocessing.Queue() - self.process = multiprocessing.Process( - target=self.task_bootstrap, - args=(kwargs,) - ) - self.process.start() - return self.get_result() - - @property - def state(self): - if self.started is None: - return 'not started' - elif self.process is None: - return 'initializing' - elif self.process.is_alive(): - return 'running' - else: - return 'finished' - - -TaskRow = namedtuple( - 'TaskRow', - 'task_id request_id request_source method state start elapsed timeout' -) - - -class TaskManager(object): - def __init__(self): - self.tasks = {} - self.completed = {} - self._rpc_task_map = {} - self._rpc_function_map = {} - self._lock = multiprocessing.Lock() - - def add_request(self, request_handler): - self.tasks[request_handler.task_id] = request_handler - - def add_task_handler(self, task): - self._rpc_task_map[task.METHOD_NAME] = task - - def rpc_task(self, method_name): - return self._rpc_task_map[method_name] - - def process_listing(self, active=True, completed=False): - included_tasks = {} - with self._lock: - if completed: - included_tasks.update(self.completed) - if active: - included_tasks.update(self.tasks) - - table = [] - now = time.time() - for task_handler in included_tasks.values(): - start = task_handler.started - if start is not None: - elapsed = now - start - - table.append(TaskRow( - str(task_handler.task_id), task_handler.request_id, - task_handler.request_source, task_handler.method, - task_handler.state, start, elapsed, task_handler.timeout - )) - table.sort(key=lambda r: (r.state, r.start)) - result = { - 'rows': [dict(r._asdict()) for r in table], - } - return result - - def process_kill(self, task_id): - # TODO: this result design is terrible - result = { - 'found': False, - 'started': False, - 'finished': False, - 'killed': False - } - task_id = uuid.UUID(task_id) - try: - task = self.tasks[task_id] - except KeyError: - # nothing to do! - return result - - result['found'] = True - - if task.process is None: - return result - pid = task.process.pid - if pid is None: - return result - - result['started'] = True - - if task.process.is_alive(): - os.kill(pid, signal.SIGINT) - result['killed'] = True - return result - - result['finished'] = True - return result - - def rpc_builtin(self, method_name): - if method_name == 'ps': - return self.process_listing - if method_name == 'kill': - return self.process_kill - return None - - def mark_done(self, request_handler): - task_id = request_handler.task_id - with self._lock: - if task_id not in self.tasks: - # lost a task! Maybe it was killed before it started. - return - self.completed[task_id] = self.tasks.pop(task_id) - - def methods(self): - rpc_builtin_methods = ['ps', 'kill'] - return list(self._rpc_task_map) + rpc_builtin_methods - - -class ResponseManager(JSONRPCResponseManager): - """Override the default response manager to handle request metadata and - track in-flight tasks. 
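The task_bootstrap/handle pair above runs the work in a child process and ships exactly one error-or-result message back over a multiprocessing queue. A minimal sketch of that round trip, with a trivial stand-in for the real request handler:

```python
import multiprocessing


def task_bootstrap(q, sql):
    """Child-process entry point: do the work, put exactly one message on the queue."""
    try:
        result = {'compiled_sql': sql.strip()}  # stand-in for the real request handler
    except Exception as exc:
        q.put(('error', str(exc)))
    else:
        q.put(('result', result))


if __name__ == '__main__':
    q = multiprocessing.Queue()
    proc = multiprocessing.Process(target=task_bootstrap, args=(q, '  select 1  '))
    proc.start()
    msgtype, value = q.get()  # read before join to avoid blocking on a full pipe
    proc.join()
    print(msgtype, value)
```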
- """ - @classmethod - def handle(cls, http_request, task_manager): - # pretty much just copy+pasted from the original, with slight tweaks to - # preserve the request - request_str = http_request.data - if isinstance(request_str, bytes): - request_str = request_str.decode("utf-8") - - try: - data = json.loads(request_str) - except (TypeError, ValueError): - return JSONRPC20Response(error=JSONRPCParseError()._data) - - try: - request = JSONRPCRequest.from_data(data) - except JSONRPCInvalidRequestException: - return JSONRPC20Response(error=JSONRPCInvalidRequest()._data) - - dispatcher = RequestDispatcher( - http_request, - request, - task_manager - ) - - return cls.handle_request(request, dispatcher) diff --git a/core/dbt/schema.py b/core/dbt/schema.py new file mode 100644 index 00000000000..f46e89e9859 --- /dev/null +++ b/core/dbt/schema.py @@ -0,0 +1,177 @@ +from dbt.logger import GLOBAL_LOGGER as logger # noqa +import dbt.exceptions + + +class Column(object): + TYPE_LABELS = { + 'STRING': 'TEXT', + 'TIMESTAMP': 'TIMESTAMP', + 'FLOAT': 'FLOAT', + 'INTEGER': 'INT' + } + + def __init__(self, column, dtype, char_size=None, numeric_precision=None, + numeric_scale=None): + self.column = column + self.dtype = dtype + self.char_size = char_size + self.numeric_precision = numeric_precision + self.numeric_scale = numeric_scale + + @classmethod + def translate_type(cls, dtype): + return cls.TYPE_LABELS.get(dtype.upper(), dtype) + + @classmethod + def create(cls, name, label_or_dtype): + column_type = cls.translate_type(label_or_dtype) + return cls(name, column_type) + + @property + def name(self): + return self.column + + @property + def quoted(self): + return '"{}"'.format(self.column) + + @property + def data_type(self): + if self.is_string(): + return Column.string_type(self.string_size()) + elif self.is_numeric(): + return Column.numeric_type(self.dtype, self.numeric_precision, + self.numeric_scale) + else: + return self.dtype + + def is_string(self): + return self.dtype.lower() in ['text', 'character varying', 'character', + 'varchar'] + + def is_numeric(self): + return self.dtype.lower() in ['numeric', 'number'] + + def string_size(self): + if not self.is_string(): + raise RuntimeError("Called string_size() on non-string field!") + + if self.dtype == 'text' or self.char_size is None: + # char_size should never be None. Handle it reasonably just in case + return 255 + else: + return int(self.char_size) + + def can_expand_to(self, other_column): + """returns True if this column can be expanded to the size of the + other column""" + if not self.is_string() or not other_column.is_string(): + return False + + return other_column.string_size() > self.string_size() + + def literal(self, value): + return "{}::{}".format(value, self.data_type) + + @classmethod + def string_type(cls, size): + return "character varying({})".format(size) + + @classmethod + def numeric_type(cls, dtype, precision, scale): + # This could be decimal(...), numeric(...), number(...) 
+ # Just use whatever was fed in here -- don't try to get too clever + if precision is None or scale is None: + return dtype + else: + return "{}({},{})".format(dtype, precision, scale) + + def __repr__(self): + return "".format(self.name, self.data_type) + + +class BigQueryColumn(Column): + TYPE_LABELS = { + 'STRING': 'STRING', + 'TIMESTAMP': 'TIMESTAMP', + 'FLOAT': 'FLOAT64', + 'INTEGER': 'INT64', + 'RECORD': 'RECORD', + } + + def __init__(self, column, dtype, fields=None, mode='NULLABLE'): + super(BigQueryColumn, self).__init__(column, dtype) + + if fields is None: + fields = [] + + self.fields = self.wrap_subfields(fields) + self.mode = mode + + @classmethod + def wrap_subfields(cls, fields): + return [BigQueryColumn.create_from_field(field) for field in fields] + + @classmethod + def create_from_field(cls, field): + return BigQueryColumn(field.name, cls.translate_type(field.field_type), + field.fields, field.mode) + + @classmethod + def _flatten_recursive(cls, col, prefix=None): + if prefix is None: + prefix = [] + + if len(col.fields) == 0: + prefixed_name = ".".join(prefix + [col.column]) + new_col = BigQueryColumn(prefixed_name, col.dtype, col.fields, + col.mode) + return [new_col] + + new_fields = [] + for field in col.fields: + new_prefix = prefix + [col.column] + new_fields.extend(cls._flatten_recursive(field, new_prefix)) + + return new_fields + + def flatten(self): + return self._flatten_recursive(self) + + @property + def quoted(self): + return '`{}`'.format(self.column) + + def literal(self, value): + return "cast({} as {})".format(value, self.dtype) + + @property + def data_type(self): + if self.dtype.upper() == 'RECORD': + subcols = [ + "{} {}".format(col.name, col.data_type) for col in self.fields + ] + field_type = 'STRUCT<{}>'.format(", ".join(subcols)) + + else: + field_type = self.dtype + + if self.mode.upper() == 'REPEATED': + return 'ARRAY<{}>'.format(field_type) + + else: + return field_type + + def is_string(self): + return self.dtype.lower() == 'string' + + def is_numeric(self): + return False + + def can_expand_to(self, other_column): + """returns True if both columns are strings""" + return self.is_string() and other_column.is_string() + + def __repr__(self): + return "".format(self.name, self.data_type, + self.mode) diff --git a/core/dbt/semver.py b/core/dbt/semver.py index e00c9ea3168..a04c1f8b46a 100644 --- a/core/dbt/semver.py +++ b/core/dbt/semver.py @@ -30,8 +30,7 @@ alpha_no_leading_zeros=_ALPHA_NO_LEADING_ZEROS, alpha=_ALPHA) - -_VERSION_REGEX_PAT_STR = r""" +_VERSION_REGEX = re.compile(r""" ^ {matchers} {base_version_regex} @@ -40,9 +39,8 @@ """.format( matchers=_MATCHERS, base_version_regex=_BASE_VERSION_REGEX, - version_extra_regex=_VERSION_EXTRA_REGEX) - -_VERSION_REGEX = re.compile(_VERSION_REGEX_PAT_STR, re.VERBOSE) + version_extra_regex=_VERSION_EXTRA_REGEX), + re.VERBOSE) class Matchers: @@ -358,7 +356,7 @@ def reduce_versions(*args): for version_specifier in version_specifiers: to_return = to_return.reduce(version_specifier.to_range()) - except VersionsNotCompatibleException: + except VersionsNotCompatibleException as e: raise VersionsNotCompatibleException( 'Could not find a satisfactory version from options: {}' .format([str(a) for a in args])) @@ -373,7 +371,7 @@ def versions_compatible(*args): try: reduce_versions(*args) return True - except VersionsNotCompatibleException: + except VersionsNotCompatibleException as e: return False diff --git a/core/dbt/ssh_forward.py b/core/dbt/ssh_forward.py new file mode 100644 index 
00000000000..0ff32097998 --- /dev/null +++ b/core/dbt/ssh_forward.py @@ -0,0 +1,10 @@ +import logging + +# modules are only imported once -- make sure that we don't have > 1 +# because subsequent tunnels will block waiting to acquire the port + +server = None + + +def get_or_create_tunnel(host, port, user, remote_host, remote_port, timeout): + pass diff --git a/core/dbt/task/archive.py b/core/dbt/task/archive.py index 0cfbd82aa5c..1a90b615874 100644 --- a/core/dbt/task/archive.py +++ b/core/dbt/task/archive.py @@ -9,10 +9,9 @@ def raise_on_first_error(self): def build_query(self): return { - "include": self.args.models, - "exclude": self.args.exclude, - "resource_types": [NodeType.Archive], - "tags": [], + 'include': ['*'], + 'exclude': [], + 'resource_types': [NodeType.Archive] } def get_runner_type(self): diff --git a/core/dbt/task/base.py b/core/dbt/task/base.py deleted file mode 100644 index 6a7a6b2be7e..00000000000 --- a/core/dbt/task/base.py +++ /dev/null @@ -1,127 +0,0 @@ -from abc import ABCMeta, abstractmethod -import os - -import six - -from dbt.config import RuntimeConfig, Project -from dbt.config.profile import read_profile, PROFILES_DIR -from dbt import tracking -from dbt.logger import GLOBAL_LOGGER as logger -import dbt.exceptions - - -class NoneConfig(object): - @classmethod - def from_args(cls, args): - return None - - -def read_profiles(profiles_dir=None): - """This is only used for some error handling""" - if profiles_dir is None: - profiles_dir = PROFILES_DIR - - raw_profiles = read_profile(profiles_dir) - - if raw_profiles is None: - profiles = {} - else: - profiles = {k: v for (k, v) in raw_profiles.items() if k != 'config'} - - return profiles - - -PROFILES_HELP_MESSAGE = """ -For more information on configuring profiles, please consult the dbt docs: - -https://docs.getdbt.com/docs/configure-your-profile -""" - - -@six.add_metaclass(ABCMeta) -class BaseTask(object): - ConfigType = NoneConfig - - def __init__(self, args, config): - self.args = args - self.config = config - - @classmethod - def from_args(cls, args): - try: - config = cls.ConfigType.from_args(args) - except dbt.exceptions.DbtProjectError as exc: - logger.error("Encountered an error while reading the project:") - logger.error(" ERROR: {}".format(str(exc))) - - tracking.track_invalid_invocation( - args=args, - result_type=exc.result_type) - raise dbt.exceptions.RuntimeException('Could not run dbt') - except dbt.exceptions.DbtProfileError as exc: - logger.error("Encountered an error while reading profiles:") - logger.error(" ERROR {}".format(str(exc))) - - all_profiles = read_profiles(args.profiles_dir).keys() - - if len(all_profiles) > 0: - logger.info("Defined profiles:") - for profile in all_profiles: - logger.info(" - {}".format(profile)) - else: - logger.info("There are no profiles defined in your " - "profiles.yml file") - - logger.info(PROFILES_HELP_MESSAGE) - - tracking.track_invalid_invocation( - args=args, - result_type=exc.result_type) - raise dbt.exceptions.RuntimeException('Could not run dbt') - return cls(args, config) - - @abstractmethod - def run(self): - raise dbt.exceptions.NotImplementedException('Not Implemented') - - def interpret_results(self, results): - return True - - -def get_nearest_project_dir(): - root_path = os.path.abspath(os.sep) - cwd = os.getcwd() - - while cwd != root_path: - project_file = os.path.join(cwd, "dbt_project.yml") - if os.path.exists(project_file): - return cwd - cwd = os.path.dirname(cwd) - - return None - - -def move_to_nearest_project_dir(): - 
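get_nearest_project_dir above walks parent directories until it finds dbt_project.yml. The same walk, parameterised on a starting directory so it can be tried without changing the working directory; nearest_project_dir is just an illustrative name.

```python
import os


def nearest_project_dir(start_dir):
    """Walk up from start_dir until a directory containing dbt_project.yml is found."""
    root_path = os.path.abspath(os.sep)
    cwd = os.path.abspath(start_dir)
    while cwd != root_path:
        if os.path.exists(os.path.join(cwd, 'dbt_project.yml')):
            return cwd
        cwd = os.path.dirname(cwd)
    return None


if __name__ == '__main__':
    print(nearest_project_dir(os.getcwd()))
```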
nearest_project_dir = get_nearest_project_dir() - if nearest_project_dir is None: - raise dbt.exceptions.RuntimeException( - "fatal: Not a dbt project (or any of the parent directories). " - "Missing dbt_project.yml file" - ) - - os.chdir(nearest_project_dir) - - -class RequiresProjectTask(BaseTask): - @classmethod - def from_args(cls, args): - move_to_nearest_project_dir() - return super(RequiresProjectTask, cls).from_args(args) - - -class ConfiguredTask(RequiresProjectTask): - ConfigType = RuntimeConfig - - -class ProjectOnlyTask(RequiresProjectTask): - ConfigType = Project diff --git a/core/dbt/task/base_task.py b/core/dbt/task/base_task.py new file mode 100644 index 00000000000..db8cedbff45 --- /dev/null +++ b/core/dbt/task/base_task.py @@ -0,0 +1,13 @@ +import dbt.exceptions + + +class BaseTask(object): + def __init__(self, args, config=None): + self.args = args + self.config = config + + def run(self): + raise dbt.exceptions.NotImplementedException('Not Implemented') + + def interpret_results(self, results): + return True diff --git a/core/dbt/task/clean.py b/core/dbt/task/clean.py index ab0ef081b10..f7b524057b8 100644 --- a/core/dbt/task/clean.py +++ b/core/dbt/task/clean.py @@ -2,10 +2,10 @@ import os import shutil -from dbt.task.base import ProjectOnlyTask +from dbt.task.base_task import BaseTask -class CleanTask(ProjectOnlyTask): +class CleanTask(BaseTask): def __is_project_path(self, path): proj_path = os.path.abspath('.') diff --git a/core/dbt/task/compile.py b/core/dbt/task/compile.py index 64cbefc9daf..ac7f49ec2c8 100644 --- a/core/dbt/task/compile.py +++ b/core/dbt/task/compile.py @@ -1,24 +1,11 @@ -import os -import signal -import threading - -from dbt.adapters.factory import get_adapter -from dbt.clients.jinja import extract_toplevel_blocks -from dbt.compilation import compile_manifest -from dbt.loader import load_all_projects -from dbt.node_runners import CompileRunner, RPCCompileRunner +from dbt.node_runners import CompileRunner from dbt.node_types import NodeType -from dbt.parser.analysis import RPCCallParser -from dbt.parser.macros import MacroParser -from dbt.parser.util import ParserUtils import dbt.ui.printer -from dbt.logger import RPC_LOGGER as rpc_logger - -from dbt.task.runnable import GraphRunnableTask, RemoteCallable +from dbt.task.runnable import RunnableTask -class CompileTask(GraphRunnableTask): +class CompileTask(RunnableTask): def raise_on_first_error(self): return True @@ -35,129 +22,3 @@ def get_runner_type(self): def task_end_messages(self, results): dbt.ui.printer.print_timestamped_line('Done.') - - -class RemoteCompileTask(CompileTask, RemoteCallable): - METHOD_NAME = 'compile' - - def __init__(self, args, config, manifest): - super(RemoteCompileTask, self).__init__(args, config) - self._base_manifest = manifest.deepcopy(config=config) - - def get_runner_type(self): - return RPCCompileRunner - - def runtime_cleanup(self, selected_uids): - """Do some pre-run cleanup that is usually performed in Task __init__. 
- """ - self.run_count = 0 - self.num_nodes = len(selected_uids) - self.node_results = [] - self._skipped_children = {} - self._skipped_children = {} - self._raise_next_tick = None - - def _extract_request_data(self, data): - data = self.decode_sql(data) - macro_blocks = [] - data_chunks = [] - for block in extract_toplevel_blocks(data): - if block.block_type_name == 'macro': - macro_blocks.append(block.full_block) - else: - data_chunks.append(block.full_block) - macros = '\n'.join(macro_blocks) - sql = ''.join(data_chunks) - return sql, macros - - def _get_exec_node(self, name, sql, macros): - request_path = os.path.join(self.config.target_path, 'rpc', name) - all_projects = load_all_projects(self.config) - macro_overrides = {} - sql, macros = self._extract_request_data(sql) - - if macros: - macro_parser = MacroParser(self.config, all_projects) - macro_overrides.update(macro_parser.parse_macro_file( - macro_file_path='from remote system', - macro_file_contents=macros, - root_path=request_path, - package_name=self.config.project_name, - resource_type=NodeType.Macro - )) - - self._base_manifest.macros.update(macro_overrides) - rpc_parser = RPCCallParser( - self.config, - all_projects=all_projects, - macro_manifest=self._base_manifest - ) - - node_dict = { - 'name': name, - 'root_path': request_path, - 'resource_type': NodeType.RPCCall, - 'path': name + '.sql', - 'original_file_path': 'from remote system', - 'package_name': self.config.project_name, - 'raw_sql': sql, - } - - unique_id, node = rpc_parser.parse_sql_node(node_dict) - self.manifest = ParserUtils.add_new_refs( - manifest=self._base_manifest, - current_project=self.config, - node=node, - macros=macro_overrides - ) - - # don't write our new, weird manifest! - self.linker = compile_manifest(self.config, self.manifest, write=False) - return node - - def _raise_set_error(self): - if self._raise_next_tick is not None: - raise self._raise_next_tick - - def _in_thread(self, node, thread_done): - runner = self.get_runner(node) - try: - self.node_results.append(runner.safe_run(self.manifest)) - except Exception as exc: - self._raise_next_tick = exc - finally: - thread_done.set() - - def handle_request(self, name, sql, macros=None): - # we could get a ctrl+c at any time, including during parsing. - thread = None - try: - node = self._get_exec_node(name, sql, macros) - - selected_uids = [node.unique_id] - self.runtime_cleanup(selected_uids) - - thread_done = threading.Event() - thread = threading.Thread(target=self._in_thread, - args=(node, thread_done)) - thread.start() - thread_done.wait() - except KeyboardInterrupt: - adapter = get_adapter(self.config) - if adapter.is_cancelable(): - - for conn_name in adapter.cancel_open_connections(): - rpc_logger.debug('canceled query {}'.format(conn_name)) - if thread: - thread.join() - else: - msg = ("The {} adapter does not support query " - "cancellation. 
Some queries may still be " - "running!".format(adapter.type())) - - rpc_logger.debug(msg) - - raise dbt.exceptions.RPCKilledException(signal.SIGINT) - - self._raise_set_error() - return self.node_results[0].serialize() diff --git a/core/dbt/task/debug.py b/core/dbt/task/debug.py index 1399effe7c4..6a141cd7b3c 100644 --- a/core/dbt/task/debug.py +++ b/core/dbt/task/debug.py @@ -1,6 +1,7 @@ # coding=utf-8 import os import platform +import pprint import sys from dbt.logger import GLOBAL_LOGGER as logger @@ -15,7 +16,7 @@ from dbt.clients.yaml_helper import load_yaml_text from dbt.ui.printer import green, red -from dbt.task.base import BaseTask +from dbt.task.base_task import BaseTask PROFILE_DIR_MESSAGE = """To view your profiles.yml file, run: @@ -58,7 +59,7 @@ class DebugTask(BaseTask): - def __init__(self, args, config): + def __init__(self, args, config=None): super(DebugTask, self).__init__(args, config) self.profiles_dir = getattr(self.args, 'profiles_dir', dbt.config.PROFILES_DIR) @@ -209,7 +210,8 @@ def _load_profile(self): self.profile_name = self._choose_profile_name() self.target_name = self._choose_target_name() try: - self.profile = Profile.from_args(self.args, self.profile_name) + self.profile = Profile.from_args(self.args, self.profile_name, + self.cli_vars) except dbt.exceptions.DbtConfigError as exc: self.profile_fail_details = str(exc) return red('ERROR invalid') diff --git a/core/dbt/task/deps.py b/core/dbt/task/deps.py index 3e282c25d7b..2fe91a8af66 100644 --- a/core/dbt/task/deps.py +++ b/core/dbt/task/deps.py @@ -21,7 +21,7 @@ GIT_PACKAGE_CONTRACT, REGISTRY_PACKAGE_CONTRACT, \ REGISTRY_PACKAGE_METADATA_CONTRACT, PackageConfig -from dbt.task.base import ProjectOnlyTask +from dbt.task.base_task import BaseTask DOWNLOADS_PATH = None REMOVE_DOWNLOADS = False @@ -440,7 +440,7 @@ def _read_packages(project_yaml): return packages -class DepsTask(ProjectOnlyTask): +class DepsTask(BaseTask): def __init__(self, args, config=None): super(DepsTask, self).__init__(args=args, config=config) self._downloads_path = None diff --git a/core/dbt/task/freshness.py b/core/dbt/task/freshness.py index 75bbd4f5ba9..b3956daa917 100644 --- a/core/dbt/task/freshness.py +++ b/core/dbt/task/freshness.py @@ -1,5 +1,5 @@ import os -from dbt.task.runnable import GraphRunnableTask +from dbt.task.runnable import BaseRunnableTask from dbt.node_runners import FreshnessRunner from dbt.node_types import NodeType from dbt.ui.printer import print_timestamped_line, print_run_result_error @@ -8,7 +8,7 @@ RESULT_FILE_NAME = 'sources.json' -class FreshnessTask(GraphRunnableTask): +class FreshnessTask(BaseRunnableTask): def result_path(self): if self.args.output: return os.path.realpath(self.args.output) diff --git a/core/dbt/task/generate.py b/core/dbt/task/generate.py index db7c91504f6..58b2238a02a 100644 --- a/core/dbt/task/generate.py +++ b/core/dbt/task/generate.py @@ -1,3 +1,4 @@ +import json import os import shutil @@ -201,12 +202,11 @@ def run(self): DOCS_INDEX_FILE_PATH, os.path.join(self.config.target_path, 'index.html')) + manifest = self._get_manifest() adapter = get_adapter(self.config) - with adapter.connection_named('generate_catalog'): - manifest = self._get_manifest() - dbt.ui.printer.print_timestamped_line("Building catalog") - results = adapter.get_catalog(manifest) + dbt.ui.printer.print_timestamped_line("Building catalog") + results = adapter.get_catalog(manifest) results = [ dict(zip(results.column_names, row)) diff --git a/core/dbt/task/init.py b/core/dbt/task/init.py index 
c3184c60fa3..9f8569b9481 100644 --- a/core/dbt/task/init.py +++ b/core/dbt/task/init.py @@ -6,11 +6,11 @@ from dbt.logger import GLOBAL_LOGGER as logger -from dbt.task.base import BaseTask +from dbt.task.base_task import BaseTask STARTER_REPO = 'https://github.com/fishtown-analytics/dbt-starter-project.git' DOCS_URL = 'https://docs.getdbt.com/docs/configure-your-profile' -SAMPLE_PROFILES_YML_FILE = 'https://docs.getdbt.com/reference#profile' # noqa +SAMPLE_PROFILES_YML_FILE = 'https://github.com/fishtown-analytics/dbt/blob/master/sample.profiles.yml' # noqa ON_COMPLETE_MESSAGE = """ Your new dbt project "{project_name}" was created! If this is your first time diff --git a/core/dbt/task/rpc_server.py b/core/dbt/task/rpc_server.py deleted file mode 100644 index 0dfdfa4c277..00000000000 --- a/core/dbt/task/rpc_server.py +++ /dev/null @@ -1,83 +0,0 @@ -import json - -from werkzeug.wsgi import DispatcherMiddleware -from werkzeug.wrappers import Request, Response -from werkzeug.serving import run_simple -from werkzeug.exceptions import NotFound - -from dbt.logger import RPC_LOGGER as logger -from dbt.task.base import ConfiguredTask -from dbt.task.compile import CompileTask, RemoteCompileTask -from dbt.task.run import RemoteRunTask -from dbt.utils import JSONEncoder -from dbt import rpc - - -class RPCServerTask(ConfiguredTask): - def __init__(self, args, config, tasks=None): - super(RPCServerTask, self).__init__(args, config) - # compile locally - self.manifest = self._compile_manifest() - self.task_manager = rpc.TaskManager() - tasks = tasks or [RemoteCompileTask, RemoteRunTask] - for cls in tasks: - task = cls(args, config, self.manifest) - self.task_manager.add_task_handler(task) - - def _compile_manifest(self): - compile_task = CompileTask(self.args, self.config) - compile_task.run() - return compile_task.manifest - - def run(self): - host = self.args.host - port = self.args.port - addr = (host, port) - - display_host = host - if host == '0.0.0.0': - display_host = 'localhost' - - logger.info( - 'Serving RPC server at {}:{}'.format(*addr) - ) - - logger.info( - 'Supported methods: {}'.format(self.task_manager.methods()) - ) - - logger.info( - 'Send requests to http://{}:{}/jsonrpc'.format(display_host, port) - ) - - app = self.handle_request - app = DispatcherMiddleware(app, { - '/jsonrpc': self.handle_jsonrpc_request, - }) - - # we have to run in threaded mode if we want to share subprocess - # handles, which is the easiest way to implement `kill` (it makes `ps` - # easier as well). The alternative involves tracking metadata+state in - # a multiprocessing.Manager, adds polling the manager to the request - # task handler and in general gets messy fast. - run_simple(host, port, app, threaded=True) - - @Request.application - def handle_jsonrpc_request(self, request): - msg = 'Received request ({0}) from {0.remote_addr}, data={0.data}' - logger.info(msg.format(request)) - response = rpc.ResponseManager.handle(request, self.task_manager) - json_data = json.dumps(response.data, cls=JSONEncoder) - response = Response(json_data, mimetype='application/json') - # this looks and feels dumb, but our json encoder converts decimals and - # datetimes, and if we use the json_data itself the output looks silly - # because of escapes, so re-serialize it into valid JSON types for - # logging. 
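For context on the werkzeug pieces used above (Request.application, Response, run_simple), here is a bare-bones echo server in the same style; it omits the /jsonrpc dispatch, task manager, and JSON-RPC handling entirely, and the port is arbitrary.

```python
import json

from werkzeug.serving import run_simple
from werkzeug.wrappers import Request, Response


@Request.application
def handle_request(request):
    # echo the request body back as JSON; real dispatch/task tracking omitted
    payload = {'received': request.data.decode('utf-8'),
               'remote_addr': request.remote_addr}
    return Response(json.dumps(payload), mimetype='application/json')


if __name__ == '__main__':
    # run_simple blocks; threaded=True mirrors the comment above about
    # sharing subprocess handles across requests
    run_simple('localhost', 8580, handle_request, threaded=True)
```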
- logger.info('sending response ({}) to {}, data={}'.format( - response, request.remote_addr, json.loads(json_data)) - ) - return response - - @Request.application - def handle_request(self, request): - raise NotFound() diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index 3e7903a3736..7c9a7c6b418 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -2,7 +2,7 @@ from dbt.logger import GLOBAL_LOGGER as logger from dbt.node_types import NodeType, RunHookType -from dbt.node_runners import ModelRunner, RPCExecuteRunner +from dbt.node_runners import ModelRunner import dbt.exceptions import dbt.flags @@ -11,7 +11,7 @@ from dbt.hooks import get_hook_dict from dbt.compilation import compile_node -from dbt.task.compile import CompileTask, RemoteCompileTask +from dbt.task.compile import CompileTask from dbt.utils import get_nodes_by_tags @@ -29,14 +29,19 @@ def run_hooks(self, adapter, hook_type, extra_context): ordered_hooks = sorted(hooks, key=lambda h: h.get('index', len(hooks))) - # on-run-* hooks should run outside of a transaction. This happens - # b/c psycopg2 automatically begins a transaction when a connection - # is created. - adapter.clear_transaction() - for i, hook in enumerate(ordered_hooks): - compiled = compile_node(adapter, self.config, hook, - self.manifest, extra_context) + model_name = hook.get('name') + + # This will clear out an open transaction if there is one. + # on-run-* hooks should run outside of a transaction. This happens + # b/c psycopg2 automatically begins a transaction when a connection + # is created. TODO : Move transaction logic out of here, and + # implement a for-loop over these sql statements in jinja-land. + # Also, consider configuring psycopg2 (and other adapters?) to + # ensure that a transaction is only created if dbt initiates it. 
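run_hooks above orders hooks by their optional 'index' before executing them, with un-indexed hooks sorting last. A small sketch of that ordering rule; execute is a placeholder for the adapter call and the hook dicts are simplified.

```python
def run_ordered_hooks(hooks, execute):
    """Execute hooks in ascending 'index' order; un-indexed hooks sort last."""
    ordered = sorted(hooks, key=lambda h: h.get('index', len(hooks)))
    for hook in ordered:
        sql = hook.get('sql', '').strip()
        if sql:
            execute(sql)


if __name__ == '__main__':
    hooks = [
        {'sql': 'grant usage on schema analytics to role reporter', 'index': 2},
        {'sql': 'create schema if not exists analytics', 'index': 1},
        {'sql': 'select 1'},  # no index: runs after the indexed hooks
    ]
    run_ordered_hooks(hooks, execute=print)
```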
+ adapter.clear_transaction(model_name) + compiled = compile_node(adapter, self.config, hook, self.manifest, + extra_context) statement = compiled.wrapped_sql hook_index = hook.get('index', len(hooks)) @@ -48,7 +53,10 @@ def run_hooks(self, adapter, hook_type, extra_context): sql = hook_dict.get('sql', '') if len(sql.strip()) > 0: - adapter.execute(sql, auto_begin=False, fetch=False) + adapter.execute(sql, model_name=model_name, auto_begin=False, + fetch=False) + + adapter.release_connection(model_name) def safe_run_hooks(self, adapter, hook_type, extra_context): try: @@ -74,10 +82,9 @@ def print_results_line(cls, results, execution_time): .format(stat_line=stat_line, execution=execution)) def before_run(self, adapter, selected_uids): - with adapter.connection_named('master'): - self.create_schemas(adapter, selected_uids) - self.populate_adapter_cache(adapter) - self.safe_run_hooks(adapter, RunHookType.Start, {}) + self.populate_adapter_cache(adapter) + self.safe_run_hooks(adapter, RunHookType.Start, {}) + self.create_schemas(adapter, selected_uids) def after_run(self, adapter, results): # in on-run-end hooks, provide the value 'schemas', which is a list of @@ -87,9 +94,8 @@ def after_run(self, adapter, results): r.node.schema for r in results if not any((r.error is not None, r.failed, r.skipped)) )) - with adapter.connection_named('master'): - self.safe_run_hooks(adapter, RunHookType.End, - {'schemas': schemas, 'results': results}) + self.safe_run_hooks(adapter, RunHookType.End, + {'schemas': schemas, 'results': results}) def after_hooks(self, adapter, results, elapsed): self.print_results_line(results, elapsed) @@ -108,10 +114,3 @@ def get_runner_type(self): def task_end_messages(self, results): if results: dbt.ui.printer.print_run_end_messages(results) - - -class RemoteRunTask(RemoteCompileTask, RunTask): - METHOD_NAME = 'run' - - def get_runner_type(self): - return RPCExecuteRunner diff --git a/core/dbt/task/run_operation.py b/core/dbt/task/run_operation.py deleted file mode 100644 index fe90649d1e0..00000000000 --- a/core/dbt/task/run_operation.py +++ /dev/null @@ -1,64 +0,0 @@ -from dbt.logger import GLOBAL_LOGGER as logger -from dbt.task.base import ConfiguredTask -from dbt.adapters.factory import get_adapter -from dbt.loader import GraphLoader - -import dbt -import dbt.utils -import dbt.exceptions - - -class RunOperationTask(ConfiguredTask): - def _get_macro_parts(self): - macro_name = self.args.macro - if '.' 
in macro_name: - package_name, macro_name = macro_name.split(".", 1) - else: - package_name = None - - return package_name, macro_name - - def _get_kwargs(self): - return dbt.utils.parse_cli_vars(self.args.args) - - def _run_unsafe(self): - manifest = GraphLoader.load_all(self.config) - adapter = get_adapter(self.config) - - package_name, macro_name = self._get_macro_parts() - macro_kwargs = self._get_kwargs() - - with adapter.connection_named('macro_{}'.format(macro_name)): - adapter.clear_transaction() - res = adapter.execute_macro( - macro_name, - project=package_name, - kwargs=macro_kwargs, - manifest=manifest - ) - - return res - - def run(self): - try: - result = self._run_unsafe() - except dbt.exceptions.Exception as exc: - logger.error( - 'Encountered an error while running operation: {}' - .format(exc) - ) - logger.debug('', exc_info=True) - return False, None - except Exception as exc: - logger.error( - 'Encountered an uncaught exception while running operation: {}' - .format(exc) - ) - logger.debug('', exc_info=True) - return False, None - else: - return True, result - - def interpret_results(self, results): - success, _ = results - return success diff --git a/core/dbt/task/runnable.py b/core/dbt/task/runnable.py index 0b107d1819f..0084bdf83b6 100644 --- a/core/dbt/task/runnable.py +++ b/core/dbt/task/runnable.py @@ -1,15 +1,9 @@ -import base64 import os -import re import time -from abc import abstractmethod -from multiprocessing.dummy import Pool as ThreadPool -from dbt import rpc -from dbt.task.base import ConfiguredTask +from dbt.task.base_task import BaseTask from dbt.adapters.factory import get_adapter from dbt.logger import GLOBAL_LOGGER as logger -from dbt.compat import to_unicode from dbt.compilation import compile_manifest from dbt.contracts.graph.manifest import CompileResultNode from dbt.contracts.results import ExecutionResult @@ -21,6 +15,8 @@ import dbt.graph.selector +from multiprocessing.dummy import Pool as ThreadPool + RESULT_FILE_NAME = 'run_results.json' MANIFEST_FILE_NAME = 'manifest.json' @@ -36,20 +32,11 @@ def load_manifest(config): return manifest -class ManifestTask(ConfiguredTask): +class BaseRunnableTask(BaseTask): def __init__(self, args, config): - super(ManifestTask, self).__init__(args, config) + super(BaseRunnableTask, self).__init__(args, config) self.manifest = None self.linker = None - - def _runtime_initialize(self): - self.manifest = load_manifest(self.config) - self.linker = compile_manifest(self.config, self.manifest) - - -class GraphRunnableTask(ManifestTask): - def __init__(self, args, config): - super(GraphRunnableTask, self).__init__(args, config) self.job_queue = None self._flattened_nodes = None @@ -59,14 +46,12 @@ def __init__(self, args, config): self._skipped_children = {} self._raise_next_tick = None - def select_nodes(self): + def _runtime_initialize(self): + self.manifest = load_manifest(self.config) + self.linker = compile_manifest(self.config, self.manifest) + selector = dbt.graph.selector.NodeSelector(self.linker, self.manifest) selected_nodes = selector.select(self.build_query()) - return selected_nodes - - def _runtime_initialize(self): - super(GraphRunnableTask, self)._runtime_initialize() - selected_nodes = self.select_nodes() self.job_queue = self.linker.as_graph_queue(self.manifest, selected_nodes) @@ -127,7 +112,7 @@ def _submit(self, pool, args, callback): This does still go through the callback path for result collection. 
""" - if self.config.args.single_threaded: + if self.config.args.single_threaded or True: callback(self.call_runner(*args)) else: pool.apply_async(self.call_runner, args=args, callback=callback) @@ -244,8 +229,27 @@ def after_run(self, adapter, results): def after_hooks(self, adapter, results, elapsed): pass - def execute_with_hooks(self, selected_uids): + def task_end_messages(self, results): + raise dbt.exceptions.NotImplementedException('Not Implemented') + + def get_result(self, results, elapsed_time, generated_at): + raise dbt.exceptions.NotImplementedException('Not Implemented') + + def run(self): + """ + Run dbt for the query, based on the graph. + """ + self._runtime_initialize() adapter = get_adapter(self.config) + + if len(self._flattened_nodes) == 0: + logger.info("WARNING: Nothing to do. Try checking your model " + "configs and model specification args") + return [] + else: + logger.info("") + + selected_uids = frozenset(n.unique_id for n in self._flattened_nodes) try: self.before_hooks(adapter) started = time.time() @@ -263,28 +267,10 @@ def execute_with_hooks(self, selected_uids): elapsed_time=elapsed, generated_at=dbt.utils.timestring() ) - return result - - def run(self): - """ - Run dbt for the query, based on the graph. - """ - self._runtime_initialize() - - if len(self._flattened_nodes) == 0: - logger.warning("WARNING: Nothing to do. Try checking your model " - "configs and model specification args") - return [] - else: - logger.info("") - - selected_uids = frozenset(n.unique_id for n in self._flattened_nodes) - result = self.execute_with_hooks(selected_uids) - result.write(self.result_path()) - self.task_end_messages(result.results) - return result.results + self.task_end_messages(res) + return res def interpret_results(self, results): if results is None: @@ -293,6 +279,8 @@ def interpret_results(self, results): failures = [r for r in results if r.error or r.failed] return len(failures) == 0 + +class RunnableTask(BaseRunnableTask): def get_model_schemas(self, selected_uids): schemas = set() for node in self.manifest.nodes.values(): @@ -332,44 +320,3 @@ def get_result(self, results, elapsed_time, generated_at): def task_end_messages(self, results): dbt.ui.printer.print_run_end_messages(results) - - -class RemoteCallable(object): - METHOD_NAME = None - is_async = False - - @abstractmethod - def handle_request(self, **kwargs): - raise dbt.exceptions.NotImplementedException( - 'from_kwargs not implemented' - ) - - def decode_sql(self, sql): - """Base64 decode a string. This should only be used for sql in calls. - - :param str sql: The base64 encoded form of the original utf-8 string - :return str: The decoded utf-8 string - """ - # JSON is defined as using "unicode", we'll go a step further and - # mandate utf-8 (though for the base64 part, it doesn't really matter!) - base64_sql_bytes = to_unicode(sql).encode('utf-8') - # in python3.x you can pass `validate=True` to b64decode to get this - # behavior. 
- if not re.match(b'^[A-Za-z0-9+/]*={0,2}$', base64_sql_bytes): - self.raise_invalid_base64(sql) - - try: - sql_bytes = base64.b64decode(base64_sql_bytes) - except ValueError: - self.raise_invalid_base64(sql) - - return sql_bytes.decode('utf-8') - - @staticmethod - def raise_invalid_base64(sql): - raise rpc.invalid_params( - data={ - 'message': 'invalid base64-encoded sql input', - 'sql': str(sql), - } - ) diff --git a/core/dbt/task/serve.py b/core/dbt/task/serve.py index dd3af94f17b..d8ce756b75c 100644 --- a/core/dbt/task/serve.py +++ b/core/dbt/task/serve.py @@ -6,10 +6,10 @@ from dbt.compat import SimpleHTTPRequestHandler, TCPServer from dbt.logger import GLOBAL_LOGGER as logger -from dbt.task.base import ProjectOnlyTask +from dbt.task.base_task import BaseTask -class ServeTask(ProjectOnlyTask): +class ServeTask(BaseTask): def run(self): os.chdir(self.config.target_path) @@ -31,7 +31,7 @@ def run(self): try: webbrowser.open_new_tab('http://127.0.0.1:{}'.format(port)) - except webbrowser.Error: + except webbrowser.Error as e: pass try: diff --git a/core/dbt/task/test.py b/core/dbt/task/test.py index f20b9897924..422214bb780 100644 --- a/core/dbt/task/test.py +++ b/core/dbt/task/test.py @@ -1,24 +1,23 @@ from dbt.node_runners import TestRunner from dbt.node_types import NodeType +import dbt.ui.printer from dbt.task.run import RunTask class TestTask(RunTask): """ Testing: - Read schema files + custom data tests and validate that - constraints are satisfied. + 1) Create tmp views w/ 0 rows to ensure all tables, schemas, and SQL + statements are valid + 2) Read schema files and validate that constraints are satisfied + a) not null + b) uniquenss + c) referential integrity + d) accepted value """ def raise_on_first_error(self): return False - def before_run(self, adapter, selected_uids): - # Don't execute on-run-* hooks for tests - self.populate_adapter_cache(adapter) - - def after_run(self, adapter, results): - pass - def build_query(self): query = { "include": self.args.models, diff --git a/core/dbt/ui/printer.py b/core/dbt/ui/printer.py index e92816d5b97..3f81711b38b 100644 --- a/core/dbt/ui/printer.py +++ b/core/dbt/ui/printer.py @@ -146,11 +146,17 @@ def print_test_result_line(result, schema_name, index, total): result.execution_time) -def print_model_result_line(result, description, index, total): +def print_model_result_line(result, schema_name, index, total): + model = result.node + info, status = get_printable_result(result, 'created', 'creating') print_fancy_output_line( - "{info} {description}".format(info=info, description=description), + "{info} {model_type} model {schema}.{relation}".format( + info=info, + model_type=get_materialization(model), + schema=schema_name, + relation=model.get('alias')), status, index, total, @@ -163,10 +169,9 @@ def print_archive_result_line(result, index, total): info, status = get_printable_result(result, 'archived', 'archiving') cfg = model.get('config', {}) - msg = "{info} {name} --> {target_database}.{target_schema}.{name}".format( - info=info, name=model.name, **cfg) print_fancy_output_line( - msg, + "{info} {source_schema}.{source_table} --> " + "{target_schema}.{target_table}".format(info=info, **cfg), status, index, total, diff --git a/core/dbt/utils.py b/core/dbt/utils.py index b194f0aa5db..edf283bda6b 100644 --- a/core/dbt/utils.py +++ b/core/dbt/utils.py @@ -1,14 +1,18 @@ +from datetime import datetime +from decimal import Decimal + import collections import copy -import datetime import functools import hashlib import itertools import json 
+import numbers import os import dbt.exceptions +from dbt.include.global_project import PACKAGES from dbt.compat import basestring, DECIMALS from dbt.logger import GLOBAL_LOGGER as logger from dbt.node_types import NodeType @@ -166,7 +170,7 @@ def merge(*args): return args[0] lst = list(args) - last = lst.pop(len(lst) - 1) + last = lst.pop(len(lst)-1) return _merge(merge(*lst), last) @@ -190,7 +194,7 @@ def deep_merge(*args): return copy.deepcopy(args[0]) lst = list(args) - last = copy.deepcopy(lst.pop(len(lst) - 1)) + last = copy.deepcopy(lst.pop(len(lst)-1)) return _deep_merge(deep_merge(*lst), last) @@ -420,10 +424,9 @@ def parse_cli_vars(var_string): dbt.exceptions.raise_compiler_error( "The --vars argument must be a YAML dictionary, but was " "of type '{}'".format(type_name)) - except dbt.exceptions.ValidationException: + except dbt.exceptions.ValidationException as e: logger.error( - "The YAML provided in the --vars argument is not valid.\n" - ) + "The YAML provided in the --vars argument is not valid.\n") raise @@ -439,7 +442,7 @@ def add_ephemeral_model_prefix(s): def timestring(): """Get the current datetime as an RFC 3339-compliant string""" # isoformat doesn't include the mandatory trailing 'Z' for UTC. - return datetime.datetime.utcnow().isoformat() + 'Z' + return datetime.utcnow().isoformat() + 'Z' class JSONEncoder(json.JSONEncoder): @@ -450,9 +453,6 @@ class JSONEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, DECIMALS): return float(obj) - if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)): - return obj.isoformat() - return super(JSONEncoder, self).default(obj) @@ -473,10 +473,8 @@ def translate_aliases(kwargs, aliases): key_names = ', '.join("{}".format(k) for k in kwargs if aliases.get(k) == canonical_key) - raise dbt.exceptions.AliasException( - 'Got duplicate keys: ({}) all map to "{}"' - .format(key_names, canonical_key) - ) + raise AliasException('Got duplicate keys: ({}) all map to "{}"' + .format(key_names, canonical_key)) result[canonical_key] = value diff --git a/core/dbt/version.py b/core/dbt/version.py index ab46bd02992..752926a2f59 100644 --- a/core/dbt/version.py +++ b/core/dbt/version.py @@ -1,4 +1,5 @@ import json +import re import requests @@ -56,5 +57,5 @@ def get_version_information(): .format(version_msg)) -__version__ = '0.13.0' +__version__ = '0.13.0a1' installed = get_installed_version() diff --git a/events/schemas/com.fishtownanalytics/invocation_env_context.json b/core/events/schemas/com.fishtownanalytics/invocation_env_context.json similarity index 100% rename from events/schemas/com.fishtownanalytics/invocation_env_context.json rename to core/events/schemas/com.fishtownanalytics/invocation_env_context.json diff --git a/events/schemas/com.fishtownanalytics/invocation_event.json b/core/events/schemas/com.fishtownanalytics/invocation_event.json similarity index 100% rename from events/schemas/com.fishtownanalytics/invocation_event.json rename to core/events/schemas/com.fishtownanalytics/invocation_event.json diff --git a/events/schemas/com.fishtownanalytics/platform_context.json b/core/events/schemas/com.fishtownanalytics/platform_context.json similarity index 100% rename from events/schemas/com.fishtownanalytics/platform_context.json rename to core/events/schemas/com.fishtownanalytics/platform_context.json diff --git a/events/schemas/com.fishtownanalytics/run_model_context.json b/core/events/schemas/com.fishtownanalytics/run_model_context.json similarity index 100% rename from 
events/schemas/com.fishtownanalytics/run_model_context.json rename to core/events/schemas/com.fishtownanalytics/run_model_context.json diff --git a/core/setup.py b/core/setup.py index 29714da5d23..b29bb8fd1fd 100644 --- a/core/setup.py +++ b/core/setup.py @@ -9,7 +9,7 @@ def read(fname): package_name = "dbt-core" -package_version = "0.13.0" +package_version = "0.13.0a1" description = """dbt (data build tool) is a command line tool that helps \ analysts and engineers transform data in their warehouse more effectively""" @@ -51,8 +51,6 @@ def read(fname): 'requests>=2.18.0,<3', 'colorama==0.3.9', 'agate>=1.6,<2', - 'jsonschema>=3.0.1,<4', - 'json-rpc>=1.12,<2', - 'werkzeug>=0.14.1,<0.15', + 'jsonschema==2.6.0', ] ) diff --git a/dev_requirements.txt b/dev_requirements.txt index b2f7fb82fc8..cc33b84fbb1 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,10 +1,9 @@ freezegun==0.3.9 -pytest==4.4.0 -pytest-cov==2.6.1 +nose>=1.3.7 mock>=1.3.0 -flake8>=3.5.0 +pep8>=1.6.2 pytz==2017.2 bumpversion==0.5.3 -coverage==4.4 +coverage==4.2 tox==2.5.0 ipdb diff --git a/docker-compose.yml b/docker-compose.yml index da9b389fefe..176c6f50837 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: build: context: . dockerfile: Dockerfile - command: "/root/.virtualenvs/dbt/bin/pytest" + command: "/root/.virtualenvs/dbt/bin/nosetests" env_file: - ./test.env volumes: diff --git a/etc/dbt-horizontal.png b/etc/dbt-horizontal.png index 2d2549089222496ec86b65423fe0122f29a5d4d7..03dce6d02bacdca4c5eb5198a0bafbb89f0ef869 100644 GIT binary patch literal 18063 zcmX_n1z42b^R~VysH6hIlG5GXFD%{NrF6^Et;Ev3bT8fAB_gpPt#k-1A>AGShxhkg z-^E4k!gJ=#nKN_G%st_%$}*U5h~7MV_6$=_R#N@hvwtLkpW9!d0$+pMzjg!PURa4M zi9dT*8;gGT@g?y42Qyi9rDxB)X`em&68!Ah4e-*J?Pt&2*q=SyF@5$-F!k9pg3p<4 zYQn%9sGk&MB%eJ#edKl)CjzgaxymU?p{=3g;9!#%(f!ze_Dm{KPEuUcbAErxBXd!! 
[... base85-encoded GIT binary patch data for etc/dbt-horizontal.png omitted ...]
zU;fa0+GcKclz!d`9@F%0a1+fd`LeT|lQH?(D|fS{^~EtK1TU+(<}JJg;vd!P!8EZY zSr&eA`9|@E8^U>4Y?nS=kWG}(q0|I;twP>#iSRUVRZn(>S{H=rp*Zh=%9BYUTtljYJppo0g*sSJ6U0iXpzHKUsPlD7z4`A7m)Xc1)$aX6JJ~#cp zugQNab|;tLXq!_4Bs?wPE`(t|73?lI=EoMribD|qF#1|9#(gUE%QNwzb*H^yx5a8S zXyy|;UUFP1Xnc@$nV_zyw=S=29OYE`cXbe90Dvt5*H|wrU2o!^81~q5G=5qo=Z7je zD2;ISp5m8R-8?L^0uKj#{d*SR$U-1mgiA!eJln3hIe*e3NplPU4xSv2WfjHsOA5JH zHl|-;Y^G*7`qrNGnIevQ^j?8Gqdaq^LnM!tv+-P$b-vm8lNmM1d|y=-6#C)_`=$!ofj*u#W96_djTZHMm4+OPA$)gr2;|jgOgH0!fZCVb&$~XkWtN%& z3#`HQQL9$Ss9q;A(2nJC*xHx!=Vx3YABntfjQ42B06u0>a%eg3o&J*b;Lk$29PX|s zT>|D+?m=XLGA*caBN?5m5K=AGx)NKU&WsM;R(=At$*TuNuA7Y#Xyf0Oc(}mV?6Aci z>zag*UToX@KSaTqel3M7_lz=?#G)4tDwdLaAeR`f?P}}Dk0z(EA!nR5G(z3 z<|IH%uY9V&n&ideQFj;-RKs>ri>0(MGJqCUjWtW}mz2-C@}wB_mPop3fFVVL#y@bSx`)md})~Sld{bL;N)`31cqYueriXZ{Z0ZY&^LembCpr)GAs zGc`f_{5kRBJSybl9Mrndi2xotiC)OK3`16LGf9TAou6Lb|NbGFn)&Z(5HxcOxUAqh zEgTePN9ig59!mK|^;g&i4#;Cb@d*fx!Lot>{;!{^( 0: + fields = [column_to_bq_schema(field) for field in col.fields] + kwargs = {"fields": fields} + + return google.cloud.bigquery.SchemaField(col.name, col.dtype, col.mode, + **kwargs) + + class BigQueryAdapter(BaseAdapter): RELATION_TYPES = { @@ -35,7 +48,7 @@ class BigQueryAdapter(BaseAdapter): } Relation = BigQueryRelation - Column = BigQueryColumn + Column = dbt.schema.BigQueryColumn ConnectionManager = BigQueryConnectionManager AdapterSpecificConfigs = frozenset({"cluster_by", "partition_by"}) @@ -52,12 +65,13 @@ def date_function(cls): def is_cancelable(cls): return False - def drop_relation(self, relation): - is_cached = self._schema_is_cached(relation.database, relation.schema) + def drop_relation(self, relation, model_name=None): + is_cached = self._schema_is_cached(relation.database, relation.schema, + model_name) if is_cached: self.cache.drop(relation) - conn = self.connections.get_thread_connection() + conn = self.connections.get(model_name) client = conn.handle dataset = self.connections.dataset(relation.database, relation.schema, @@ -65,37 +79,32 @@ def drop_relation(self, relation): relation_object = dataset.table(relation.identifier) client.delete_table(relation_object) - def truncate_relation(self, relation): + def truncate_relation(self, relation, model_name=None): raise dbt.exceptions.NotImplementedException( '`truncate` is not implemented for this adapter!' ) - def rename_relation(self, from_relation, to_relation): + def rename_relation(self, from_relation, to_relation, model_name=None): raise dbt.exceptions.NotImplementedException( '`rename_relation` is not implemented for this adapter!' 
) - @available - def list_schemas(self, database): - conn = self.connections.get_thread_connection() + def list_schemas(self, database, model_name=None): + conn = self.connections.get(model_name) client = conn.handle - with self.connections.exception_handler('list dataset'): + with self.connections.exception_handler('list dataset', conn.name): all_datasets = client.list_datasets(project=database, include_all=True) return [ds.dataset_id for ds in all_datasets] - @available - def check_schema_exists(self, database, schema): - superself = super(BigQueryAdapter, self) - return superself.check_schema_exists(database, schema) - - def get_columns_in_relation(self, relation): + def get_columns_in_relation(self, relation, model_name=None): try: table = self.connections.get_bq_table( database=relation.database, schema=relation.schema, - identifier=relation.table_name + identifier=relation.table_name, + conn_name=model_name ) return self._get_dbt_columns_from_bq_table(table) @@ -103,17 +112,17 @@ def get_columns_in_relation(self, relation): logger.debug("get_columns_in_relation error: {}".format(e)) return [] - def expand_column_types(self, goal, current): + def expand_column_types(self, goal, current, model_name=None): # This is a no-op on BigQuery pass - def list_relations_without_caching(self, information_schema, schema): - connection = self.connections.get_thread_connection() + def list_relations_without_caching(self, database, schema, + model_name=None): + connection = self.connections.get(model_name) client = connection.handle - bigquery_dataset = self.connections.dataset( - information_schema.database, schema, connection - ) + bigquery_dataset = self.connections.dataset(database, schema, + connection) all_tables = client.list_tables( bigquery_dataset, @@ -131,17 +140,18 @@ def list_relations_without_caching(self, information_schema, schema): # the implementation of list_relations for other adapters try: return [self._bq_table_to_relation(table) for table in all_tables] - except google.api_core.exceptions.NotFound: + except google.api_core.exceptions.NotFound as e: return [] - def get_relation(self, database, schema, identifier): - if self._schema_is_cached(database, schema): + def get_relation(self, database, schema, identifier, model_name=None): + if self._schema_is_cached(database, schema, model_name): # if it's in the cache, use the parent's model of going through # the relations cache and picking out the relation return super(BigQueryAdapter, self).get_relation( database=database, schema=schema, - identifier=identifier + identifier=identifier, + model_name=model_name ) try: @@ -150,16 +160,16 @@ def get_relation(self, database, schema, identifier): table = None return self._bq_table_to_relation(table) - def create_schema(self, database, schema): + def create_schema(self, database, schema, model_name=None): logger.debug('Creating schema "%s.%s".', database, schema) - self.connections.create_dataset(database, schema) + self.connections.create_dataset(database, schema, model_name) - def drop_schema(self, database, schema): + def drop_schema(self, database, schema, model_name=None): logger.debug('Dropping schema "%s.%s".', database, schema) - if not self.check_schema_exists(database, schema): + if not self.check_schema_exists(database, schema, model_name): return - self.connections.drop_dataset(database, schema) + self.connections.drop_dataset(database, schema, model_name) @classmethod def quote(cls, identifier): @@ -221,14 +231,16 @@ def _agate_to_schema(self, agate_table, column_override): 
def _materialize_as_view(self, model): model_database = model.get('database') model_schema = model.get('schema') + model_name = model.get('name') model_alias = model.get('alias') model_sql = model.get('injected_sql') - logger.debug("Model SQL ({}):\n{}".format(model_alias, model_sql)) + logger.debug("Model SQL ({}):\n{}".format(model_name, model_sql)) self.connections.create_view( database=model_database, schema=model_schema, table_name=model_alias, + conn_name=model_name, sql=model_sql ) return "CREATE VIEW" @@ -236,6 +248,7 @@ def _materialize_as_view(self, model): def _materialize_as_table(self, model, model_sql, decorator=None): model_database = model.get('database') model_schema = model.get('schema') + model_name = model.get('name') model_alias = model.get('alias') if decorator is None: @@ -247,6 +260,7 @@ def _materialize_as_table(self, model, model_sql, decorator=None): self.connections.create_table( database=model_database, schema=model_schema, + conn_name=model_name, table_name=table_name, sql=model_sql ) @@ -292,10 +306,10 @@ def warning_on_hooks(hook_type): dbt.ui.printer.COLOR_FG_YELLOW) @available - def add_query(self, sql, auto_begin=True, bindings=None, - abridge_sql_log=False): - if self.nice_connection_name() in ['on-run-start', 'on-run-end']: - self.warning_on_hooks(self.nice_connection_name()) + def add_query(self, sql, model_name=None, auto_begin=True, + bindings=None, abridge_sql_log=False): + if model_name in ['on-run-start', 'on-run-end']: + self.warning_on_hooks(model_name) else: raise dbt.exceptions.NotImplementedException( '`add_query` is not implemented for this adapter!') @@ -304,24 +318,24 @@ def add_query(self, sql, auto_begin=True, bindings=None, # Special bigquery adapter methods ### @available - def make_date_partitioned_table(self, relation): + def make_date_partitioned_table(self, relation, model_name=None): return self.connections.create_date_partitioned_table( database=relation.database, schema=relation.schema, - table_name=relation.identifier + table_name=relation.identifier, + conn_name=model_name ) @available def execute_model(self, model, materialization, sql_override=None, - decorator=None): + decorator=None, model_name=None): if sql_override is None: sql_override = model.get('injected_sql') if flags.STRICT_MODE: - connection = self.connections.get_thread_connection() + connection = self.connections.get(model.get('name')) assert isinstance(connection, Connection) - assert(connection.name == model.get('name')) if materialization == 'view': res = self._materialize_as_view(model) @@ -334,10 +348,10 @@ def execute_model(self, model, materialization, sql_override=None, return res @available - def create_temporary_table(self, sql, **kwargs): + def create_temporary_table(self, sql, model_name=None, **kwargs): # BQ queries always return a temp table with their results - query_job, _ = self.connections.raw_execute(sql) + query_job, _ = self.connections.raw_execute(sql, model_name) bq_table = query_job.destination return self.Relation.create( @@ -351,12 +365,12 @@ def create_temporary_table(self, sql, **kwargs): type=BigQueryRelation.Table) @available - def alter_table_add_columns(self, relation, columns): + def alter_table_add_columns(self, relation, columns, model_name=None): logger.debug('Adding columns ({}) to table {}".'.format( columns, relation)) - conn = self.connections.get_thread_connection() + conn = self.connections.get(model_name) client = conn.handle table_ref = self.connections.table_ref(relation.database, @@ -364,7 +378,7 @@ def 
alter_table_add_columns(self, relation, columns): relation.identifier, conn) table = client.get_table(table_ref) - new_columns = [col.column_to_bq_schema() for col in columns] + new_columns = [column_to_bq_schema(col) for col in columns] new_schema = table.schema + new_columns new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema) @@ -372,9 +386,9 @@ def alter_table_add_columns(self, relation, columns): @available def load_dataframe(self, database, schema, table_name, agate_table, - column_override): + column_override, model_name=None): bq_schema = self._agate_to_schema(agate_table, column_override) - conn = self.connections.get_thread_connection() + conn = self.connections.get(model_name) client = conn.handle table = self.connections.table_ref(database, schema, table_name, conn) @@ -388,7 +402,7 @@ def load_dataframe(self, database, schema, table_name, agate_table, job_config=load_config) timeout = self.connections.get_timeout(conn) - with self.connections.exception_handler("LOAD TABLE"): + with self.connections.exception_handler("LOAD TABLE", conn.name): self.poll_until_job_completes(job, timeout) ### @@ -459,7 +473,7 @@ def _get_stats_columns(cls, table, relation_type): return zip(column_names, column_values) def get_catalog(self, manifest): - connection = self.connections.get_thread_connection() + connection = self.connections.get('catalog') client = connection.handle schemas = manifest.get_used_schemas() diff --git a/plugins/bigquery/dbt/adapters/bigquery/relation.py b/plugins/bigquery/dbt/adapters/bigquery/relation.py index e4f982b2cbe..a489512d607 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/relation.py +++ b/plugins/bigquery/dbt/adapters/bigquery/relation.py @@ -1,8 +1,6 @@ -from dbt.adapters.base.relation import BaseRelation, Column +from dbt.adapters.base.relation import BaseRelation from dbt.utils import filter_null_values -import google.cloud.bigquery - class BigQueryRelation(BaseRelation): External = "external" @@ -109,101 +107,3 @@ def dataset(self): @property def identifier(self): return self.path.get('identifier') - - -class BigQueryColumn(Column): - TYPE_LABELS = { - 'STRING': 'STRING', - 'TIMESTAMP': 'TIMESTAMP', - 'FLOAT': 'FLOAT64', - 'INTEGER': 'INT64', - 'RECORD': 'RECORD', - } - - def __init__(self, column, dtype, fields=None, mode='NULLABLE'): - super(BigQueryColumn, self).__init__(column, dtype) - - if fields is None: - fields = [] - - self.fields = self.wrap_subfields(fields) - self.mode = mode - - @classmethod - def wrap_subfields(cls, fields): - return [BigQueryColumn.create_from_field(field) for field in fields] - - @classmethod - def create_from_field(cls, field): - return BigQueryColumn(field.name, cls.translate_type(field.field_type), - field.fields, field.mode) - - @classmethod - def _flatten_recursive(cls, col, prefix=None): - if prefix is None: - prefix = [] - - if len(col.fields) == 0: - prefixed_name = ".".join(prefix + [col.column]) - new_col = BigQueryColumn(prefixed_name, col.dtype, col.fields, - col.mode) - return [new_col] - - new_fields = [] - for field in col.fields: - new_prefix = prefix + [col.column] - new_fields.extend(cls._flatten_recursive(field, new_prefix)) - - return new_fields - - def flatten(self): - return self._flatten_recursive(self) - - @property - def quoted(self): - return '`{}`'.format(self.column) - - def literal(self, value): - return "cast({} as {})".format(value, self.dtype) - - @property - def data_type(self): - if self.dtype.upper() == 'RECORD': - subcols = [ - "{} {}".format(col.name, col.data_type) 
for col in self.fields - ] - field_type = 'STRUCT<{}>'.format(", ".join(subcols)) - - else: - field_type = self.dtype - - if self.mode.upper() == 'REPEATED': - return 'ARRAY<{}>'.format(field_type) - - else: - return field_type - - def is_string(self): - return self.dtype.lower() == 'string' - - def is_numeric(self): - return False - - def can_expand_to(self, other_column): - """returns True if both columns are strings""" - return self.is_string() and other_column.is_string() - - def __repr__(self): - return "".format(self.name, self.data_type, - self.mode) - - def column_to_bq_schema(self): - """Convert a column to a bigquery schema object. - """ - kwargs = {} - if len(self.fields) > 0: - fields = [field.column_to_bq_schema() for field in self.fields] - kwargs = {"fields": fields} - - return google.cloud.bigquery.SchemaField(self.name, self.dtype, - self.mode, **kwargs) diff --git a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql index 1c87ce4dc18..d9700f3591a 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql @@ -59,21 +59,11 @@ {% endmacro %} -{% macro bigquery__list_relations_without_caching(information_schema, schema) -%} - {{ return(adapter.list_relations_without_caching(information_schema, schema)) }} +{% macro bigquery__list_relations_without_caching(database, schema) -%} + {{ return(adapter.list_relations_without_caching(database, schema)) }} {% endmacro %} {% macro bigquery__current_timestamp() -%} CURRENT_TIMESTAMP() {%- endmacro %} - - -{% macro bigquery__list_schemas(database) %} - {{ return(adapter.list_schemas()) }} -{% endmacro %} - - -{% macro bigquery__check_schema_exists(information_schema, schema) %} - {{ return(adapter.check_schema_exists(information_schema.database, schema)) }} -{% endmacro %} diff --git a/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql b/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql index 7a95f440f83..5548b71a7e8 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql @@ -4,10 +4,11 @@ {% endmacro %} -{% macro bigquery__archive_hash_arguments(args) %} - to_hex(md5(concat({% for arg in args %}coalesce(cast({{ arg }} as string), ''){% if not loop.last %}, '|',{% endif %}{% endfor %}))) +{% macro bigquery__archive_scd_hash() %} + to_hex(md5(concat(cast(`dbt_pk` as string), '|', cast(`dbt_updated_at` as string)))) {% endmacro %} + {% macro bigquery__create_columns(relation, columns) %} {{ adapter.alter_table_add_columns(relation, columns) }} {% endmacro %} @@ -15,8 +16,8 @@ {% macro bigquery__archive_update(target_relation, tmp_relation) %} update {{ target_relation }} as dest - set dest.dbt_valid_to = tmp.dbt_valid_to + set dest.{{ adapter.quote('valid_to') }} = tmp.{{ adapter.quote('valid_to') }} from {{ tmp_relation }} as tmp - where tmp.dbt_scd_id = dest.dbt_scd_id - and change_type = 'update'; + where tmp.{{ adapter.quote('scd_id') }} = dest.{{ adapter.quote('scd_id') }} + and {{ adapter.quote('change_type') }} = 'update'; {% endmacro %} diff --git a/plugins/bigquery/dbt/include/bigquery/macros/materializations/incremental.sql b/plugins/bigquery/dbt/include/bigquery/macros/materializations/incremental.sql index 98c65a1dddc..d561ea487c4 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/materializations/incremental.sql +++ 
b/plugins/bigquery/dbt/include/bigquery/macros/materializations/incremental.sql @@ -2,6 +2,7 @@ {% materialization incremental, adapter='bigquery' -%} {%- set unique_key = config.get('unique_key') -%} + {%- set sql_where = config.get('sql_where') -%} {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} @@ -33,7 +34,12 @@ {% set source_sql -%} {#-- wrap sql in parens to make it a subquery --#} ( - {{ sql }} + select * from ( + {{ sql }} + ) + {% if sql_where %} + where ({{ sql_where }}) or ({{ sql_where }}) is null + {% endif %} ) {%- endset -%} diff --git a/plugins/bigquery/setup.py b/plugins/bigquery/setup.py index 1d21eba4e23..d563e6b3692 100644 --- a/plugins/bigquery/setup.py +++ b/plugins/bigquery/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-bigquery" -package_version = "0.13.0" +package_version = "0.13.0a1" description = """The bigquery adapter plugin for dbt (data build tool)""" diff --git a/plugins/postgres/dbt/adapters/postgres/__init__.py b/plugins/postgres/dbt/adapters/postgres/__init__.py index 89d7b5edee4..f139484e807 100644 --- a/plugins/postgres/dbt/adapters/postgres/__init__.py +++ b/plugins/postgres/dbt/adapters/postgres/__init__.py @@ -1,5 +1,4 @@ -# these are mostly just exports, #noqa them so flake8 will be happy -from dbt.adapters.postgres.connections import PostgresConnectionManager # noqa +from dbt.adapters.postgres.connections import PostgresConnectionManager from dbt.adapters.postgres.connections import PostgresCredentials from dbt.adapters.postgres.impl import PostgresAdapter diff --git a/plugins/postgres/dbt/adapters/postgres/connections.py b/plugins/postgres/dbt/adapters/postgres/connections.py index 360a130a936..664d79ff541 100644 --- a/plugins/postgres/dbt/adapters/postgres/connections.py +++ b/plugins/postgres/dbt/adapters/postgres/connections.py @@ -61,7 +61,7 @@ class PostgresConnectionManager(SQLConnectionManager): TYPE = 'postgres' @contextmanager - def exception_handler(self, sql): + def exception_handler(self, sql, connection_name='master'): try: yield @@ -70,7 +70,7 @@ def exception_handler(self, sql): try: # attempt to release the connection - self.release() + self.release(connection_name) except psycopg2.Error: logger.debug("Failed to release connection!") pass @@ -81,13 +81,7 @@ def exception_handler(self, sql): except Exception as e: logger.debug("Error running SQL: %s", sql) logger.debug("Rolling back transaction.") - self.release() - if isinstance(e, dbt.exceptions.RuntimeException): - # during a sql query, an internal to dbt exception was raised. - # this sounds a lot like a signal handler and probably has - # useful information, so raise it without modification. 
- raise - + self.release(connection_name) raise dbt.exceptions.RuntimeException(e) @classmethod @@ -96,6 +90,7 @@ def open(cls, connection): logger.debug('Connection is already open, skipping open.') return connection + base_credentials = connection.credentials credentials = cls.get_credentials(connection.credentials.incorporate()) kwargs = {} keepalives_idle = credentials.get('keepalives_idle', @@ -137,7 +132,7 @@ def cancel(self, connection): logger.debug("Cancelling query '{}' ({})".format(connection_name, pid)) - _, cursor = self.add_query(sql) + _, cursor = self.add_query(sql, 'master') res = cursor.fetchone() logger.debug("Cancel query '{}': {}".format(connection_name, res)) diff --git a/plugins/postgres/dbt/adapters/postgres/impl.py b/plugins/postgres/dbt/adapters/postgres/impl.py index 88be130cabb..a5b0087a711 100644 --- a/plugins/postgres/dbt/adapters/postgres/impl.py +++ b/plugins/postgres/dbt/adapters/postgres/impl.py @@ -1,8 +1,15 @@ -from dbt.adapters.base.meta import available +import psycopg2 + +import time + +from dbt.adapters.base.meta import available_raw from dbt.adapters.sql import SQLAdapter from dbt.adapters.postgres import PostgresConnectionManager import dbt.compat import dbt.exceptions +import agate + +from dbt.logger import GLOBAL_LOGGER as logger # note that this isn't an adapter macro, so just a single underscore @@ -16,7 +23,7 @@ class PostgresAdapter(SQLAdapter): def date_function(cls): return 'now()' - @available + @available_raw def verify_database(self, database): database = database.strip('"') expected = self.config.credentials.database @@ -49,26 +56,16 @@ def _link_cached_database_relations(self, schemas): if refed_schema.lower() in schemas: self.cache.add_link(dependent, referenced) - def _get_cache_schemas(self, manifest, exec_only=False): - # postgres/redshift only allow one database (the main one) - superself = super(PostgresAdapter, self) - schemas = superself._get_cache_schemas(manifest, exec_only=exec_only) - try: - return schemas.flatten() - except dbt.exceptions.RuntimeException as exc: - dbt.exceptions.raise_compiler_error( - 'Cross-db references not allowed in adapter {}: Got {}'.format( - self.type(), exc.msg - ) - ) - def _link_cached_relations(self, manifest): schemas = set() for db, schema in manifest.get_used_schemas(): self.verify_database(db) schemas.add(schema) - self._link_cached_database_relations(schemas) + try: + self._link_cached_database_relations(schemas) + finally: + self.release_connection(GET_RELATIONS_MACRO_NAME) def _relations_cache_for_schemas(self, manifest): super(PostgresAdapter, self)._relations_cache_for_schemas(manifest) diff --git a/plugins/postgres/dbt/include/postgres/macros/adapters.sql b/plugins/postgres/dbt/include/postgres/macros/adapters.sql index 0bda7fc9ad4..b51df9f42c5 100644 --- a/plugins/postgres/dbt/include/postgres/macros/adapters.sql +++ b/plugins/postgres/dbt/include/postgres/macros/adapters.sql @@ -26,7 +26,7 @@ numeric_precision, numeric_scale - from {{ relation.information_schema('columns') }} + from {{ information_schema_name(relation.database) }}.columns where table_name = '{{ relation.identifier }}' {% if relation.schema %} and table_schema = '{{ relation.schema }}' @@ -39,10 +39,10 @@ {% endmacro %} -{% macro postgres__list_relations_without_caching(information_schema, schema) %} +{% macro postgres__list_relations_without_caching(database, schema) %} {% call statement('list_relations_without_caching', fetch_result=True) -%} select - '{{ information_schema.database.lower() }}' as database, + '{{ 
database }}' as database, tablename as name, schemaname as schema, 'table' as type @@ -50,7 +50,7 @@ where schemaname ilike '{{ schema }}' union all select - '{{ information_schema.database.lower() }}' as database, + '{{ database }}' as database, viewname as name, schemaname as schema, 'view' as type @@ -77,9 +77,9 @@ {{ return(load_result('list_schemas').table) }} {% endmacro %} -{% macro postgres__check_schema_exists(information_schema, schema) -%} +{% macro postgres__check_schema_exists(database, schema) -%} {% if database -%} - {{ adapter.verify_database(information_schema.database) }} + {{ adapter.verify_database(database) }} {%- endif -%} {% call statement('check_schema_exists', fetch_result=True, auto_begin=False) %} select count(*) from pg_namespace where nspname = '{{ schema }}' diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index 3558f3ff649..e04e521ea94 100644 --- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -1,11 +1,11 @@ -{% macro postgres__get_catalog(information_schemas) -%} +{% macro postgres__get_catalog() -%} {%- call statement('catalog', fetch_result=True) -%} - {% if (information_schemas | length) != 1 %} - {{ exceptions.raise_compiler_error('postgres get_catalog requires exactly one database') }} + {% if (databases | length) != 1 %} + exceptions.raise_compiler_error('postgres get_catalog requires exactly one database') {% endif %} - {% set database = information_schemas[0].database %} + {% set database = databases[0] %} {{ adapter.verify_database(database) }} with table_owners as ( diff --git a/plugins/postgres/setup.py b/plugins/postgres/setup.py index 9ea197201f0..d18988e6cf5 100644 --- a/plugins/postgres/setup.py +++ b/plugins/postgres/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-postgres" -package_version = "0.13.0" +package_version = "0.13.0a1" description = """The postgres adpter plugin for dbt (data build tool)""" setup( diff --git a/plugins/redshift/dbt/adapters/redshift/__init__.py b/plugins/redshift/dbt/adapters/redshift/__init__.py index b26665a8ddf..336eb55d17d 100644 --- a/plugins/redshift/dbt/adapters/redshift/__init__.py +++ b/plugins/redshift/dbt/adapters/redshift/__init__.py @@ -1,4 +1,4 @@ -from dbt.adapters.redshift.connections import RedshiftConnectionManager # noqa +from dbt.adapters.redshift.connections import RedshiftConnectionManager from dbt.adapters.redshift.connections import RedshiftCredentials from dbt.adapters.redshift.impl import RedshiftAdapter diff --git a/plugins/redshift/dbt/adapters/redshift/connections.py b/plugins/redshift/dbt/adapters/redshift/connections.py index d5450a117fc..fe85a0e1858 100644 --- a/plugins/redshift/dbt/adapters/redshift/connections.py +++ b/plugins/redshift/dbt/adapters/redshift/connections.py @@ -95,16 +95,16 @@ def fresh_transaction(self, name=None): """ with drop_lock: - connection = self.get_thread_connection() + connection = self.get(name) if connection.transaction_open: - self.commit() + self.commit(connection) - self.begin() + self.begin(connection.name) yield - self.commit() - self.begin() + self.commit(connection) + self.begin(connection.name) @classmethod def fetch_cluster_credentials(cls, db_user, db_name, cluster_id, @@ -123,8 +123,8 @@ def fetch_cluster_credentials(cls, db_user, db_name, cluster_id, except boto_client.exceptions.ClientError as e: raise dbt.exceptions.FailedToConnectException( - "Unable to 
get temporary Redshift cluster credentials: {}" - .format(e)) + "Unable to get temporary Redshift cluster credentials: " + "{}".format(e)) @classmethod def get_tmp_iam_cluster_credentials(cls, credentials): @@ -136,8 +136,8 @@ def get_tmp_iam_cluster_credentials(cls, credentials): if not cluster_id: raise dbt.exceptions.FailedToConnectException( - "'cluster_id' must be provided in profile if IAM " - "authentication method selected") + "'cluster_id' must be provided in profile if IAM " + "authentication method selected") cluster_creds = cls.fetch_cluster_credentials( credentials.user, @@ -167,4 +167,4 @@ def get_credentials(cls, credentials): else: raise dbt.exceptions.FailedToConnectException( - "Invalid 'method' in profile: '{}'".format(method)) + "Invalid 'method' in profile: '{}'".format(method)) diff --git a/plugins/redshift/dbt/adapters/redshift/impl.py b/plugins/redshift/dbt/adapters/redshift/impl.py index 50934fba862..08f0dcff0e4 100644 --- a/plugins/redshift/dbt/adapters/redshift/impl.py +++ b/plugins/redshift/dbt/adapters/redshift/impl.py @@ -1,6 +1,7 @@ from dbt.adapters.postgres import PostgresAdapter from dbt.adapters.redshift import RedshiftConnectionManager from dbt.logger import GLOBAL_LOGGER as logger # noqa +import dbt.exceptions class RedshiftAdapter(PostgresAdapter): @@ -12,7 +13,7 @@ class RedshiftAdapter(PostgresAdapter): def date_function(cls): return 'getdate()' - def drop_relation(self, relation): + def drop_relation(self, relation, model_name=None): """ In Redshift, DROP TABLE ... CASCADE should not be used inside a transaction. Redshift doesn't prevent the CASCADE @@ -27,9 +28,9 @@ def drop_relation(self, relation): https://docs.aws.amazon.com/redshift/latest/dg/r_DROP_TABLE.html """ - with self.connections.fresh_transaction(): + with self.connections.fresh_transaction(model_name): parent = super(RedshiftAdapter, self) - return parent.drop_relation(relation) + return parent.drop_relation(relation, model_name) @classmethod def convert_text_type(cls, agate_table, col_idx): diff --git a/plugins/redshift/dbt/include/redshift/__init__.py b/plugins/redshift/dbt/include/redshift/__init__.py index b177e5d4932..8b17c9fbfc5 100644 --- a/plugins/redshift/dbt/include/redshift/__init__.py +++ b/plugins/redshift/dbt/include/redshift/__init__.py @@ -1,3 +1,3 @@ import os - +from dbt.include.postgres import PACKAGE_PATH as POSTGRES_PACKAGE_PATH PACKAGE_PATH = os.path.dirname(__file__) diff --git a/plugins/redshift/dbt/include/redshift/macros/adapters.sql b/plugins/redshift/dbt/include/redshift/macros/adapters.sql index 29f6ad0b16f..7fd7063bcf5 100644 --- a/plugins/redshift/dbt/include/redshift/macros/adapters.sql +++ b/plugins/redshift/dbt/include/redshift/macros/adapters.sql @@ -62,7 +62,7 @@ {{ column_list_for_create_table(columns) }} ) {{ dist('dbt_updated_at') }} - {{ sort('compound', ['dbt_scd_id']) }}; + {{ sort('compound', ['scd_id']) }}; {%- endmacro %} @@ -88,7 +88,7 @@ numeric_precision, numeric_scale - from {{ relation.information_schema('columns') }} + from information_schema.columns where table_name = '{{ relation.identifier }}' ), @@ -153,8 +153,8 @@ {% endmacro %} -{% macro redshift__list_relations_without_caching(information_schema, schema) %} - {{ return(postgres__list_relations_without_caching(information_schema, schema)) }} +{% macro redshift__list_relations_without_caching(database, schema) %} + {{ return(postgres__list_relations_without_caching(database, schema)) }} {% endmacro %} @@ -168,8 +168,8 @@ {%- endmacro %} -{% macro 
redshift__check_schema_exists(information_schema, schema) -%} - {{ return(postgres__check_schema_exists(information_schema, schema)) }} +{% macro redshift__check_schema_exists(database, schema) -%} + {{ return(postgres__check_schema_exists(database, schema)) }} {%- endmacro %} list_schemas diff --git a/plugins/redshift/dbt/include/redshift/macros/catalog.sql b/plugins/redshift/dbt/include/redshift/macros/catalog.sql index c6788d9c66c..34529df8b02 100644 --- a/plugins/redshift/dbt/include/redshift/macros/catalog.sql +++ b/plugins/redshift/dbt/include/redshift/macros/catalog.sql @@ -1,10 +1,10 @@ -{% macro redshift__get_base_catalog(information_schemas) -%} +{% macro redshift__get_base_catalog() -%} {%- call statement('base_catalog', fetch_result=True) -%} - {% if (information_schemas | length) != 1 %} - {{ exceptions.raise_compiler_error('redshift get_catalog requires exactly one database') }} + {% if (databases | length) != 1 %} + exceptions.raise_compiler_error('redshift get_catalog requires exactly one database') {% endif %} - {% set database = information_schemas[0].database %} + {% set database = databases[0] %} {{ adapter.verify_database(database) }} with late_binding as ( @@ -106,7 +106,7 @@ {{ return(load_result('base_catalog').table) }} {%- endmacro %} -{% macro redshift__get_extended_catalog(information_schemas) %} +{% macro redshift__get_extended_catalog() %} {%- call statement('extended_catalog', fetch_result=True) -%} select @@ -218,12 +218,12 @@ {% endmacro %} -{% macro redshift__get_catalog(information_schemas) %} +{% macro redshift__get_catalog() %} {#-- Compute a left-outer join in memory. Some Redshift queries are -- leader-only, and cannot be joined to other compute-based queries #} - {% set catalog = redshift__get_base_catalog(information_schemas) %} + {% set catalog = redshift__get_base_catalog() %} {% set select_extended = redshift__can_select_from('svv_table_info') %} {% if select_extended %} diff --git a/plugins/redshift/setup.py b/plugins/redshift/setup.py index 4f7d5829141..be5bc45d896 100644 --- a/plugins/redshift/setup.py +++ b/plugins/redshift/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-redshift" -package_version = "0.13.0" +package_version = "0.13.0a1" description = """The redshift adapter plugin for dbt (data build tool)""" diff --git a/plugins/snowflake/dbt/adapters/snowflake/__init__.py b/plugins/snowflake/dbt/adapters/snowflake/__init__.py index 40f8aee62d4..1ac7dcbdf2f 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/__init__.py +++ b/plugins/snowflake/dbt/adapters/snowflake/__init__.py @@ -1,6 +1,6 @@ -from dbt.adapters.snowflake.connections import SnowflakeConnectionManager # noqa +from dbt.adapters.snowflake.connections import SnowflakeConnectionManager from dbt.adapters.snowflake.connections import SnowflakeCredentials -from dbt.adapters.snowflake.relation import SnowflakeRelation # noqa +from dbt.adapters.snowflake.relation import SnowflakeRelation from dbt.adapters.snowflake.impl import SnowflakeAdapter from dbt.adapters.base import AdapterPlugin diff --git a/plugins/snowflake/dbt/adapters/snowflake/connections.py b/plugins/snowflake/dbt/adapters/snowflake/connections.py index a2116e9c734..c7f117a060c 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/connections.py +++ b/plugins/snowflake/dbt/adapters/snowflake/connections.py @@ -72,7 +72,7 @@ class SnowflakeConnectionManager(SQLConnectionManager): TYPE = 'snowflake' @contextmanager - def exception_handler(self, sql): + def exception_handler(self, sql, 
connection_name='master'): try: yield except snowflake.connector.errors.ProgrammingError as e: @@ -83,7 +83,7 @@ def exception_handler(self, sql): if 'Empty SQL statement' in msg: logger.debug("got empty sql statement, moving on") elif 'This session does not have a current database' in msg: - self.release() + self.release(connection_name) raise dbt.exceptions.FailedToConnectException( ('{}\n\nThis error sometimes occurs when invalid ' 'credentials are provided, or when your default role ' @@ -91,17 +91,12 @@ def exception_handler(self, sql): 'Please double check your profile and try again.') .format(msg)) else: - self.release() + self.release(connection_name) raise dbt.exceptions.DatabaseException(msg) except Exception as e: logger.debug("Error running SQL: %s", sql) logger.debug("Rolling back transaction.") - self.release() - if isinstance(e, dbt.exceptions.RuntimeException): - # during a sql query, an internal to dbt exception was raised. - # this sounds a lot like a signal handler and probably has - # useful information, so raise it without modification. - raise + self.release(connection_name) raise dbt.exceptions.RuntimeException(e.msg) @classmethod @@ -146,6 +141,8 @@ def open(cls, connection): raise dbt.exceptions.FailedToConnectException(str(e)) + return connection + def cancel(self, connection): handle = connection.handle sid = handle.session_id @@ -156,7 +153,7 @@ def cancel(self, connection): logger.debug("Cancelling query '{}' ({})".format(connection_name, sid)) - _, cursor = self.add_query(sql) + _, cursor = self.add_query(sql, 'master') res = cursor.fetchone() logger.debug("Cancel query '{}': {}".format(connection_name, res)) @@ -196,7 +193,7 @@ def _get_private_key(cls, private_key_path, private_key_passphrase): format=serialization.PrivateFormat.PKCS8, encryption_algorithm=serialization.NoEncryption()) - def add_query(self, sql, auto_begin=True, + def add_query(self, sql, model_name=None, auto_begin=True, bindings=None, abridge_sql_log=False): connection = None @@ -222,24 +219,21 @@ def add_query(self, sql, auto_begin=True, parent = super(SnowflakeConnectionManager, self) connection, cursor = parent.add_query( - individual_query, auto_begin, + individual_query, model_name, auto_begin, bindings=bindings, abridge_sql_log=abridge_sql_log ) if cursor is None: raise dbt.exceptions.RuntimeException( - "Tried to run an empty query on model '{}'. If you are " - "conditionally running\nsql, eg. in a model hook, make " - "sure your `else` clause contains valid sql!\n\n" - "Provided SQL:\n{}" - .format(self.nice_connection_name(), sql) - ) + "Tried to run an empty query on model '{}'. If you are " + "conditionally running\nsql, eg. in a model hook, make " + "sure your `else` clause contains valid sql!\n\n" + "Provided SQL:\n{}".format(model_name, sql)) return connection, cursor - @classmethod - def _rollback_handle(cls, connection): + def _rollback_handle(self, connection): """On snowflake, rolling back the handle of an aborted session raises an exception. 
""" diff --git a/plugins/snowflake/dbt/adapters/snowflake/impl.py b/plugins/snowflake/dbt/adapters/snowflake/impl.py index c6df92f2ee0..00da5026482 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/impl.py +++ b/plugins/snowflake/dbt/adapters/snowflake/impl.py @@ -1,8 +1,12 @@ from __future__ import absolute_import +import dbt.compat +import dbt.exceptions + from dbt.adapters.sql import SQLAdapter from dbt.adapters.snowflake import SnowflakeConnectionManager from dbt.adapters.snowflake import SnowflakeRelation +from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import filter_null_values diff --git a/plugins/snowflake/dbt/adapters/snowflake/relation.py b/plugins/snowflake/dbt/adapters/snowflake/relation.py index e89b71d89cf..a494fb89363 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/relation.py +++ b/plugins/snowflake/dbt/adapters/snowflake/relation.py @@ -1,4 +1,5 @@ from dbt.adapters.base.relation import BaseRelation +import dbt.utils class SnowflakeRelation(BaseRelation): @@ -8,7 +9,7 @@ class SnowflakeRelation(BaseRelation): }, 'quote_character': '"', 'quote_policy': { - 'database': False, + 'database': True, 'schema': False, 'identifier': False, }, diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index a339bb54b12..c608f496916 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -48,7 +48,7 @@ numeric_scale from - {{ relation.information_schema('columns') }} + {{ information_schema_name(relation.database) }}.columns where table_name ilike '{{ relation.identifier }}' {% if relation.schema %} @@ -67,7 +67,7 @@ {% endmacro %} -{% macro snowflake__list_relations_without_caching(information_schema, schema) %} +{% macro snowflake__list_relations_without_caching(database, schema) %} {% call statement('list_relations_without_caching', fetch_result=True) -%} select table_catalog as database, @@ -77,20 +77,20 @@ when table_type = 'VIEW' then 'view' else table_type end as table_type - from {{ information_schema }}.tables + from {{ information_schema_name(database) }}.tables where table_schema ilike '{{ schema }}' - and table_catalog ilike '{{ information_schema.database.lower() }}' + and table_catalog ilike '{{ database }}' {% endcall %} {{ return(load_result('list_relations_without_caching').table) }} {% endmacro %} -{% macro snowflake__check_schema_exists(information_schema, schema) -%} +{% macro snowflake__check_schema_exists(database, schema) -%} {% call statement('check_schema_exists', fetch_result=True) -%} select count(*) - from {{ information_schema }}.schemata + from {{ information_schema_name(database) }}.schemata where upper(schema_name) = upper('{{ schema }}') - and upper(catalog_name) = upper('{{ information_schema.database }}') + and upper(catalog_name) = upper('{{ database }}') {%- endcall %} {{ return(load_result('check_schema_exists').table) }} {%- endmacro %} @@ -98,10 +98,3 @@ {% macro snowflake__current_timestamp() -%} convert_timezone('UTC', current_timestamp()) {%- endmacro %} - - -{% macro snowflake__rename_relation(from_relation, to_relation) -%} - {% call statement('rename_relation') -%} - alter table {{ from_relation }} rename to {{ to_relation }} - {%- endcall %} -{% endmacro %} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/catalog.sql b/plugins/snowflake/dbt/include/snowflake/macros/catalog.sql index fe68cd2e46e..d235e687607 100644 --- 
a/plugins/snowflake/dbt/include/snowflake/macros/catalog.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/catalog.sql @@ -1,8 +1,8 @@ -{% macro snowflake__get_catalog(information_schemas) -%} +{% macro snowflake__get_catalog() -%} {%- call statement('catalog', fetch_result=True) -%} - {% for information_schema in information_schemas %} + {% for database in databases %} ( with tables as ( @@ -31,7 +31,7 @@ 'Approximate size of the table as reported by Snowflake' as "stats:bytes:description", (bytes is not null) as "stats:bytes:include" - from {{ information_schema }}.tables + from {{ information_schema_name(database) }}.tables ), @@ -48,7 +48,7 @@ data_type as "column_type", null as "column_comment" - from {{ information_schema }}.columns + from {{ adapter.quote_as_configured(database, "database") }}.information_schema.columns ) @@ -56,6 +56,7 @@ from tables join columns using ("table_database", "table_schema", "table_name") where "table_schema" != 'INFORMATION_SCHEMA' + and "table_database" = {{ adapter.quote_as_configured(database, "database").replace('"', "'") }} order by "column_index" ) {% if not loop.last %} union all {% endif %} diff --git a/plugins/snowflake/setup.py b/plugins/snowflake/setup.py index aa28d30ff49..ed3c1afc63d 100644 --- a/plugins/snowflake/setup.py +++ b/plugins/snowflake/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-snowflake" -package_version = "0.13.0" +package_version = "0.13.0a1" description = """The snowflake adapter plugin for dbt (data build tool)""" diff --git a/scripts/build-sdists.sh b/scripts/build-sdists.sh deleted file mode 100755 index 8d0db621b23..00000000000 --- a/scripts/build-sdists.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -eo pipefail - -DBT_PATH="$( cd "$(dirname "$0")/.." 
; pwd -P )" - -echo $SCRIPTPATH - -set -x - -rm -rf "$DBT_PATH"/dist -mkdir -p "$DBT_PATH"/dist - -for SUBPATH in core plugins/postgres plugins/redshift plugins/bigquery plugins/snowflake -do - rm -rf "$DBT_PATH"/"$SUBPATH"/dist - cd "$DBT_PATH"/"$SUBPATH" - python setup.py sdist - cp -r "$DBT_PATH"/"$SUBPATH"/dist/* "$DBT_PATH"/dist/ -done - -cd "$DBT_PATH" -python setup.py sdist - -set +x diff --git a/setup.py b/setup.py index a1335fb0b2f..ffa6cef5164 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ def read(fname): package_name = "dbt" -package_version = "0.13.0" +package_version = "0.13.0a1" description = """dbt (data build tool) is a command line tool that helps \ analysts and engineers transform data in their warehouse more effectively""" diff --git a/test.env.sample b/test.env.sample index 163e618f877..8cb09b20ef3 100644 --- a/test.env.sample +++ b/test.env.sample @@ -18,6 +18,6 @@ BIGQUERY_CLIENT_X509_CERT_URL= REDSHIFT_TEST_HOST= REDSHIFT_TEST_USER= -REDSHIFT_TEST_PASS= +REDSHIFT_TEST_PASSWORD= REDSHIFT_TEST_PORT= REDSHIFT_TEST_DBNAME= diff --git a/test/integration/001_simple_copy_test/test_simple_copy.py b/test/integration/001_simple_copy_test/test_simple_copy.py index 281c64d1299..bbe9349e439 100644 --- a/test/integration/001_simple_copy_test/test_simple_copy.py +++ b/test/integration/001_simple_copy_test/test_simple_copy.py @@ -1,3 +1,4 @@ +from nose.plugins.attrib import attr from test.integration.base import DBTIntegrationTest, use_profile diff --git a/test/integration/004_simple_archive_test/invalidate_bigquery.sql b/test/integration/004_simple_archive_test/invalidate_bigquery.sql index 32ac3f0eceb..eab9c56999e 100644 --- a/test/integration/004_simple_archive_test/invalidate_bigquery.sql +++ b/test/integration/004_simple_archive_test/invalidate_bigquery.sql @@ -8,5 +8,5 @@ where id >= 10 and id <= 20; -- invalidate records 11 - 21 update {database}.{schema}.archive_expected set - dbt_valid_to = timestamp_add(updated_at, interval 1 hour) + valid_to = timestamp_add(updated_at, interval 1 hour) where id >= 10 and id <= 20; diff --git a/test/integration/004_simple_archive_test/invalidate_postgres.sql b/test/integration/004_simple_archive_test/invalidate_postgres.sql index 491afccfac2..30902625059 100644 --- a/test/integration/004_simple_archive_test/invalidate_postgres.sql +++ b/test/integration/004_simple_archive_test/invalidate_postgres.sql @@ -1,27 +1,12 @@ -- update records 11 - 21. 
Change email and updated_at field update {schema}.seed set - updated_at = updated_at + interval '1 hour', - email = 'new_' || email -where id >= 10 and id <= 20; + "updated_at" = "updated_at" + interval '1 hour', + "email" = 'new_' || "email" +where "id" >= 10 and "id" <= 20; -- invalidate records 11 - 21 update {schema}.archive_expected set - dbt_valid_to = updated_at + interval '1 hour' -where id >= 10 and id <= 20; - - -update {schema}.archive_castillo_expected set - dbt_valid_to = updated_at + interval '1 hour' -where id >= 10 and id <= 20; - - -update {schema}.archive_alvarez_expected set - dbt_valid_to = updated_at + interval '1 hour' -where id >= 10 and id <= 20; - - -update {schema}.archive_kelly_expected set - dbt_valid_to = updated_at + interval '1 hour' -where id >= 10 and id <= 20; + "valid_to" = "updated_at" + interval '1 hour' +where "id" >= 10 and "id" <= 20; diff --git a/test/integration/004_simple_archive_test/invalidate_snowflake.sql b/test/integration/004_simple_archive_test/invalidate_snowflake.sql index 86e3a3c7a40..4e45144a611 100644 --- a/test/integration/004_simple_archive_test/invalidate_snowflake.sql +++ b/test/integration/004_simple_archive_test/invalidate_snowflake.sql @@ -1,12 +1,12 @@ -- update records 11 - 21. Change email and updated_at field update {database}.{schema}.seed set - updated_at = DATEADD(hour, 1, updated_at), - email = 'new_' || email -where id >= 10 and id <= 20; + "updated_at" = DATEADD(hour, 1, "updated_at"), + "email" = 'new_' || "email" +where "id" >= 10 and "id" <= 20; -- invalidate records 11 - 21 update {database}.{schema}.archive_expected set - dbt_valid_to = DATEADD(hour, 1, updated_at) -where id >= 10 and id <= 20; + "valid_to" = DATEADD(hour, 1, "updated_at") +where "id" >= 10 and "id" <= 20; diff --git a/test/integration/004_simple_archive_test/models/ref_archive.sql b/test/integration/004_simple_archive_test/models/ref_archive.sql deleted file mode 100644 index 5e92ea1da6e..00000000000 --- a/test/integration/004_simple_archive_test/models/ref_archive.sql +++ /dev/null @@ -1 +0,0 @@ -select * from {{ ref('archive_actual') }} diff --git a/test/integration/004_simple_archive_test/seed.sql b/test/integration/004_simple_archive_test/seed.sql index 9edda7a3565..6113209244b 100644 --- a/test/integration/004_simple_archive_test/seed.sql +++ b/test/integration/004_simple_archive_test/seed.sql @@ -1,32 +1,32 @@ - create table {database}.{schema}.seed ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), - updated_at TIMESTAMP WITHOUT TIME ZONE +create table {database}.{schema}.seed ( + "id" INTEGER, + "first_name" VARCHAR(50), + "last_name" VARCHAR(50), + "email" VARCHAR(50), + "gender" VARCHAR(50), + "ip_address" VARCHAR(20), + "updated_at" TIMESTAMP WITHOUT TIME ZONE ); create table {database}.{schema}.archive_expected ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), + "id" INTEGER, + "first_name" VARCHAR(50), + "last_name" VARCHAR(50), + "email" VARCHAR(50), + "gender" VARCHAR(50), + "ip_address" VARCHAR(20), -- archival fields - updated_at TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id VARCHAR(255), - dbt_updated_at TIMESTAMP WITHOUT TIME ZONE + "updated_at" TIMESTAMP WITHOUT TIME ZONE, + "valid_from" TIMESTAMP WITHOUT TIME ZONE, + "valid_to" TIMESTAMP WITHOUT TIME ZONE, + "scd_id" 
VARCHAR(255), + "dbt_updated_at" TIMESTAMP WITHOUT TIME ZONE ); -- seed inserts -insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values +insert into {database}.{schema}.seed ("id", "first_name", "last_name", "email", "gender", "ip_address", "updated_at") values (1, 'Judith', 'Kennedy', 'jkennedy0@phpbb.com', 'Female', '54.60.24.128', '2015-12-24 12:19:28'), (2, 'Arthur', 'Kelly', 'akelly1@eepurl.com', 'Male', '62.56.24.215', '2015-10-28 16:22:15'), (3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'), @@ -51,171 +51,30 @@ insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, -- populate archive table insert into {database}.{schema}.archive_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id + "id", + "first_name", + "last_name", + "email", + "gender", + "ip_address", + "updated_at", + "valid_from", + "valid_to", + "dbt_updated_at", + "scd_id" ) select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, + "id", + "first_name", + "last_name", + "email", + "gender", + "ip_address", + "updated_at", -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id + "updated_at" as valid_from, + null::timestamp as valid_to, + "updated_at" as dbt_updated_at, + md5("id" || '-' || "first_name" || '|' || "updated_at"::text) as scd_id from {database}.{schema}.seed; - - - -create table {database}.{schema}.archive_castillo_expected ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), - - -- archival fields - updated_at TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id VARCHAR(255), - dbt_updated_at TIMESTAMP WITHOUT TIME ZONE -); - --- one entry -insert into {database}.{schema}.archive_castillo_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed where last_name = 'Castillo'; - -create table {database}.{schema}.archive_alvarez_expected ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), - - -- archival fields - updated_at TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id VARCHAR(255), - dbt_updated_at TIMESTAMP WITHOUT TIME ZONE -); - --- 0 entries -insert into {database}.{schema}.archive_alvarez_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id 
|| '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed where last_name = 'Alvarez'; - -create table {database}.{schema}.archive_kelly_expected ( - id INTEGER, - first_name VARCHAR(50), - last_name VARCHAR(50), - email VARCHAR(50), - gender VARCHAR(50), - ip_address VARCHAR(20), - - -- archival fields - updated_at TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, - dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, - dbt_scd_id VARCHAR(255), - dbt_updated_at TIMESTAMP WITHOUT TIME ZONE -); - - --- 2 entries -insert into {database}.{schema}.archive_kelly_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed where last_name = 'Kelly'; diff --git a/test/integration/004_simple_archive_test/seed_bq.sql b/test/integration/004_simple_archive_test/seed_bq.sql index 9ef91799604..17b45f1e945 100644 --- a/test/integration/004_simple_archive_test/seed_bq.sql +++ b/test/integration/004_simple_archive_test/seed_bq.sql @@ -1,32 +1,32 @@ create table {database}.{schema}.seed ( - id INT64, - first_name STRING, - last_name STRING, - email STRING, - gender STRING, - ip_address STRING, - updated_at TIMESTAMP + `id` INT64, + `first_name` STRING, + `last_name` STRING, + `email` STRING, + `gender` STRING, + `ip_address` STRING, + `updated_at` TIMESTAMP ); create table {database}.{schema}.archive_expected ( - id INT64, - first_name STRING, - last_name STRING, - email STRING, - gender STRING, - ip_address STRING, + `id` INT64, + `first_name` STRING, + `last_name` STRING, + `email` STRING, + `gender` STRING, + `ip_address` STRING, -- archival fields - updated_at TIMESTAMP, - dbt_valid_from TIMESTAMP, - dbt_valid_to TIMESTAMP, - dbt_scd_id STRING, - dbt_updated_at TIMESTAMP + `updated_at` TIMESTAMP, + `valid_from` TIMESTAMP, + `valid_to` TIMESTAMP, + `scd_id` STRING, + `dbt_updated_at` TIMESTAMP ); -- seed inserts -insert {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values +insert {database}.{schema}.seed (`id`, `first_name`, `last_name`, `email`, `gender`, `ip_address`, `updated_at`) values (1, 'Judith', 'Kennedy', 'jkennedy0@phpbb.com', 'Female', '54.60.24.128', '2015-12-24 12:19:28'), (2, 'Arthur', 'Kelly', 'akelly1@eepurl.com', 'Male', '62.56.24.215', '2015-10-28 16:22:15'), (3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'), @@ -51,31 +51,31 @@ insert {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_ad -- populate archive table insert {database}.{schema}.archive_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id + `id`, + `first_name`, + `last_name`, + `email`, + `gender`, + `ip_address`, + `updated_at`, + `valid_from`, + `valid_to`, + `dbt_updated_at`, + `scd_id` ) select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, + `id`, + `first_name`, + `last_name`, + `email`, + `gender`, + `ip_address`, + `updated_at`, -- fields added by archival - updated_at as dbt_valid_from, - cast(null as 
timestamp) as dbt_valid_to, - updated_at as dbt_updated_at, - to_hex(md5(concat(cast(id as string), '-', first_name, '|', cast(updated_at as string)))) as dbt_scd_id + `updated_at` as valid_from, + cast(null as timestamp) as valid_to, + `updated_at` as dbt_updated_at, + to_hex(md5(concat(cast(`id` as string), '-', `first_name`, '|', cast(`updated_at` as string)))) as scd_id from {database}.{schema}.seed; diff --git a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-archives-bq/archive.sql deleted file mode 100644 index d7dec9d043e..00000000000 --- a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql +++ /dev/null @@ -1,14 +0,0 @@ -{% archive archive_actual %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='concat(cast(id as string) , "-", first_name)', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from `{{database}}`.`{{schema}}`.seed - -{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql b/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql deleted file mode 100644 index 3bbe49664c1..00000000000 --- a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql +++ /dev/null @@ -1,12 +0,0 @@ -{% archive no_target_database %} - {{ - config( - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from {{database}}.{{schema}}.seed - -{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-pg/archive.sql b/test/integration/004_simple_archive_test/test-archives-pg/archive.sql deleted file mode 100644 index 9117a8df1a4..00000000000 --- a/test/integration/004_simple_archive_test/test-archives-pg/archive.sql +++ /dev/null @@ -1,14 +0,0 @@ -{% archive archive_actual %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from {{database}}.{{schema}}.seed - -{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-select/archives.sql b/test/integration/004_simple_archive_test/test-archives-select/archives.sql deleted file mode 100644 index 30e78fe720d..00000000000 --- a/test/integration/004_simple_archive_test/test-archives-select/archives.sql +++ /dev/null @@ -1,45 +0,0 @@ -{% archive archive_castillo %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from {{database}}.{{schema}}.seed where last_name = 'Castillo' - -{% endarchive %} - -{% archive archive_alvarez %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from {{database}}.{{schema}}.seed where last_name = 'Alvarez' - -{% endarchive %} - - -{% archive archive_kelly %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='timestamp', - updated_at='updated_at', - ) - }} - select * from {{database}}.{{schema}}.seed where last_name = 'Kelly' - -{% endarchive %} 
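For context, the archive fixtures touched by this patch all share one shape: a named archive block wrapping a select, configured with a target database and schema, a unique key, and a strategy. The sketch below is illustrative only; the archive_example name and the analytics/archives targets are invented, while the config keys mirror the fixtures above and below:

{% archive archive_example %}

    {{
        config(
            target_database='analytics',
            target_schema='archives',
            unique_key="id || '-' || first_name",
            strategy='timestamp',
            updated_at='updated_at',
        )
    }}

    -- any select works here; these fixtures select from the seeded table
    select * from {{ database }}.{{ schema }}.seed

{% endarchive %}

The check-strategy fixtures differ only in replacing strategy='timestamp' and updated_at with strategy='check' plus either check_cols=['email'] or check_cols='all'.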
diff --git a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql deleted file mode 100644 index 40a2563291f..00000000000 --- a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql +++ /dev/null @@ -1,27 +0,0 @@ -{% archive archive_actual %} - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='concat(cast(id as string) , "-", first_name)', - strategy='check', - check_cols=('email',), - ) - }} - select * from `{{database}}`.`{{schema}}`.seed -{% endarchive %} - - -{# This should be exactly the same #} -{% archive archive_checkall %} - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='concat(cast(id as string) , "-", first_name)', - strategy='check', - check_cols='all', - ) - }} - select * from `{{database}}`.`{{schema}}`.seed -{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql deleted file mode 100644 index c3ee6fe2038..00000000000 --- a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql +++ /dev/null @@ -1,28 +0,0 @@ -{% archive archive_actual %} - - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='check', - check_cols=['email'], - ) - }} - select * from {{database}}.{{schema}}.seed - -{% endarchive %} - -{# This should be exactly the same #} -{% archive archive_checkall %} - {{ - config( - target_database=var('target_database', database), - target_schema=schema, - unique_key='id || ' ~ "'-'" ~ ' || first_name', - strategy='check', - check_cols='all', - ) - }} - select * from {{database}}.{{schema}}.seed -{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test_simple_archive.py b/test/integration/004_simple_archive_test/test_simple_archive.py index e2b36b53a28..b2ee1129511 100644 --- a/test/integration/004_simple_archive_test/test_simple_archive.py +++ b/test/integration/004_simple_archive_test/test_simple_archive.py @@ -1,9 +1,7 @@ -from test.integration.base import DBTIntegrationTest, use_profile -import dbt.exceptions - +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestSimpleArchive(DBTIntegrationTest): - NUM_ARCHIVE_MODELS = 1 @property def schema(self): @@ -13,9 +11,6 @@ def schema(self): def models(self): return "test/integration/004_simple_archive_test/models" - def run_archive(self): - return self.run_dbt(['archive']) - @property def project_config(self): source_table = 'seed' @@ -33,8 +28,8 @@ def project_config(self): { "source_table": source_table, "target_table": "archive_actual", - "updated_at": 'updated_at', - "unique_key": '''id || '-' || first_name''' + "updated_at": '"updated_at"', + "unique_key": '''"id" || '-' || "first_name"''' }, ], }, @@ -44,71 +39,63 @@ def project_config(self): def dbt_run_seed_archive(self): self.run_sql_file('test/integration/004_simple_archive_test/seed.sql') - results = self.run_archive() - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - - def assert_case_tables_equal(self, actual, expected): - if self.adapter_type == 'snowflake': - actual = actual.upper() - expected = expected.upper() - - self.assertTablesEqual(actual, expected) + results = self.run_dbt(["archive"]) + 
self.assertEqual(len(results), 1) - def assert_expected(self): - self.assert_case_tables_equal('archive_actual', 'archive_expected') - @use_profile('postgres') + @attr(type='postgres') def test__postgres__simple_archive(self): self.dbt_run_seed_archive() - self.assert_expected() + self.assertTablesEqual("archive_expected","archive_actual") self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - results = self.run_archive() - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) + results = self.run_dbt(["archive"]) + self.assertEqual(len(results), 1) - self.assert_expected() + self.assertTablesEqual("archive_expected","archive_actual") - @use_profile('snowflake') + @attr(type='snowflake') def test__snowflake__simple_archive(self): self.dbt_run_seed_archive() - self.assert_expected() + self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL") self.run_sql_file("test/integration/004_simple_archive_test/invalidate_snowflake.sql") self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - results = self.run_archive() - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) + results = self.run_dbt(["archive"]) + self.assertEqual(len(results), 1) - self.assert_expected() + self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL") - @use_profile('redshift') + @attr(type='redshift') def test__redshift__simple_archive(self): self.dbt_run_seed_archive() - self.assert_expected() + self.assertTablesEqual("archive_expected","archive_actual") self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - results = self.run_archive() - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) + results = self.run_dbt(["archive"]) + self.assertEqual(len(results), 1) - self.assert_expected() + self.assertTablesEqual("archive_expected","archive_actual") - @use_profile('presto') + @attr(type='presto') def test__presto__simple_archive_disabled(self): results = self.run_dbt(["seed"]) - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) + self.assertEqual(len(results), 1) # presto does not run archives results = self.run_dbt(["archive"], expect_pass=False) - self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) + self.assertEqual(len(results), 1) self.assertIn('not implemented for presto', results[0].error) + class TestSimpleArchiveBigquery(DBTIntegrationTest): @property @@ -138,10 +125,7 @@ def project_config(self): ] } - def assert_expected(self): - self.assertTablesEqual('archive_actual', 'archive_expected') - - @use_profile('bigquery') + @attr(type='bigquery') def test__bigquery__simple_archive(self): self.use_default_project() self.use_profile('bigquery') @@ -150,17 +134,17 @@ def test__bigquery__simple_archive(self): self.run_dbt(["archive"]) - self.assert_expected() + self.assertTablesEqual("archive_expected", "archive_actual") self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") self.run_dbt(["archive"]) - self.assert_expected() + self.assertTablesEqual("archive_expected", "archive_actual") - @use_profile('bigquery') + @attr(type='bigquery') def test__bigquery__archive_with_new_field(self): self.use_default_project() self.use_profile('bigquery') @@ -222,8 +206,8 @@ def archive_project_config(self): return { "source_table": 'SEED', "target_table": "archive_actual", - 
"updated_at": 'updated_at', - "unique_key": '''id || '-' || first_name''' + "updated_at": '"updated_at"', + "unique_key": '''"id" || '-' || "first_name"''' } else: return { @@ -246,14 +230,11 @@ def project_config(self): ] } - def run_archive(self): - return self.run_dbt(['archive']) - - @use_profile('snowflake') + @attr(type='snowflake') def test__snowflake__cross_archive(self): self.run_sql_file("test/integration/004_simple_archive_test/seed.sql") - results = self.run_archive() + results = self.run_dbt(["archive"]) self.assertEqual(len(results), 1) self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL", table_b_db=self.alternative_database) @@ -261,240 +242,23 @@ def test__snowflake__cross_archive(self): self.run_sql_file("test/integration/004_simple_archive_test/invalidate_snowflake.sql") self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - results = self.run_archive() + results = self.run_dbt(["archive"]) self.assertEqual(len(results), 1) self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL", table_b_db=self.alternative_database) - @use_profile('bigquery') + @attr(type='bigquery') def test__bigquery__cross_archive(self): self.run_sql_file("test/integration/004_simple_archive_test/seed_bq.sql") - self.run_archive() + self.run_dbt(["archive"]) self.assertTablesEqual("archive_expected", "archive_actual", table_b_db=self.alternative_database) self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") - self.run_archive() - - self.assertTablesEqual("archive_expected", "archive_actual", table_b_db=self.alternative_database) - - -class TestSimpleArchiveFiles(TestSimpleArchive): - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-pg'], - } - - @use_profile('postgres') - def test__postgres_ref_archive(self): - self.dbt_run_seed_archive() - results = self.run_dbt(['run']) - self.assertEqual(len(results), 1) - - -class TestSimpleArchiveFileSelects(DBTIntegrationTest): - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-select', - 'test/integration/004_simple_archive_test/test-archives-pg'], - } - - @use_profile('postgres') - def test__postgres__select_archives(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed.sql') - - results = self.run_dbt(['archive']) - self.assertEqual(len(results), 4) - self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') - self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') - self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') - self.assertTablesEqual('archive_actual', 'archive_expected') - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - - results = self.run_dbt(['archive']) - self.assertEqual(len(results), 4) - self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') - self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') - self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') 
- self.assertTablesEqual('archive_actual', 'archive_expected') - - @use_profile('postgres') - def test__postgres_exclude_archives(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed.sql') - results = self.run_dbt(['archive', '--exclude', 'archive_castillo']) - self.assertEqual(len(results), 3) - self.assertTableDoesNotExist('archive_castillo') - self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') - self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') - self.assertTablesEqual('archive_actual', 'archive_expected') - - @use_profile('postgres') - def test__postgres_select_archives(self): - self.run_sql_file('test/integration/004_simple_archive_test/seed.sql') - results = self.run_dbt(['archive', '--models', 'archive_castillo']) - self.assertEqual(len(results), 1) - self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') - self.assertTableDoesNotExist('archive_alvarez') - self.assertTableDoesNotExist('archive_kelly') - self.assertTableDoesNotExist('archive_actual') - - -class TestSimpleArchiveFilesBigquery(TestSimpleArchiveBigquery): - @property - def project_config(self): - return { - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-bq'], - } - - -class TestCrossDBArchiveFiles(TestCrossDBArchive): - @property - def project_config(self): - if self.adapter_type == 'snowflake': - paths = ['test/integration/004_simple_archive_test/test-archives-pg'] - else: - paths = ['test/integration/004_simple_archive_test/test-archives-bq'] - return { - 'archive-paths': paths, - } - - def run_archive(self): - return self.run_dbt(['archive', '--vars', '{{"target_database": {}}}'.format(self.alternative_database)]) - - -class TestBadArchive(DBTIntegrationTest): - @property - def schema(self): - return "simple_archive_004" - - @property - def models(self): - return "test/integration/004_simple_archive_test/models" - - @property - def project_config(self): - return { - "archive-paths": ['test/integration/004_simple_archive_test/test-archives-invalid'], - } - - @use_profile('postgres') - def test__postgres__invalid(self): - with self.assertRaises(dbt.exceptions.CompilationException) as exc: - self.run_dbt(['compile'], expect_pass=False) - - self.assertIn('target_database', str(exc.exception)) - - -class TestCheckCols(TestSimpleArchiveFiles): - NUM_ARCHIVE_MODELS = 2 - def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): - # When building the equality tests, only test columns that don't start - # with 'dbt_', because those are time-sensitive - if columns is None: - columns = [c for c in self.get_relation_columns(relation_a) if not c[0].lower().startswith('dbt_')] - return super(TestCheckCols, self)._assertTablesEqualSql( - relation_a, - relation_b, - columns=columns - ) - - def assert_expected(self): - super(TestCheckCols, self).assert_expected() - self.assert_case_tables_equal('archive_checkall', 'archive_expected') - - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": ['test/integration/004_simple_archive_test/test-check-col-archives'], - } - - -class TestCheckColsBigquery(TestSimpleArchiveFilesBigquery): - def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): - # When building the equality tests, only test columns that don't start - # with 'dbt_', because those are time-sensitive - if columns is None: - columns = [c for c in self.get_relation_columns(relation_a) if not c[0].lower().startswith('dbt_')] - 
return super(TestCheckColsBigquery, self)._assertTablesEqualSql( - relation_a, - relation_b, - columns=columns - ) - - def assert_expected(self): - super(TestCheckColsBigquery, self).assert_expected() - self.assertTablesEqual('archive_checkall', 'archive_expected') - - @property - def project_config(self): - return { - "data-paths": ['test/integration/004_simple_archive_test/data'], - "archive-paths": ['test/integration/004_simple_archive_test/test-check-col-archives-bq'], - } - - @use_profile('bigquery') - def test__bigquery__archive_with_new_field(self): - self.use_default_project() - self.use_profile('bigquery') - - self.run_sql_file("test/integration/004_simple_archive_test/seed_bq.sql") - self.run_dbt(["archive"]) - self.assertTablesEqual("archive_expected", "archive_actual") - self.assertTablesEqual("archive_expected", "archive_checkall") - - self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") - self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") - - # This adds new fields to the source table, and updates the expected archive output accordingly - self.run_sql_file("test/integration/004_simple_archive_test/add_column_to_source_bq.sql") - - # this should fail because `check="all"` will try to compare the nested field - self.run_dbt(['archive'], expect_pass=False) - - self.run_dbt(["archive", '-m', 'archive_actual']) - - # A more thorough test would assert that archived == expected, but BigQuery does not support the - # "EXCEPT DISTINCT" operator on nested fields! Instead, just check that schemas are congruent. - - expected_cols = self.get_table_columns( - database=self.default_database, - schema=self.unique_schema(), - table='archive_expected' - ) - archived_cols = self.get_table_columns( - database=self.default_database, - schema=self.unique_schema(), - table='archive_actual' - ) - - self.assertTrue(len(expected_cols) > 0, "source table does not exist -- bad test") - self.assertEqual(len(expected_cols), len(archived_cols), "actual and expected column lengths are different") - - for (expected_col, actual_col) in zip(expected_cols, archived_cols): - expected_name, expected_type, _ = expected_col - actual_name, actual_type, _ = actual_col - self.assertTrue(expected_name is not None) - self.assertTrue(expected_type is not None) + self.assertTablesEqual("archive_expected", "archive_actual", table_b_db=self.alternative_database) - self.assertEqual(expected_name, actual_name, "names are different") - self.assertEqual(expected_type, actual_type, "data types are different") diff --git a/test/integration/004_simple_archive_test/update.sql b/test/integration/004_simple_archive_test/update.sql index 0959cf9fa3f..f353e355875 100644 --- a/test/integration/004_simple_archive_test/update.sql +++ b/test/integration/004_simple_archive_test/update.sql @@ -1,130 +1,38 @@ -- insert v2 of the 11 - 21 records insert into {database}.{schema}.archive_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id + "id", + "first_name", + "last_name", + "email", + "gender", + "ip_address", + "updated_at", + "valid_from", + "valid_to", + "dbt_updated_at", + "scd_id" ) select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, + "id", + "first_name", + "last_name", + "email", + "gender", + "ip_address", + "updated_at", -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as 
dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id + "updated_at" as "valid_from", + null::timestamp as "valid_to", + "updated_at" as "dbt_updated_at", + md5("id" || '-' || "first_name" || '|' || "updated_at"::text) as "scd_id" from {database}.{schema}.seed -where id >= 10 and id <= 20; +where "id" >= 10 and "id" <= 20; -insert into {database}.{schema}.archive_castillo_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id >= 10 and id <= 20 and last_name = 'Castillo'; - - -insert into {database}.{schema}.archive_alvarez_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id >= 10 and id <= 20 and last_name = 'Alvarez'; - - -insert into {database}.{schema}.archive_kelly_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id >= 10 and id <= 20 and last_name = 'Kelly'; - -- insert 10 new records -insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values +insert into {database}.{schema}.seed ("id", "first_name", "last_name", "email", "gender", "ip_address", "updated_at") values (21, 'Judy', 'Robinson', 'jrobinsonk@blogs.com', 'Female', '208.21.192.232', '2016-09-18 08:27:38'), (22, 'Kevin', 'Alvarez', 'kalvarezl@buzzfeed.com', 'Male', '228.106.146.9', '2016-07-29 03:07:37'), (23, 'Barbara', 'Carr', 'bcarrm@pen.io', 'Female', '106.165.140.17', '2015-09-24 13:27:23'), @@ -139,123 +47,31 @@ insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, -- add these new records to the archive table insert into {database}.{schema}.archive_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id > 20; - - --- add these new records to the archive table -insert into {database}.{schema}.archive_castillo_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - 
updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id > 20 and last_name = 'Castillo'; - -insert into {database}.{schema}.archive_alvarez_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id -) - -select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id -from {database}.{schema}.seed -where id > 20 and last_name = 'Alvarez'; - -insert into {database}.{schema}.archive_kelly_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id + "id", + "first_name", + "last_name", + "email", + "gender", + "ip_address", + "updated_at", + "valid_from", + "valid_to", + "dbt_updated_at", + "scd_id" ) select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, + "id", + "first_name", + "last_name", + "email", + "gender", + "ip_address", + "updated_at", -- fields added by archival - updated_at as dbt_valid_from, - null::timestamp as dbt_valid_to, - updated_at as dbt_updated_at, - md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id + "updated_at" as "valid_from", + null::timestamp as "valid_to", + "updated_at" as "dbt_updated_at", + md5("id" || '-' || "first_name" || '|' || "updated_at"::text) as "scd_id" from {database}.{schema}.seed -where id > 20 and last_name = 'Kelly'; +where "id" > 20; diff --git a/test/integration/004_simple_archive_test/update_bq.sql b/test/integration/004_simple_archive_test/update_bq.sql index aa56fb839a9..6ce7835ddd8 100644 --- a/test/integration/004_simple_archive_test/update_bq.sql +++ b/test/integration/004_simple_archive_test/update_bq.sql @@ -1,38 +1,38 @@ -- insert v2 of the 11 - 21 records insert {database}.{schema}.archive_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id + `id`, + `first_name`, + `last_name`, + `email`, + `gender`, + `ip_address`, + `updated_at`, + `valid_from`, + `valid_to`, + `dbt_updated_at`, + `scd_id` ) select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, + `id`, + `first_name`, + `last_name`, + `email`, + `gender`, + `ip_address`, + `updated_at`, -- fields added by archival - updated_at as dbt_valid_from, - cast(null as timestamp) as dbt_valid_to, - updated_at as dbt_updated_at, - to_hex(md5(concat(cast(id as string), '-', first_name, '|', cast(updated_at as string)))) as dbt_scd_id + `updated_at` as `valid_from`, + cast(null as timestamp) as `valid_to`, + `updated_at` as `dbt_updated_at`, + to_hex(md5(concat(cast(`id` as string), '-', `first_name`, '|', cast(`updated_at` as string)))) as `scd_id` from {database}.{schema}.seed -where id >= 10 and id <= 20; +where `id` >= 10 and `id` <= 20; -- insert 10 new records -insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, 
ip_address, updated_at) values +insert into {database}.{schema}.seed (`id`, `first_name`, `last_name`, `email`, `gender`, `ip_address`, `updated_at`) values (21, 'Judy', 'Robinson', 'jrobinsonk@blogs.com', 'Female', '208.21.192.232', '2016-09-18 08:27:38'), (22, 'Kevin', 'Alvarez', 'kalvarezl@buzzfeed.com', 'Male', '228.106.146.9', '2016-07-29 03:07:37'), (23, 'Barbara', 'Carr', 'bcarrm@pen.io', 'Female', '106.165.140.17', '2015-09-24 13:27:23'), @@ -47,32 +47,32 @@ insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, -- add these new records to the archive table insert {database}.{schema}.archive_expected ( - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, - dbt_valid_from, - dbt_valid_to, - dbt_updated_at, - dbt_scd_id + `id`, + `first_name`, + `last_name`, + `email`, + `gender`, + `ip_address`, + `updated_at`, + `valid_from`, + `valid_to`, + `dbt_updated_at`, + `scd_id` ) select - id, - first_name, - last_name, - email, - gender, - ip_address, - updated_at, + `id`, + `first_name`, + `last_name`, + `email`, + `gender`, + `ip_address`, + `updated_at`, -- fields added by archival - updated_at as dbt_valid_from, - cast(null as timestamp) as dbt_valid_to, - updated_at as dbt_updated_at, - to_hex(md5(concat(cast(id as string), '-', first_name, '|', cast(updated_at as string)))) as dbt_scd_id + `updated_at` as `valid_from`, + cast(null as timestamp) as `valid_to`, + `updated_at` as `dbt_updated_at`, + to_hex(md5(concat(cast(`id` as string), '-', `first_name`, '|', cast(`updated_at` as string)))) as `scd_id` from {database}.{schema}.seed -where id > 20; +where `id` > 20; diff --git a/test/integration/005_simple_seed_test/test_seed_type_override.py b/test/integration/005_simple_seed_test/test_seed_type_override.py index e565d4b3bc2..ac585de6453 100644 --- a/test/integration/005_simple_seed_test/test_seed_type_override.py +++ b/test/integration/005_simple_seed_test/test_seed_type_override.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestSimpleSeedColumnOverride(DBTIntegrationTest): @@ -39,7 +40,7 @@ def seed_types(self): "birthday": "date", } - @use_profile('postgres') + @attr(type='postgres') def test_simple_seed_with_column_override_postgres(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -62,7 +63,7 @@ def seed_types(self): def profile_config(self): return self.snowflake_profile() - @use_profile('snowflake') + @attr(type='snowflake') def test_simple_seed_with_column_override_snowflake(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -85,7 +86,7 @@ def seed_types(self): def profile_config(self): return self.bigquery_profile() - @use_profile('bigquery') + @attr(type='bigquery') def test_simple_seed_with_column_override_bigquery(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) diff --git a/test/integration/005_simple_seed_test/test_simple_seed.py b/test/integration/005_simple_seed_test/test_simple_seed.py index 94f6fa86dc2..af3f4b01dfd 100644 --- a/test/integration/005_simple_seed_test/test_simple_seed.py +++ b/test/integration/005_simple_seed_test/test_simple_seed.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest from dbt.exceptions import CompilationException @@ -23,7 +24,7 @@ def 
project_config(self): "data-paths": ['test/integration/005_simple_seed_test/data'] } - @use_profile('postgres') + @attr(type='postgres') def test_simple_seed(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -36,7 +37,7 @@ def test_simple_seed(self): self.assertTablesEqual("seed_actual","seed_expected") - @use_profile('postgres') + @attr(type='postgres') def test_simple_seed_with_drop(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -71,7 +72,7 @@ def project_config(self): } } - @use_profile('postgres') + @attr(type='postgres') def test_simple_seed_with_schema(self): schema_name = "{}_{}".format(self.unique_schema(), 'custom_schema') @@ -85,7 +86,7 @@ def test_simple_seed_with_schema(self): self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name) - @use_profile('postgres') + @attr(type='postgres') def test_simple_seed_with_drop_and_schema(self): schema_name = "{}_{}".format(self.unique_schema(), 'custom_schema') @@ -125,7 +126,7 @@ def project_config(self): } } - @use_profile('postgres') + @attr(type='postgres') def test_simple_seed_with_disabled(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -152,7 +153,7 @@ def project_config(self): "data-paths": ['test/integration/005_simple_seed_test/data-bad'] } - @use_profile('postgres') + @attr(type='postgres') def test_postgres_dbt_run_skips_seeds(self): # run does not try to parse the seed files self.assertEqual(len(self.run_dbt(['run'])), 1) diff --git a/test/integration/006_simple_dependency_test/test_local_dependency.py b/test/integration/006_simple_dependency_test/test_local_dependency.py index abc22e5e2c4..17c4c50e16f 100644 --- a/test/integration/006_simple_dependency_test/test_local_dependency.py +++ b/test/integration/006_simple_dependency_test/test_local_dependency.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest import mock import dbt.semver @@ -31,7 +32,7 @@ def base_schema(self): def configured_schema(self): return self.unique_schema() + '_configured' - @use_profile('postgres') + @attr(type='postgres') def test_postgres_local_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -61,16 +62,16 @@ def base_schema(self): def configured_schema(self): return 'configured_{}_macro'.format(self.unique_schema()) - @use_profile('postgres') + @attr(type='postgres') @mock.patch('dbt.config.project.get_installed_version') def test_postgres_local_dependency_out_of_date(self, mock_get): mock_get.return_value = dbt.semver.VersionSpecifier.from_version_string('0.0.1') self.run_dbt(['deps']) - with self.assertRaises(dbt.exceptions.DbtProjectError) as exc: + with self.assertRaises(dbt.exceptions.DbtProjectError) as e: self.run_dbt(['run']) - self.assertIn('--no-version-check', str(exc.exception)) + self.assertIn('--no-version-check', str(e.exception)) - @use_profile('postgres') + @attr(type='postgres') @mock.patch('dbt.config.project.get_installed_version') def test_postgres_local_dependency_out_of_date_no_check(self, mock_get): mock_get.return_value = dbt.semver.VersionSpecifier.from_version_string('0.0.1') diff --git a/test/integration/006_simple_dependency_test/test_simple_dependency.py b/test/integration/006_simple_dependency_test/test_simple_dependency.py index 1eac417cab8..e20b4b2b4eb 100644 --- a/test/integration/006_simple_dependency_test/test_simple_dependency.py +++ 
b/test/integration/006_simple_dependency_test/test_simple_dependency.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestSimpleDependency(DBTIntegrationTest): @@ -24,7 +25,7 @@ def packages_config(self): ] } - @use_profile('postgres') + @attr(type='postgres') def test_simple_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -46,7 +47,7 @@ def test_simple_dependency(self): self.assertTablesEqual("seed","view_model") self.assertTablesEqual("seed","incremental") - @use_profile('postgres') + @attr(type='postgres') def test_simple_dependency_with_models(self): self.run_dbt(["deps"]) results = self.run_dbt(["run", '--models', 'view_model+']) @@ -104,7 +105,7 @@ def deps_run_assert_equality(self): self.assertEqual(created_models['view_summary'], 'view') self.assertEqual(created_models['incremental'], 'table') - @use_profile('postgres') + @attr(type='postgres') def test_simple_dependency(self): self.deps_run_assert_equality() @@ -114,7 +115,7 @@ def test_simple_dependency(self): self.deps_run_assert_equality() - @use_profile('postgres') + @attr(type='postgres') def test_empty_models_not_compiled_in_dependencies(self): self.deps_run_assert_equality() diff --git a/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py b/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py index aa4877ef546..ae96afd7f41 100644 --- a/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py +++ b/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class BaseTestSimpleDependencyWithConfigs(DBTIntegrationTest): @@ -39,7 +40,7 @@ def project_config(self): }, } - @use_profile('postgres') + @attr(type='postgres') def test_simple_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -82,7 +83,7 @@ def project_config(self): } - @use_profile('postgres') + @attr(type='postgres') def test_simple_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -126,7 +127,7 @@ def project_config(self): } - @use_profile('postgres') + @attr(type='postgres') def test_simple_dependency(self): self.use_default_project() @@ -182,7 +183,7 @@ def project_config(self): } - @use_profile('postgres') + @attr(type='postgres') def test_simple_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) diff --git a/test/integration/007_graph_selection_tests/models/users_rollup_dependency.sql b/test/integration/007_graph_selection_tests/models/users_rollup_dependency.sql deleted file mode 100644 index f539772cbb2..00000000000 --- a/test/integration/007_graph_selection_tests/models/users_rollup_dependency.sql +++ /dev/null @@ -1,5 +0,0 @@ -{{ - config(materialized='table') -}} - -select * from {{ ref('users_rollup') }} diff --git a/test/integration/007_graph_selection_tests/test_graph_selection.py b/test/integration/007_graph_selection_tests/test_graph_selection.py index 5c830e360d5..7f6bfa87d73 100644 --- a/test/integration/007_graph_selection_tests/test_graph_selection.py +++ b/test/integration/007_graph_selection_tests/test_graph_selection.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib 
import attr +from test.integration.base import DBTIntegrationTest class TestGraphSelection(DBTIntegrationTest): @@ -11,21 +12,22 @@ def models(self): return "test/integration/007_graph_selection_tests/models" def assert_correct_schemas(self): - with self.test_connection(): - exists = self.adapter.check_schema_exists( - self.default_database, - self.unique_schema() - ) - self.assertTrue(exists) - - schema = self.unique_schema()+'_and_then' - exists = self.adapter.check_schema_exists( - self.default_database, - schema - ) - self.assertFalse(exists) - - @use_profile('postgres') + exists = self.adapter.check_schema_exists( + self.default_database, + self.unique_schema(), + '__test' + ) + self.assertTrue(exists) + + schema = self.unique_schema()+'_and_then' + exists = self.adapter.check_schema_exists( + self.default_database, + schema, + '__test' + ) + self.assertFalse(exists) + + @attr(type='postgres') def test__postgres__specific_model(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -39,7 +41,7 @@ def test__postgres__specific_model(self): self.assertFalse('emails' in created_models) self.assert_correct_schemas() - @use_profile('postgres') + @attr(type='postgres') def test__postgres__tags(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -53,12 +55,12 @@ def test__postgres__tags(self): self.assertTrue('users_rollup' in created_models) self.assert_correct_schemas() - @use_profile('postgres') + @attr(type='postgres') def test__postgres__tags_and_children(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', 'tag:base+']) - self.assertEqual(len(results), 4) + self.assertEqual(len(results), 3) created_models = self.get_models_in_schema() self.assertFalse('base_users' in created_models) @@ -68,7 +70,7 @@ def test__postgres__tags_and_children(self): self.assertTrue('users' in created_models) self.assert_correct_schemas() - @use_profile('snowflake') + @attr(type='snowflake') def test__snowflake__specific_model(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -82,12 +84,12 @@ def test__snowflake__specific_model(self): self.assertFalse('EMAILS' in created_models) self.assert_correct_schemas() - @use_profile('postgres') + @attr(type='postgres') def test__postgres__specific_model_and_children(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', 'users+']) - self.assertEqual(len(results), 4) + self.assertEqual(len(results), 3) self.assertTablesEqual("seed", "users") self.assertTablesEqual("summary_expected", "users_rollup") @@ -97,12 +99,12 @@ def test__postgres__specific_model_and_children(self): self.assertNotIn('emails', created_models) self.assert_correct_schemas() - @use_profile('snowflake') + @attr(type='snowflake') def test__snowflake__specific_model_and_children(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', 'users+']) - self.assertEqual(len(results), 4) + self.assertEqual(len(results), 3) self.assertManyTablesEqual( ["SEED", "USERS"], @@ -113,7 +115,7 @@ def test__snowflake__specific_model_and_children(self): self.assertFalse('EMAILS' in created_models) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__specific_model_and_parents(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -127,7 +129,7 @@ def 
test__postgres__specific_model_and_parents(self): self.assertFalse('emails' in created_models) self.assert_correct_schemas() - @use_profile('snowflake') + @attr(type='snowflake') def test__snowflake__specific_model_and_parents(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -144,7 +146,7 @@ def test__snowflake__specific_model_and_parents(self): self.assertFalse('EMAILS' in created_models) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__specific_model_with_exclusion(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -160,7 +162,7 @@ def test__postgres__specific_model_with_exclusion(self): self.assertFalse('emails' in created_models) self.assert_correct_schemas() - @use_profile('snowflake') + @attr(type='snowflake') def test__snowflake__specific_model_with_exclusion(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -175,7 +177,7 @@ def test__snowflake__specific_model_with_exclusion(self): self.assertFalse('USERS_ROLLUP' in created_models) self.assertFalse('EMAILS' in created_models) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__locally_qualified_name(self): results = self.run_dbt(['run', '--models', 'test.subdir']) self.assertEqual(len(results), 2) @@ -188,11 +190,11 @@ def test__postgres__locally_qualified_name(self): self.assertIn('nested_users', created_models) self.assert_correct_schemas() - @use_profile('postgres') + @attr(type='postgres') def test__postgres__childrens_parents(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', '@base_users']) - self.assertEqual(len(results), 4) + self.assertEqual(len(results), 3) created_models = self.get_models_in_schema() self.assertIn('users_rollup', created_models) @@ -201,12 +203,12 @@ def test__postgres__childrens_parents(self): self.assertNotIn('subdir', created_models) self.assertNotIn('nested_users', created_models) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__more_childrens_parents(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', '@users']) - # base_users, emails, users_rollup, users_rollup_dependency, but not users (ephemeral) - self.assertEqual(len(results), 4) + # base_users, emails, users_rollup, but not users (ephemeral) + self.assertEqual(len(results), 3) created_models = self.get_models_in_schema() self.assertIn('users_rollup', created_models) @@ -214,24 +216,3 @@ def test__postgres__more_childrens_parents(self): self.assertIn('emails_alt', created_models) self.assertNotIn('subdir', created_models) self.assertNotIn('nested_users', created_models) - - @use_profile('snowflake') - def test__snowflake__skip_intermediate(self): - self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") - results = self.run_dbt(['run', '--models', '@users']) - # base_users, emails, users_rollup, users_rollup_dependency - self.assertEqual(len(results), 4) - - # now re-run, skipping users_rollup - results = self.run_dbt(['run', '--models', '@users', '--exclude', 'users_rollup']) - self.assertEqual(len(results), 3) - - # make sure that users_rollup_dependency and users don't interleave - users = [r for r in results if r.node.name == 'users'][0] - dep = [r for r in results if r.node.name == 'users_rollup_dependency'][0] - user_last_end = users.timing[1]['completed_at'] - dep_first_start = dep.timing[0]['started_at'] - 
self.assertTrue( - user_last_end < dep_first_start, - 'dependency started before its transitive parent ({} > {})'.format(user_last_end, dep_first_start) - ) diff --git a/test/integration/007_graph_selection_tests/test_schema_test_graph_selection.py b/test/integration/007_graph_selection_tests/test_schema_test_graph_selection.py index bc5971bd6de..3068f855f2f 100644 --- a/test/integration/007_graph_selection_tests/test_schema_test_graph_selection.py +++ b/test/integration/007_graph_selection_tests/test_schema_test_graph_selection.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest, FakeArgs from dbt.task.test import TestTask @@ -25,7 +26,7 @@ def run_schema_and_assert(self, include, exclude, expected_tests): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") self.run_dbt(["deps"]) results = self.run_dbt(['run', '--exclude', 'never_selected']) - self.assertEqual(len(results), 9) + self.assertEqual(len(results), 8) args = FakeArgs() args.models = include @@ -39,7 +40,7 @@ def run_schema_and_assert(self, include, exclude, expected_tests): self.assertEqual(ran_tests, expected_sorted) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_no_specifiers(self): self.run_schema_and_assert( None, @@ -50,7 +51,7 @@ def test__postgres__schema_tests_no_specifiers(self): 'unique_users_rollup_gender'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_specify_model(self): self.run_schema_and_assert( ['users'], @@ -58,7 +59,7 @@ def test__postgres__schema_tests_specify_model(self): ['unique_users_id'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_specify_tag(self): self.run_schema_and_assert( ['tag:bi'], @@ -67,7 +68,7 @@ def test__postgres__schema_tests_specify_tag(self): 'unique_users_rollup_gender'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_specify_model_and_children(self): self.run_schema_and_assert( ['users+'], @@ -75,7 +76,7 @@ def test__postgres__schema_tests_specify_model_and_children(self): ['unique_users_id', 'unique_users_rollup_gender'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_specify_tag_and_children(self): self.run_schema_and_assert( ['tag:base+'], @@ -85,7 +86,7 @@ def test__postgres__schema_tests_specify_tag_and_children(self): 'unique_users_rollup_gender'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_specify_model_and_parents(self): self.run_schema_and_assert( ['+users_rollup'], @@ -93,7 +94,7 @@ def test__postgres__schema_tests_specify_model_and_parents(self): ['unique_users_id', 'unique_users_rollup_gender'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_specify_model_and_parents_with_exclude(self): self.run_schema_and_assert( ['+users_rollup'], @@ -101,7 +102,7 @@ def test__postgres__schema_tests_specify_model_and_parents_with_exclude(self): ['unique_users_id'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_specify_exclude_only(self): self.run_schema_and_assert( None, @@ -109,7 +110,7 @@ def test__postgres__schema_tests_specify_exclude_only(self): ['unique_emails_email', 'unique_table_model_id', 'unique_users_id'] ) - @use_profile('postgres') + @attr(type='postgres') def 
test__postgres__schema_tests_specify_model_in_pkg(self): self.run_schema_and_assert( ['test.users_rollup'], @@ -119,7 +120,7 @@ def test__postgres__schema_tests_specify_model_in_pkg(self): ['unique_users_rollup_gender'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_with_glob(self): self.run_schema_and_assert( ['*'], @@ -127,7 +128,7 @@ def test__postgres__schema_tests_with_glob(self): ['unique_emails_email', 'unique_table_model_id', 'unique_users_rollup_gender'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_dep_package_only(self): self.run_schema_and_assert( ['dbt_integration_project'], @@ -135,7 +136,7 @@ def test__postgres__schema_tests_dep_package_only(self): ['unique_table_model_id'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_model_in_dep_pkg(self): self.run_schema_and_assert( ['dbt_integration_project.table_model'], @@ -143,7 +144,7 @@ def test__postgres__schema_tests_model_in_dep_pkg(self): ['unique_table_model_id'] ) - @use_profile('postgres') + @attr(type='postgres') def test__postgres__schema_tests_exclude_pkg(self): self.run_schema_and_assert( None, diff --git a/test/integration/007_graph_selection_tests/test_tag_selection.py b/test/integration/007_graph_selection_tests/test_tag_selection.py index bd15dff6340..1eccf7252ed 100644 --- a/test/integration/007_graph_selection_tests/test_tag_selection.py +++ b/test/integration/007_graph_selection_tests/test_tag_selection.py @@ -42,7 +42,7 @@ def test__postgres__select_tag_and_children(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', '+tag:specified_in_project+']) - self.assertEqual(len(results), 3) + self.assertEqual(len(results), 2) models_run = [r.node['name'] for r in results] self.assertTrue('users' in models_run) @@ -69,10 +69,8 @@ def test__postgres__select_tag_in_model_with_project_Config(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', '@tag:users']) - self.assertEqual(len(results), 4) + self.assertEqual(len(results), 3) models_run = set(r.node['name'] for r in results) - self.assertEqual( - {'users', 'users_rollup', 'emails_alt', 'users_rollup_dependency'}, - models_run - ) + self.assertEqual({'users', 'users_rollup', 'emails_alt'}, models_run) + diff --git a/test/integration/008_schema_tests_test/ephemeral/ephemeral.sql b/test/integration/008_schema_tests_test/ephemeral/ephemeral.sql deleted file mode 100644 index c8e21355594..00000000000 --- a/test/integration/008_schema_tests_test/ephemeral/ephemeral.sql +++ /dev/null @@ -1,4 +0,0 @@ - -{{ config(materialized='ephemeral') }} - -select 1 as id diff --git a/test/integration/008_schema_tests_test/ephemeral/schema.yml b/test/integration/008_schema_tests_test/ephemeral/schema.yml deleted file mode 100644 index b394a95c221..00000000000 --- a/test/integration/008_schema_tests_test/ephemeral/schema.yml +++ /dev/null @@ -1,8 +0,0 @@ - -version: 2 -models: - - name: ephemeral - columns: - - name: id - tests: - - unique diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py index 69836e42c44..9df0bf2b995 100644 --- a/test/integration/008_schema_tests_test/test_schema_v2_tests.py +++ b/test/integration/008_schema_tests_test/test_schema_v2_tests.py @@ -1,3 +1,4 @@ +from nose.plugins.attrib import attr from test.integration.base 
import DBTIntegrationTest, FakeArgs, use_profile import os @@ -26,7 +27,7 @@ def run_schema_validations(self): test_task = TestTask(args, self.config) return test_task.run() - @use_profile('postgres') + @attr(type='postgres') def test_schema_tests(self): results = self.run_dbt() self.assertEqual(len(results), 5) @@ -76,7 +77,7 @@ def run_schema_validations(self): test_task = TestTask(args, self.config) return test_task.run() - @use_profile('postgres') + @attr(type='postgres') def test_malformed_schema_test_wont_brick_run(self): # dbt run should work (Despite broken schema test) results = self.run_dbt(strict=False) @@ -87,44 +88,12 @@ def test_malformed_schema_test_wont_brick_run(self): self.assertEqual(len(ran_tests), 5) self.assertEqual(sum(x.status for x in ran_tests), 0) - @use_profile('postgres') + @attr(type='postgres') def test_malformed_schema_strict_will_break_run(self): with self.assertRaises(CompilationException): self.run_dbt(strict=True) -class TestHooksInTests(DBTIntegrationTest): - - @property - def schema(self): - return "schema_tests_008" - - @property - def models(self): - # test ephemeral models so we don't need to do a run (which would fail) - return "test/integration/008_schema_tests_test/ephemeral" - - @property - def project_config(self): - return { - "on-run-start": ["{{ exceptions.raise_compiler_error('hooks called in tests -- error') if execute }}"], - "on-run-end": ["{{ exceptions.raise_compiler_error('hooks called in tests -- error') if execute }}"], - } - - @use_profile('postgres') - def test_hooks_dont_run_for_tests(self): - # This would fail if the hooks ran - results = self.run_dbt(['test', '--model', 'ephemeral']) - self.assertEqual(len(results), 1) - for result in results: - self.assertIsNone(result.error) - self.assertFalse(result.skipped) - # status = # of failing rows - self.assertEqual( - result.status, 0, - 'test {} failed'.format(result.node.get('name')) - ) - class TestCustomSchemaTests(DBTIntegrationTest): def setUp(self): @@ -168,7 +137,7 @@ def run_schema_validations(self): test_task = TestTask(args, self.config) return test_task.run() - @use_profile('postgres') + @attr(type='postgres') def test_schema_tests(self): self.run_dbt(["deps"]) results = self.run_dbt() diff --git a/test/integration/009_data_tests_test/test_data_tests.py b/test/integration/009_data_tests_test/test_data_tests.py index b7b934a5ba4..7cc5ba6f63c 100644 --- a/test/integration/009_data_tests_test/test_data_tests.py +++ b/test/integration/009_data_tests_test/test_data_tests.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest, FakeArgs from dbt.task.test import TestTask import os @@ -29,7 +30,7 @@ def run_data_validations(self): test_task = TestTask(args, self.config) return test_task.run() - @use_profile('postgres') + @attr(type='postgres') def test_postgres_data_tests(self): self.use_profile('postgres') @@ -58,7 +59,7 @@ def test_postgres_data_tests(self): self.assertNotEqual(len(test_results), 0) self.assertEqual(len(test_results), len(defined_tests)) - @use_profile('snowflake') + @attr(type='snowflake') def test_snowflake_data_tests(self): self.use_profile('snowflake') diff --git a/test/integration/010_permission_tests/test_permissions.py b/test/integration/010_permission_tests/test_permissions.py index d348569ca14..0602a8c40fb 100644 --- a/test/integration/010_permission_tests/test_permissions.py +++ 
b/test/integration/010_permission_tests/test_permissions.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestPermissions(DBTIntegrationTest): @@ -14,7 +15,7 @@ def schema(self): def models(self): return "test/integration/010_permission_tests/models" - @use_profile('postgres') + @attr(type='postgres') def test_no_create_schema_permissions(self): # the noaccess user does not have permissions to create a schema -- this should fail failed = False @@ -26,7 +27,7 @@ def test_no_create_schema_permissions(self): self.assertTrue(failed) - @use_profile('postgres') + @attr(type='postgres') def test_create_schema_permissions(self): # now it should work! self.run_sql('grant create on database {} to noaccess'.format(self.default_database)) diff --git a/test/integration/011_invalid_model_tests/test_invalid_models.py b/test/integration/011_invalid_model_tests/test_invalid_models.py index bbb9850d2df..ae2ee46c0eb 100644 --- a/test/integration/011_invalid_model_tests/test_invalid_models.py +++ b/test/integration/011_invalid_model_tests/test_invalid_models.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest from dbt.exceptions import ValidationException @@ -18,7 +19,7 @@ def schema(self): def models(self): return "test/integration/011_invalid_model_tests/models-2" - @use_profile('postgres') + @attr(type='postgres') def test_view_with_incremental_attributes(self): try: @@ -44,7 +45,7 @@ def schema(self): def models(self): return "test/integration/011_invalid_model_tests/models-3" - @use_profile('postgres') + @attr(type='postgres') def test_view_with_incremental_attributes(self): try: diff --git a/test/integration/012_deprecation_tests/models/sql_where.sql b/test/integration/012_deprecation_tests/models/sql_where.sql new file mode 100644 index 00000000000..34ca3c36464 --- /dev/null +++ b/test/integration/012_deprecation_tests/models/sql_where.sql @@ -0,0 +1,3 @@ +{{ config(sql_where='id > (select max(id) from {{this}})')}} + +select 1 as id diff --git a/test/integration/012_deprecation_tests/test_deprecations.py b/test/integration/012_deprecation_tests/test_deprecations.py index 9f9e3544019..2642d9acefb 100644 --- a/test/integration/012_deprecation_tests/test_deprecations.py +++ b/test/integration/012_deprecation_tests/test_deprecations.py @@ -29,6 +29,6 @@ def test_postgres_deprecations_fail(self): @use_profile('postgres') def test_postgres_deprecations(self): self.assertEqual(deprecations.active_deprecations, set()) - self.run_dbt(strict=False) - self.assertEqual({'adapter:already_exists'}, + results = self.run_dbt(strict=False) + self.assertEqual({'adapter:already_exists', 'sql_where'}, deprecations.active_deprecations) diff --git a/test/integration/013_context_var_tests/test_context_vars.py b/test/integration/013_context_var_tests/test_context_vars.py index 4fd789c198d..7c93df337ed 100644 --- a/test/integration/013_context_var_tests/test_context_vars.py +++ b/test/integration/013_context_var_tests/test_context_vars.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest import os @@ -49,7 +50,7 @@ def profile_config(self): 'dev': { 'type': 'postgres', 'threads': 1, - 'host': self.database_host, + 'host': 'database', 'port': 5432, 
'user': "root", 'pass': "password", @@ -59,7 +60,7 @@ def profile_config(self): 'prod': { 'type': 'postgres', 'threads': 1, - 'host': self.database_host, + 'host': 'database', 'port': 5432, # root/password 'user': "{{ env_var('DBT_TEST_013_USER') }}", @@ -83,7 +84,7 @@ def get_ctx_vars(self): return ctx - @use_profile('postgres') + @attr(type='postgres') def test_env_vars_dev(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 1) @@ -98,7 +99,7 @@ def test_env_vars_dev(self): self.assertEqual(ctx['this.table'], 'context') self.assertEqual(ctx['target.dbname'], 'dbt') - self.assertEqual(ctx['target.host'], self.database_host) + self.assertEqual(ctx['target.host'], 'database') self.assertEqual(ctx['target.name'], 'dev') self.assertEqual(ctx['target.port'], 5432) self.assertEqual(ctx['target.schema'], self.unique_schema()) @@ -109,7 +110,7 @@ def test_env_vars_dev(self): self.assertEqual(ctx['env_var'], '1') - @use_profile('postgres') + @attr(type='postgres') def test_env_vars_prod(self): results = self.run_dbt(['run', '--target', 'prod']) self.assertEqual(len(results), 1) @@ -124,7 +125,7 @@ def test_env_vars_prod(self): self.assertEqual(ctx['this.table'], 'context') self.assertEqual(ctx['target.dbname'], 'dbt') - self.assertEqual(ctx['target.host'], self.database_host) + self.assertEqual(ctx['target.host'], 'database') self.assertEqual(ctx['target.name'], 'prod') self.assertEqual(ctx['target.port'], 5432) self.assertEqual(ctx['target.schema'], self.unique_schema()) diff --git a/test/integration/014_hook_tests/test_model_hooks.py b/test/integration/014_hook_tests/test_model_hooks.py index c9f01756269..0233b6155fb 100644 --- a/test/integration/014_hook_tests/test_model_hooks.py +++ b/test/integration/014_hook_tests/test_model_hooks.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest from dbt.exceptions import CompilationException @@ -102,7 +103,7 @@ def check_hooks(self, state, count=1): for ctx in ctxs: self.assertEqual(ctx['state'], state) self.assertEqual(ctx['target.dbname'], 'dbt') - self.assertEqual(ctx['target.host'], self.database_host) + self.assertEqual(ctx['target.host'], 'database') self.assertEqual(ctx['target.name'], 'default2') self.assertEqual(ctx['target.port'], 5432) self.assertEqual(ctx['target.schema'], self.unique_schema()) @@ -145,7 +146,7 @@ def project_config(self): def models(self): return "test/integration/014_hook_tests/models" - @use_profile('postgres') + @attr(type='postgres') def test_postgres_pre_and_post_model_hooks(self): self.run_dbt(['run']) @@ -175,7 +176,7 @@ def project_config(self): } } - @use_profile('postgres') + @attr(type='postgres') def test_postgres_hooks_on_seeds(self): res = self.run_dbt(['seed']) self.assertEqual(len(res), 1, 'Expected exactly one item') @@ -194,14 +195,14 @@ def project_config(self): def models(self): return "test/integration/014_hook_tests/configured-models" - @use_profile('postgres') + @attr(type='postgres') def test_postgres_pre_and_post_model_hooks_model(self): self.run_dbt(['run']) self.check_hooks('start') self.check_hooks('end') - @use_profile('postgres') + @attr(type='postgres') def test_postgres_pre_and_post_model_hooks_model_and_project(self): self.use_default_project({ 'models': { @@ -246,10 +247,10 @@ def schema(self): def models(self): return "test/integration/014_hook_tests/error-models" - @use_profile('postgres') + @attr(type='postgres') def 
test_postgres_run_duplicate_hook_defs(self): with self.assertRaises(CompilationException) as exc: self.run_dbt(['run']) - self.assertIn('pre_hook', str(exc.exception)) - self.assertIn('pre-hook', str(exc.exception)) + self.assertIn('pre_hook', str(exc.exception)) + self.assertIn('pre-hook', str(exc.exception)) diff --git a/test/integration/014_hook_tests/test_model_hooks_bq.py b/test/integration/014_hook_tests/test_model_hooks_bq.py index 95c55d9cd6d..d9c81b212f5 100644 --- a/test/integration/014_hook_tests/test_model_hooks_bq.py +++ b/test/integration/014_hook_tests/test_model_hooks_bq.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest MODEL_PRE_HOOK = """ insert into {{this.schema}}.on_model_hook ( @@ -105,7 +106,7 @@ def check_hooks(self, state): self.assertTrue(ctx['run_started_at'] is not None and len(ctx['run_started_at']) > 0, 'run_started_at was not set') self.assertTrue(ctx['invocation_id'] is not None and len(ctx['invocation_id']) > 0, 'invocation_id was not set') - @use_profile('bigquery') + @attr(type='bigquery') def test_pre_and_post_model_hooks_bigquery(self): self.run_dbt(['run']) @@ -134,7 +135,7 @@ def project_config(self): } } - @use_profile('bigquery') + @attr(type='bigquery') def test_hooks_on_seeds_bigquery(self): res = self.run_dbt(['seed']) self.assertEqual(len(res), 1, 'Expected exactly one item') diff --git a/test/integration/014_hook_tests/test_run_hooks.py b/test/integration/014_hook_tests/test_run_hooks.py index e3af9babdd8..a92790c8473 100644 --- a/test/integration/014_hook_tests/test_run_hooks.py +++ b/test/integration/014_hook_tests/test_run_hooks.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestPrePostRunHooks(DBTIntegrationTest): @@ -75,7 +76,7 @@ def check_hooks(self, state): self.assertEqual(ctx['state'], state) self.assertEqual(ctx['target.dbname'], 'dbt') - self.assertEqual(ctx['target.host'], self.database_host) + self.assertEqual(ctx['target.host'], 'database') self.assertEqual(ctx['target.name'], 'default2') self.assertEqual(ctx['target.port'], 5432) self.assertEqual(ctx['target.schema'], self.unique_schema()) @@ -87,7 +88,7 @@ def check_hooks(self, state): self.assertTrue(ctx['run_started_at'] is not None and len(ctx['run_started_at']) > 0, 'run_started_at was not set') self.assertTrue(ctx['invocation_id'] is not None and len(ctx['invocation_id']) > 0, 'invocation_id was not set') - @use_profile('postgres') + @attr(type='postgres') def test__postgres__pre_and_post_run_hooks(self): self.run_dbt(['run']) @@ -98,7 +99,7 @@ def test__postgres__pre_and_post_run_hooks(self): self.assertTableDoesNotExist("end_hook_order_test") self.assert_used_schemas() - @use_profile('postgres') + @attr(type='postgres') def test__postgres__pre_and_post_seed_hooks(self): self.run_dbt(['seed']) diff --git a/test/integration/014_hook_tests/test_run_hooks_bq.py b/test/integration/014_hook_tests/test_run_hooks_bq.py index 46d5829d1f2..5dc05e57faa 100644 --- a/test/integration/014_hook_tests/test_run_hooks_bq.py +++ b/test/integration/014_hook_tests/test_run_hooks_bq.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestBigqueryPrePostRunHooks(DBTIntegrationTest): @@ -77,7 +78,7 @@ def 
check_hooks(self, state): self.assertTrue(ctx['run_started_at'] is not None and len(ctx['run_started_at']) > 0, 'run_started_at was not set') self.assertTrue(ctx['invocation_id'] is not None and len(ctx['invocation_id']) > 0, 'invocation_id was not set') - @use_profile('bigquery') + @attr(type='bigquery') def test_bigquery_pre_and_post_run_hooks(self): self.run_dbt(['run']) @@ -87,7 +88,7 @@ def test_bigquery_pre_and_post_run_hooks(self): self.assertTableDoesNotExist("start_hook_order_test") self.assertTableDoesNotExist("end_hook_order_test") - @use_profile('bigquery') + @attr(type='bigquery') def test_bigquery_pre_and_post_seed_hooks(self): self.run_dbt(['seed']) diff --git a/test/integration/015_cli_invocation_tests/test_cli_invocation.py b/test/integration/015_cli_invocation_tests/test_cli_invocation.py index f670a298be7..8a43eba8794 100644 --- a/test/integration/015_cli_invocation_tests/test_cli_invocation.py +++ b/test/integration/015_cli_invocation_tests/test_cli_invocation.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, DBT_PROFILES, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest, DBT_PROFILES import os, shutil, yaml class TestCLIInvocation(DBTIntegrationTest): @@ -16,13 +17,13 @@ def schema(self): def models(self): return "test/integration/015_cli_invocation_tests/models" - @use_profile('postgres') + @attr(type='postgres') def test_toplevel_dbt_run(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 1) self.assertTablesEqual("seed", "model") - @use_profile('postgres') + @attr(type='postgres') def test_subdir_dbt_run(self): os.chdir(os.path.join(self.models, "subdir1")) @@ -61,7 +62,7 @@ def custom_profile_config(self): 'default': { 'type': 'postgres', 'threads': 1, - 'host': self.database_host, + 'host': 'database', 'port': 5432, 'user': 'root', 'pass': 'password', @@ -85,7 +86,7 @@ def custom_schema(self): def models(self): return "test/integration/015_cli_invocation_tests/models" - @use_profile('postgres') + @attr(type='postgres') def test_toplevel_dbt_run_with_profile_dir_arg(self): results = self.run_dbt(['run', '--profiles-dir', 'dbt-profile']) self.assertEqual(len(results), 1) diff --git a/test/integration/016_macro_tests/test_macros.py b/test/integration/016_macro_tests/test_macros.py index e0cf958c1f9..a2164c9c62f 100644 --- a/test/integration/016_macro_tests/test_macros.py +++ b/test/integration/016_macro_tests/test_macros.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestMacros(DBTIntegrationTest): @@ -34,7 +35,7 @@ def project_config(self): "macro-paths": ["test/integration/016_macro_tests/macros"], } - @use_profile('postgres') + @attr(type='postgres') def test_working_macros(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -63,7 +64,7 @@ def project_config(self): "macro-paths": ["test/integration/016_macro_tests/bad-macros"] } - @use_profile('postgres') + @attr(type='postgres') def test_invalid_macro(self): try: @@ -106,7 +107,7 @@ def project_config(self): # fails, it does not raise a runtime exception. change this test to verify # that the model finished with ERROR state. 
# - # @use_profile('postgres') + # @attr(type='postgres') # def test_working_macros(self): # self.run_dbt(["deps"]) diff --git a/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py b/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py index 977911d2d4c..42dfc64332e 100644 --- a/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py +++ b/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestRuntimeMaterialization(DBTIntegrationTest): @@ -18,7 +19,7 @@ def schema(self): def models(self): return "test/integration/017_runtime_materialization_tests/models" - @use_profile('postgres') + @attr(type='postgres') def test_postgres_full_refresh(self): # initial full-refresh should have no effect results = self.run_dbt(['run', '--full-refresh']) @@ -43,7 +44,7 @@ def test_postgres_full_refresh(self): self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - @use_profile('postgres') + @attr(type='postgres') def test_postgres_non_destructive(self): results = self.run_dbt(['run', '--non-destructive']) self.assertEqual(len(results), 3) @@ -63,7 +64,7 @@ def test_postgres_non_destructive(self): self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - @use_profile('postgres') + @attr(type='postgres') def test_postgres_full_refresh_and_non_destructive(self): results = self.run_dbt(['run', '--full-refresh', '--non-destructive']) self.assertEqual(len(results), 3) @@ -84,7 +85,7 @@ def test_postgres_full_refresh_and_non_destructive(self): self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - @use_profile('postgres') + @attr(type='postgres') def test_postgres_delete__dbt_tmp_relation(self): # This creates a __dbt_tmp view - make sure it doesn't interfere with the dbt run self.run_sql_file("test/integration/017_runtime_materialization_tests/create_view__dbt_tmp.sql") @@ -95,7 +96,7 @@ def test_postgres_delete__dbt_tmp_relation(self): self.assertTablesEqual("seed","view") - @use_profile('snowflake') + @attr(type='snowflake') def test_snowflake_backup_different_type(self): self.run_sql_file( 'test/integration/017_runtime_materialization_tests/create_backup_and_original.sql' diff --git a/test/integration/018_adapter_ddl_tests/test_adapter_ddl.py b/test/integration/018_adapter_ddl_tests/test_adapter_ddl.py index caebb436a55..274bc761d13 100644 --- a/test/integration/018_adapter_ddl_tests/test_adapter_ddl.py +++ b/test/integration/018_adapter_ddl_tests/test_adapter_ddl.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestAdapterDDL(DBTIntegrationTest): @@ -15,7 +16,7 @@ def schema(self): def models(self): return "test/integration/018_adapter_ddl_tests/models" - @use_profile('postgres') + @attr(type='postgres') def test_sort_and_dist_keys_are_nops_on_postgres(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 1) diff --git a/test/integration/019_analysis_tests/test_analyses.py b/test/integration/019_analysis_tests/test_analyses.py index 53948d9656e..5a69d7aace5 100644 --- a/test/integration/019_analysis_tests/test_analyses.py +++ 
b/test/integration/019_analysis_tests/test_analyses.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest import os @@ -25,7 +26,7 @@ def assert_contents_equal(self, path, expected): with open(path) as fp: self.assertEqual(fp.read().strip(), expected) - @use_profile('postgres') + @attr(type='postgres') def test_analyses(self): compiled_analysis_path = os.path.normpath('target/compiled/test/analysis') path_1 = os.path.join(compiled_analysis_path, 'analysis.sql') diff --git a/test/integration/020_ephemeral_test/test_ephemeral.py b/test/integration/020_ephemeral_test/test_ephemeral.py index bb749ed3424..4d97097bde2 100644 --- a/test/integration/020_ephemeral_test/test_ephemeral.py +++ b/test/integration/020_ephemeral_test/test_ephemeral.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestEphemeral(DBTIntegrationTest): @@ -10,7 +11,7 @@ def schema(self): def models(self): return "test/integration/020_ephemeral_test/models" - @use_profile('postgres') + @attr(type='postgres') def test__postgres(self): self.run_sql_file("test/integration/020_ephemeral_test/seed.sql") @@ -21,7 +22,7 @@ def test__postgres(self): self.assertTablesEqual("seed", "double_dependent") self.assertTablesEqual("seed", "super_dependent") - @use_profile('snowflake') + @attr(type='snowflake') def test__snowflake(self): self.run_sql_file("test/integration/020_ephemeral_test/seed.sql") @@ -41,7 +42,7 @@ def schema(self): def models(self): return "test/integration/020_ephemeral_test/ephemeral-errors" - @use_profile('postgres') + @attr(type='postgres') def test__postgres_upstream_error(self): self.run_sql_file("test/integration/020_ephemeral_test/seed.sql") diff --git a/test/integration/021_concurrency_test/test_concurrency.py b/test/integration/021_concurrency_test/test_concurrency.py index 75758237140..a2e5d497007 100644 --- a/test/integration/021_concurrency_test/test_concurrency.py +++ b/test/integration/021_concurrency_test/test_concurrency.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestConcurrency(DBTIntegrationTest): @@ -10,7 +11,7 @@ def schema(self): def models(self): return "test/integration/021_concurrency_test/models" - @use_profile('postgres') + @attr(type='postgres') def test__postgres__concurrency(self): self.run_sql_file("test/integration/021_concurrency_test/seed.sql") @@ -36,7 +37,7 @@ def test__postgres__concurrency(self): self.assertTableDoesNotExist("invalid") self.assertTableDoesNotExist("skip") - @use_profile('snowflake') + @attr(type='snowflake') def test__snowflake__concurrency(self): self.run_sql_file("test/integration/021_concurrency_test/seed.sql") diff --git a/test/integration/022_bigquery_test/test_bigquery_adapter_functions.py b/test/integration/022_bigquery_test/test_bigquery_adapter_functions.py index c0576b7b346..0c76fa8e6c7 100644 --- a/test/integration/022_bigquery_test/test_bigquery_adapter_functions.py +++ b/test/integration/022_bigquery_test/test_bigquery_adapter_functions.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest, FakeArgs class 
TestBigqueryAdapterFunctions(DBTIntegrationTest): @@ -15,7 +16,7 @@ def models(self): def profile_config(self): return self.bigquery_profile() - @use_profile('bigquery') + @attr(type='bigquery') def test__bigquery_adapter_functions(self): results = self.run_dbt() self.assertEqual(len(results), 3) diff --git a/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py b/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py index f040cc87960..cd72a6713cf 100644 --- a/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py +++ b/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest, FakeArgs class TestBigqueryDatePartitioning(DBTIntegrationTest): @@ -15,7 +16,7 @@ def models(self): def profile_config(self): return self.bigquery_profile() - @use_profile('bigquery') + @attr(type='bigquery') def test__bigquery_date_partitioning(self): results = self.run_dbt() self.assertEqual(len(results), 6) diff --git a/test/integration/023_exit_codes_test/test_exit_codes.py b/test/integration/023_exit_codes_test/test_exit_codes.py index bc54e01aa57..bda5a5dee9f 100644 --- a/test/integration/023_exit_codes_test/test_exit_codes.py +++ b/test/integration/023_exit_codes_test/test_exit_codes.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest, FakeArgs import dbt.exceptions @@ -32,21 +33,21 @@ def project_config(self): ] } - @use_profile('postgres') + @attr(type='postgres') def test_exit_code_run_succeed(self): results, success = self.run_dbt_and_check(['run', '--model', 'good']) self.assertEqual(len(results), 1) self.assertTrue(success) self.assertTableDoesExist('good') - @use_profile('postgres') + @attr(type='postgres') def test__exit_code_run_fail(self): results, success = self.run_dbt_and_check(['run', '--model', 'bad']) self.assertEqual(len(results), 1) self.assertFalse(success) self.assertTableDoesNotExist('bad') - @use_profile('postgres') + @attr(type='postgres') def test___schema_test_pass(self): results, success = self.run_dbt_and_check(['run', '--model', 'good']) self.assertEqual(len(results), 1) @@ -55,7 +56,7 @@ def test___schema_test_pass(self): self.assertEqual(len(results), 1) self.assertTrue(success) - @use_profile('postgres') + @attr(type='postgres') def test___schema_test_fail(self): results, success = self.run_dbt_and_check(['run', '--model', 'dupe']) self.assertEqual(len(results), 1) @@ -64,13 +65,13 @@ def test___schema_test_fail(self): self.assertEqual(len(results), 1) self.assertFalse(success) - @use_profile('postgres') + @attr(type='postgres') def test___compile(self): results, success = self.run_dbt_and_check(['compile']) self.assertEqual(len(results), 7) self.assertTrue(success) - @use_profile('postgres') + @attr(type='postgres') def test___archive_pass(self): self.run_dbt_and_check(['run', '--model', 'good']) results, success = self.run_dbt_and_check(['archive']) @@ -107,7 +108,7 @@ def project_config(self): ] } - @use_profile('postgres') + @attr(type='postgres') def test___archive_fail(self): results, success = self.run_dbt_and_check(['run', '--model', 'good']) self.assertTrue(success) @@ -136,7 +137,7 @@ def packages_config(self): ] } - @use_profile('postgres') + @attr(type='postgres') def test_deps(self): _, 
success = self.run_dbt_and_check(['deps']) self.assertTrue(success) @@ -162,7 +163,7 @@ def packages_config(self): ] } - @use_profile('postgres') + @attr(type='postgres') def test_deps(self): # this should fail try: @@ -186,7 +187,7 @@ def project_config(self): "data-paths": ['test/integration/023_exit_codes_test/data-good'] } - @use_profile('postgres') + @attr(type='postgres') def test_seed(self): results, success = self.run_dbt_and_check(['seed']) self.assertEqual(len(results), 1) @@ -207,7 +208,7 @@ def project_config(self): "data-paths": ['test/integration/023_exit_codes_test/data-bad'] } - @use_profile('postgres') + @attr(type='postgres') def test_seed(self): try: _, success = self.run_dbt_and_check(['seed']) diff --git a/test/integration/024_custom_schema_test/models/view_3.sql b/test/integration/024_custom_schema_test/models/view_3.sql index 33931704248..c208e5d32df 100644 --- a/test/integration/024_custom_schema_test/models/view_3.sql +++ b/test/integration/024_custom_schema_test/models/view_3.sql @@ -1,5 +1,5 @@ -{{ config(schema='test', materialized='table') }} +{{ config(schema='test') }} with v1 as ( diff --git a/test/integration/024_custom_schema_test/test_custom_schema.py b/test/integration/024_custom_schema_test/test_custom_schema.py index b7b3ed752ea..4a64d7e419f 100644 --- a/test/integration/024_custom_schema_test/test_custom_schema.py +++ b/test/integration/024_custom_schema_test/test_custom_schema.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestCustomSchema(DBTIntegrationTest): @@ -11,7 +12,7 @@ def schema(self): def models(self): return "test/integration/024_custom_schema_test/models" - @use_profile('postgres') + @attr(type='postgres') def test__postgres__custom_schema_no_prefix(self): self.use_default_project() self.run_sql_file("test/integration/024_custom_schema_test/seed.sql") @@ -46,7 +47,7 @@ def profile_config(self): 'my-target': { 'type': 'postgres', 'threads': 1, - 'host': self.database_host, + 'host': 'database', 'port': 5432, 'user': 'root', 'pass': 'password', @@ -66,7 +67,7 @@ def project_config(self): } } - @use_profile('postgres') + @attr(type='postgres') def test__postgres__custom_schema_with_prefix(self): self.use_default_project() self.run_sql_file("test/integration/024_custom_schema_test/seed.sql") @@ -84,42 +85,6 @@ def test__postgres__custom_schema_with_prefix(self): self.assertTablesEqual("agg","view_3", schema, xf_schema) -class TestCustomProjectSchemaWithPrefixSnowflake(DBTIntegrationTest): - - @property - def schema(self): - return "custom_schema_024" - - @property - def models(self): - return "test/integration/024_custom_schema_test/models" - - @property - def project_config(self): - return { - "models": { - "schema": "dbt_test" - } - } - - @use_profile('snowflake') - def test__snowflake__custom_schema_with_prefix(self): - self.use_default_project() - self.run_sql_file("test/integration/024_custom_schema_test/seed.sql") - - results = self.run_dbt() - self.assertEqual(len(results), 3) - - schema = self.unique_schema().upper() - v1_schema = "{}_DBT_TEST".format(schema) - v2_schema = "{}_CUSTOM".format(schema) - xf_schema = "{}_TEST".format(schema) - - self.assertTablesEqual("SEED","VIEW_1", schema, v1_schema) - self.assertTablesEqual("SEED","VIEW_2", schema, v2_schema) - self.assertTablesEqual("AGG","VIEW_3", schema, xf_schema) - - class TestCustomSchemaWithCustomMacro(DBTIntegrationTest): @property @@ -138,7 
+103,7 @@ def profile_config(self): 'prod': { 'type': 'postgres', 'threads': 1, - 'host': self.database_host, + 'host': 'database', 'port': 5432, 'user': 'root', 'pass': 'password', @@ -159,7 +124,7 @@ def project_config(self): } } - @use_profile('postgres') + @attr(type='postgres') def test__postgres__custom_schema_from_macro(self): self.use_default_project() self.run_sql_file("test/integration/024_custom_schema_test/seed.sql") diff --git a/test/integration/025_duplicate_model_test/test_duplicate_model.py b/test/integration/025_duplicate_model_test/test_duplicate_model.py index 5d438640ac1..1aa52693293 100644 --- a/test/integration/025_duplicate_model_test/test_duplicate_model.py +++ b/test/integration/025_duplicate_model_test/test_duplicate_model.py @@ -1,5 +1,7 @@ +from nose.plugins.attrib import attr + from dbt.exceptions import CompilationException -from test.integration.base import DBTIntegrationTest, use_profile +from test.integration.base import DBTIntegrationTest class TestDuplicateModelEnabled(DBTIntegrationTest): @@ -20,7 +22,7 @@ def profile_config(self): "dev": { "type": "postgres", "threads": 1, - "host": self.database_host, + "host": "database", "port": 5432, "user": "root", "pass": "password", @@ -32,7 +34,7 @@ def profile_config(self): } } - @use_profile("postgres") + @attr(type="postgres") def test_duplicate_model_enabled(self): message = "dbt found two resources with the name" try: @@ -60,7 +62,7 @@ def profile_config(self): "dev": { "type": "postgres", "threads": 1, - "host": self.database_host, + "host": "database", "port": 5432, "user": "root", "pass": "password", @@ -72,7 +74,7 @@ def profile_config(self): } } - @use_profile("postgres") + @attr(type="postgres") def test_duplicate_model_disabled(self): try: results = self.run_dbt(["run"]) @@ -107,7 +109,7 @@ def packages_config(self): ], } - @use_profile("postgres") + @attr(type="postgres") def test_duplicate_model_enabled_across_packages(self): self.run_dbt(["deps"]) message = "dbt found two resources with the name" @@ -143,7 +145,7 @@ def packages_config(self): ], } - @use_profile("postgres") + @attr(type="postgres") def test_duplicate_model_disabled_across_packages(self): self.run_dbt(["deps"]) try: diff --git a/test/integration/025_timezones_test/test_timezones.py b/test/integration/025_timezones_test/test_timezones.py index 188a145ac59..ed1ce6a9023 100644 --- a/test/integration/025_timezones_test/test_timezones.py +++ b/test/integration/025_timezones_test/test_timezones.py @@ -1,5 +1,6 @@ from freezegun import freeze_time -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestTimezones(DBTIntegrationTest): @@ -20,7 +21,7 @@ def profile_config(self): 'dev': { 'type': 'postgres', 'threads': 1, - 'host': self.database_host, + 'host': 'database', 'port': 5432, 'user': "root", 'pass': "password", @@ -42,7 +43,7 @@ def query(self): """.format(schema=self.unique_schema()) @freeze_time("2017-01-01 03:00:00", tz_offset=0) - @use_profile('postgres') + @attr(type='postgres') def test_run_started_at(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 1) diff --git a/test/integration/026_aliases_test/test_aliases.py b/test/integration/026_aliases_test/test_aliases.py index b53d3680434..418b799cc63 100644 --- a/test/integration/026_aliases_test/test_aliases.py +++ b/test/integration/026_aliases_test/test_aliases.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile 
+from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestAliases(DBTIntegrationTest): @@ -26,19 +27,19 @@ def project_config(self): } } - @use_profile('postgres') + @attr(type='postgres') def test__alias_model_name(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 4) self.run_dbt(['test']) - @use_profile('bigquery') + @attr(type='bigquery') def test__alias_model_name_bigquery(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 4) self.run_dbt(['test']) - @use_profile('snowflake') + @attr(type='snowflake') def test__alias_model_name_snowflake(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 4) @@ -59,7 +60,7 @@ def project_config(self): "macro-paths": ['test/integration/026_aliases_test/macros'], } - @use_profile('postgres') + @attr(type='postgres') def test__alias_dupe_throws_exception(self): message = ".*identical database representation.*" with self.assertRaisesRegexp(Exception, message): @@ -80,7 +81,7 @@ def project_config(self): "macro-paths": ['test/integration/026_aliases_test/macros'], } - @use_profile('postgres') + @attr(type='postgres') def test__same_alias_succeeds_in_different_schemas(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 3) diff --git a/test/integration/027_cycle_test/test_cycles.py b/test/integration/027_cycle_test/test_cycles.py index 10a3ddb82fc..91fd22705cb 100644 --- a/test/integration/027_cycle_test/test_cycles.py +++ b/test/integration/027_cycle_test/test_cycles.py @@ -1,5 +1,6 @@ from freezegun import freeze_time -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class TestSimpleCycle(DBTIntegrationTest): @@ -13,7 +14,7 @@ def models(self): return "test/integration/027_cycle_test/simple_cycle_models" @property - @use_profile('postgres') + @attr(type='postgres') def test_simple_cycle(self): message = "Found a cycle.*" with self.assertRaisesRegexp(Exception, message): @@ -30,7 +31,7 @@ def models(self): return "test/integration/027_cycle_test/complex_cycle_models" @property - @use_profile('postgres') + @attr(type='postgres') def test_simple_cycle(self): message = "Found a cycle.*" with self.assertRaisesRegexp(Exception, message): diff --git a/test/integration/028_cli_vars/test_cli_var_override.py b/test/integration/028_cli_vars/test_cli_var_override.py index 158cd9830fe..0a2451118d0 100644 --- a/test/integration/028_cli_vars/test_cli_var_override.py +++ b/test/integration/028_cli_vars/test_cli_var_override.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest import yaml @@ -21,7 +22,7 @@ def project_config(self): } } - @use_profile('postgres') + @attr(type='postgres') def test__overriden_vars_global(self): self.use_default_project() self.use_profile('postgres') @@ -52,7 +53,7 @@ def project_config(self): } } - @use_profile('postgres') + @attr(type='postgres') def test__overriden_vars_project_level(self): # This should be "override" diff --git a/test/integration/028_cli_vars/test_cli_vars.py b/test/integration/028_cli_vars/test_cli_vars.py index 86c28cca43f..1184c7a9788 100644 --- a/test/integration/028_cli_vars/test_cli_vars.py +++ b/test/integration/028_cli_vars/test_cli_vars.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from 
test.integration.base import DBTIntegrationTest import yaml @@ -11,7 +12,7 @@ def schema(self): def models(self): return "test/integration/028_cli_vars/models_complex" - @use_profile('postgres') + @attr(type='postgres') def test__cli_vars_longform(self): self.use_profile('postgres') self.use_default_project() @@ -38,7 +39,7 @@ def schema(self): def models(self): return "test/integration/028_cli_vars/models_simple" - @use_profile('postgres') + @attr(type='postgres') def test__cli_vars_shorthand(self): self.use_profile('postgres') self.use_default_project() @@ -48,7 +49,7 @@ def test__cli_vars_shorthand(self): results = self.run_dbt(["test", "--vars", "simple: abc"]) self.assertEqual(len(results), 1) - @use_profile('postgres') + @attr(type='postgres') def test__cli_vars_longer(self): self.use_profile('postgres') self.use_default_project() diff --git a/test/integration/029_docs_generate_tests/ref_models/schema.yml b/test/integration/029_docs_generate_tests/ref_models/schema.yml index 0ebd5e3af3f..087efc30108 100644 --- a/test/integration/029_docs_generate_tests/ref_models/schema.yml +++ b/test/integration/029_docs_generate_tests/ref_models/schema.yml @@ -17,15 +17,10 @@ sources: description: "{{ doc('source_info') }}" loader: a_loader schema: "{{ var('test_schema') }}" - quoting: - database: False - identifier: False tables: - name: my_table description: "{{ doc('table_info') }}" identifier: seed - quoting: - identifier: True columns: - name: id description: "An ID field" diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 26339897ad5..8e63acaec8b 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -11,28 +11,8 @@ def _read_file(path): - with open(path, 'r') as fp: - return fp.read().replace('\r', '').replace('\\r', '') - - -class LineIndifferent(object): - def __init__(self, expected): - self.expected = expected.replace('\r', '') - - def __eq__(self, other): - return self.expected == other.replace('\r', '') - - def __repr__(self): - return 'LineIndifferent("{}")'.format(self.expected) - - def __str__(self): - return self.__repr__() - - -def _read_json(path): - # read json generated by dbt. 
with open(path) as fp: - return json.load(fp) + return fp.read() def _normalize(path): @@ -48,15 +28,8 @@ def _normalize(path): return os.path.normcase(os.path.normpath(path)) -def walk_files(path): - for root, dirs, files in os.walk(path): - for basename in files: - yield os.path.join(root, basename) - - class TestDocsGenerate(DBTIntegrationTest): setup_alternate_db = True - def setUp(self): super(TestDocsGenerate, self).setUp() self.maxDiff = None @@ -146,7 +119,7 @@ def _redshift_stats(self): "diststyle": { "id": "diststyle", "label": "Dist Style", - "value": AnyStringWith(None), + "value": "EVEN", "description": "Distribution style or distribution key column, if key distribution is defined.", "include": True }, @@ -498,6 +471,7 @@ def expected_presto_catalog(self): model_database=self.default_database ) + @staticmethod def _clustered_bigquery_columns(update_type): return { @@ -751,7 +725,8 @@ def expected_redshift_incremental_catalog(self): def verify_catalog(self, expected): self.assertTrue(os.path.exists('./target/catalog.json')) - catalog = _read_json('./target/catalog.json') + with open('./target/catalog.json') as fp: + catalog = json.load(fp) self.assertIn('generated_at', catalog) self.assertBetween( @@ -828,7 +803,7 @@ def expected_seeded_manifest(self, model_database=None): 'path': 'model.sql', 'original_file_path': model_sql_path, 'package_name': 'test', - 'raw_sql': LineIndifferent(_read_file(model_sql_path).rstrip('\r\n')), + 'raw_sql': _read_file(model_sql_path).rstrip('\n'), 'refs': [['seed']], 'sources': [], 'depends_on': {'nodes': ['seed.test.seed'], 'macros': []}, @@ -1029,7 +1004,7 @@ def expected_postgres_references_manifest(self, model_database=None): config_vars = {'alternate_db': model_database} my_schema_name = self.unique_schema() docs_path = self.dir('ref_models/docs.md') - docs_file = LineIndifferent(_read_file(docs_path).lstrip()) + docs_file = _read_file(docs_path).lstrip() return { 'nodes': { 'model.test.ephemeral_copy': { @@ -1057,7 +1032,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'original_file_path': self.dir('ref_models/ephemeral_copy.sql'), 'package_name': 'test', 'path': 'ephemeral_copy.sql', - 'raw_sql': LineIndifferent( + 'raw_sql': ( '{{\n config(\n materialized = "ephemeral"\n )\n}}' '\n\nselect * from {{ source("my_source", "my_table") }}' ), @@ -1114,13 +1089,14 @@ def expected_postgres_references_manifest(self, model_database=None): } ], 'empty': False, - 'fqn': ['test', 'ephemeral_summary'], + 'fqn': ['test', + 'ephemeral_summary'], 'name': 'ephemeral_summary', 'original_file_path': self.dir('ref_models/ephemeral_summary.sql'), 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'ephemeral_summary.sql', - 'raw_sql': LineIndifferent( + 'raw_sql': ( '{{\n config(\n materialized = "table"\n )\n}}\n\n' 'select first_name, count(*) as ct from ' "{{ref('ephemeral_copy')}}\ngroup by first_name\n" @@ -1184,7 +1160,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'view_summary.sql', - 'raw_sql': LineIndifferent( + 'raw_sql': ( '{{\n config(\n materialized = "view"\n )\n}}\n\n' 'select first_name, ct from ' "{{ref('ephemeral_summary')}}\norder by ct asc" @@ -1235,36 +1211,32 @@ def expected_postgres_references_manifest(self, model_database=None): 'name': 'id' } }, - 'quoting': { - 'database': False, - 'identifier': True, - }, - 'database': self.default_database, - 'description': 'My table', 
- 'docrefs': [ - { - 'documentation_name': 'table_info', - 'documentation_package': '' - }, - { - 'documentation_name': 'source_info', - 'documentation_package': '' - } - ], - 'freshness': {}, - 'identifier': 'seed', - 'loaded_at_field': None, - 'loader': 'a_loader', - 'name': 'my_table', - 'original_file_path': self.dir('ref_models/schema.yml'), - 'package_name': 'test', - 'path': self.dir('ref_models/schema.yml'), - 'resource_type': 'source', - 'root_path': os.getcwd(), - 'schema': my_schema_name, - 'source_description': "{{ doc('source_info') }}", - 'source_name': 'my_source', - 'unique_id': 'source.test.my_source.my_table' + 'database': self.default_database, + 'description': 'My table', + 'docrefs': [ + { + 'documentation_name': 'table_info', + 'documentation_package': '' + }, + { + 'documentation_name': 'source_info', + 'documentation_package': '' + } + ], + 'freshness': {}, + 'identifier': 'seed', + 'loaded_at_field': None, + 'loader': 'a_loader', + 'name': 'my_table', + 'original_file_path': self.dir('ref_models/schema.yml'), + 'package_name': 'test', + 'path': self.dir('ref_models/schema.yml'), + 'resource_type': 'source', + 'root_path': os.getcwd(), + 'schema': my_schema_name, + 'source_description': "{{ doc('source_info') }}", + 'source_name': 'my_source', + 'unique_id': 'source.test.my_source.my_table' } }, 'docs': { @@ -1394,7 +1366,7 @@ def expected_bigquery_complex_manifest(self): 'original_file_path': clustered_sql_path, 'package_name': 'test', 'path': 'clustered.sql', - 'raw_sql': LineIndifferent(_read_file(clustered_sql_path).rstrip('\r\n')), + 'raw_sql': _read_file(clustered_sql_path).rstrip('\n'), 'refs': [['seed']], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -1450,7 +1422,7 @@ def expected_bigquery_complex_manifest(self): 'original_file_path': multi_clustered_sql_path, 'package_name': 'test', 'path': 'multi_clustered.sql', - 'raw_sql': LineIndifferent(_read_file(multi_clustered_sql_path).rstrip('\r\n')), + 'raw_sql': _read_file(multi_clustered_sql_path).rstrip('\n'), 'refs': [['seed']], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -1507,7 +1479,7 @@ def expected_bigquery_complex_manifest(self): 'original_file_path': nested_view_sql_path, 'package_name': 'test', 'path': 'nested_view.sql', - 'raw_sql': LineIndifferent(_read_file(nested_view_sql_path).rstrip('\r\n')), + 'raw_sql': _read_file(nested_view_sql_path).rstrip('\n'), 'refs': [['nested_table']], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -1564,7 +1536,7 @@ def expected_bigquery_complex_manifest(self): 'original_file_path': nested_table_sql_path, 'package_name': 'test', 'path': 'nested_table.sql', - 'raw_sql': LineIndifferent(_read_file(nested_table_sql_path).rstrip('\r\n')), + 'raw_sql': _read_file(nested_table_sql_path).rstrip('\n'), 'refs': [], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -1648,7 +1620,7 @@ def expected_redshift_incremental_view_manifest(self): "path": "model.sql", "original_file_path": model_sql_path, "package_name": "test", - "raw_sql": LineIndifferent(_read_file(model_sql_path).rstrip('\r\n')), + "raw_sql": _read_file(model_sql_path).rstrip('\n'), "refs": [["seed"]], "sources": [], "depends_on": { @@ -1756,7 +1728,8 @@ def expected_redshift_incremental_view_manifest(self): def verify_manifest(self, expected_manifest): self.assertTrue(os.path.exists('./target/manifest.json')) - manifest = _read_json('./target/manifest.json') + with open('./target/manifest.json') as fp: + manifest = json.load(fp) self.assertEqual( set(manifest), @@ -1808,9 +1781,7 @@ 
def expected_run_results(self, quote_schema=True, quote_model=False, schema = self.unique_schema() # we are selecting from the seed, which is always in the default db - compiled_database = self.default_database - if self.adapter_type != 'snowflake': - compiled_database = self._quote(compiled_database) + compiled_database = self._quote(self.default_database) compiled_schema = self._quote(schema) if quote_schema else schema compiled_seed = self._quote('seed') if quote_model else 'seed' @@ -1861,7 +1832,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'package_name': 'test', 'patch_path': schema_yml_path, 'path': 'model.sql', - 'raw_sql': LineIndifferent(_read_file(model_sql_path).rstrip('\r\n')), + 'raw_sql': _read_file(model_sql_path).rstrip('\n'), 'refs': [['seed']], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -2080,7 +2051,7 @@ def expected_postgres_references_run_results(self): ) cte_sql = ( - ' __dbt__CTE__ephemeral_copy as (\n\n\nselect * from {}."{}"."seed"\n)' + ' __dbt__CTE__ephemeral_copy as (\n\n\nselect * from "{}"."{}"."seed"\n)' ).format(self.default_database, my_schema_name) ephemeral_injected_sql = ( @@ -2162,7 +2133,7 @@ def expected_postgres_references_run_results(self): 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'ephemeral_summary.sql', - 'raw_sql': LineIndifferent( + 'raw_sql': ( '{{\n config(\n materialized = "table"\n )\n}}\n' '\nselect first_name, count(*) as ct from ' "{{ref('ephemeral_copy')}}\ngroup by first_name\n" @@ -2249,7 +2220,7 @@ def expected_postgres_references_run_results(self): 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'view_summary.sql', - 'raw_sql': LineIndifferent( + 'raw_sql': ( '{{\n config(\n materialized = "view"\n )\n}}\n\n' 'select first_name, ct from ' "{{ref('ephemeral_summary')}}\norder by ct asc" @@ -2320,7 +2291,8 @@ def expected_postgres_references_run_results(self): ] def verify_run_results(self, expected_run_results): - run_result = _read_json('./target/run_results.json') + with open('./target/run_results.json') as fp: + run_result = json.load(fp) self.assertIn('generated_at', run_result) self.assertIn('elapsed_time', run_result) diff --git a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py index 1b47ded1d10..cfb9876a4c7 100644 --- a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py +++ b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py @@ -1,6 +1,6 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest import threading -from dbt.adapters.factory import get_adapter class BaseTestConcurrentTransaction(DBTIntegrationTest): @@ -10,10 +10,6 @@ def reset(self): 'model_1': 'wait', } - def setUp(self): - super(BaseTestConcurrentTransaction, self).setUp() - self.reset() - @property def schema(self): return "concurrent_transaction_032" @@ -30,8 +26,7 @@ def project_config(self): def run_select_and_check(self, rel, sql): connection_name = '__test_{}'.format(id(threading.current_thread())) try: - with get_adapter(self.config).connection_named(connection_name) as conn: - res = self.run_sql_common(self.transform_sql(sql), 'one', conn) + res = self.run_sql(sql, fetch='one', connection_name=connection_name) # The result is the output of f_sleep(), which is True if 
res[0] == True: @@ -59,7 +54,7 @@ def async_select(self, rel, sleep=10): sleep=sleep, rel=rel) - thread = threading.Thread(target=self.run_select_and_check, args=(rel, query)) + thread = threading.Thread(target=lambda: self.run_select_and_check(rel, query)) thread.start() return thread @@ -93,7 +88,7 @@ class TableTestConcurrentTransaction(BaseTestConcurrentTransaction): def models(self): return "test/integration/032_concurrent_transaction_test/models-table" - @use_profile("redshift") + @attr(type="redshift") def test__redshift__concurrent_transaction_table(self): self.reset() self.run_test() @@ -103,7 +98,7 @@ class ViewTestConcurrentTransaction(BaseTestConcurrentTransaction): def models(self): return "test/integration/032_concurrent_transaction_test/models-view" - @use_profile("redshift") + @attr(type="redshift") def test__redshift__concurrent_transaction_view(self): self.reset() self.run_test() @@ -113,7 +108,7 @@ class IncrementalTestConcurrentTransaction(BaseTestConcurrentTransaction): def models(self): return "test/integration/032_concurrent_transaction_test/models-incremental" - @use_profile("redshift") + @attr(type="redshift") def test__redshift__concurrent_transaction_incremental(self): self.reset() self.run_test() diff --git a/test/integration/033_event_tracking_test/test_events.py b/test/integration/033_event_tracking_test/test_events.py index 91121872623..83d94335f85 100644 --- a/test/integration/033_event_tracking_test/test_events.py +++ b/test/integration/033_event_tracking_test/test_events.py @@ -1,7 +1,7 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest import mock import hashlib -import os from mock import call, ANY @@ -188,7 +188,7 @@ def project_config(self): "test-paths": [self.dir("test")], } - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_compile(self): expected_calls = [ call( @@ -216,7 +216,7 @@ def test__event_tracking_compile(self): expected_contexts ) - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_deps(self): package_context = [ { @@ -259,7 +259,7 @@ def test__event_tracking_deps(self): self.run_event_test(["deps"], expected_calls, expected_contexts) - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_seed(self): def seed_context(project_id, user_id, invocation_id, version): return [{ @@ -313,7 +313,7 @@ def seed_context(project_id, user_id, invocation_id, version): self.run_event_test(["seed"], expected_calls, expected_contexts) - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_models(self): expected_calls = [ call( @@ -342,12 +342,6 @@ def test__event_tracking_models(self): ), ] - hashed = '20ff78afb16c8b3b8f83861b1d3b99bd' - # this hashed contents field changes on azure postgres tests, I believe - # due to newlines again - if os.name == 'nt': - hashed = '52cf9d1db8f0a18ca64ef64681399746' - expected_contexts = [ self.build_context('run', 'start'), self.run_context( @@ -359,7 +353,7 @@ def test__event_tracking_models(self): materialization='view' ), self.run_context( - hashed_contents=hashed, + hashed_contents='20ff78afb16c8b3b8f83861b1d3b99bd', model_id='57994a805249953b31b738b1af7a1eeb', index=2, total=2, @@ -375,7 +369,7 @@ def test__event_tracking_models(self): expected_contexts ) - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_model_error(self): # cmd = ["run", "--model", "model_error"] # 
self.run_event_test(cmd, event_run_model_error, expect_pass=False) @@ -421,7 +415,7 @@ def test__event_tracking_model_error(self): expect_pass=False ) - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_tests(self): # TODO: dbt does not track events for tests, but it should! self.run_dbt(["run", "--model", "example", "example_2"]) @@ -461,7 +455,7 @@ def project_config(self): "source-paths": [self.dir("model-compilation-error")], } - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_with_compilation_error(self): expected_calls = [ call( @@ -505,7 +499,7 @@ def profile_config(self): 'default2': { 'type': 'postgres', 'threads': 4, - 'host': self.database_host, + 'host': 'database', 'port': 5432, 'user': 'root', 'pass': 'password', @@ -515,7 +509,7 @@ def profile_config(self): 'noaccess': { 'type': 'postgres', 'threads': 4, - 'host': self.database_host, + 'host': 'database', 'port': 5432, 'user': 'BAD', 'pass': 'bad_password', @@ -527,7 +521,7 @@ def profile_config(self): } } - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_unable_to_connect(self): expected_calls = [ call( @@ -577,7 +571,7 @@ def project_config(self): ] } - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_archive(self): self.run_dbt(["run", "--models", "archivable"]) @@ -602,11 +596,10 @@ def test__event_tracking_archive(self): ), ] - # the model here has a raw_sql that contains the schema, which changes expected_contexts = [ self.build_context('archive', 'start'), self.run_context( - hashed_contents=ANY, + hashed_contents='f785c4490e73e5b52fed5627f5709bfa', model_id='3cdcd0fef985948fd33af308468da3b9', index=1, total=1, @@ -624,7 +617,7 @@ def test__event_tracking_archive(self): class TestEventTrackingCatalogGenerate(TestEventTracking): - @use_profile("postgres") + @attr(type="postgres") def test__event_tracking_catalog_generate(self): # create a model for the catalog self.run_dbt(["run", "--models", "example"]) diff --git a/test/integration/034_redshift_test/test_late_binding_view.py b/test/integration/034_redshift_test/test_late_binding_view.py index 37350ef7d57..a55318443bf 100644 --- a/test/integration/034_redshift_test/test_late_binding_view.py +++ b/test/integration/034_redshift_test/test_late_binding_view.py @@ -1,6 +1,7 @@ import json import os +from nose.plugins.attrib import attr from test.integration.base import DBTIntegrationTest, use_profile diff --git a/test/integration/037_external_reference_test/test_external_reference.py b/test/integration/037_external_reference_test/test_external_reference.py index ba6bf73bdb6..bd754ae169c 100644 --- a/test/integration/037_external_reference_test/test_external_reference.py +++ b/test/integration/037_external_reference_test/test_external_reference.py @@ -29,8 +29,8 @@ def tearDown(self): # This has to happen before we drop the external schema, because # otherwise postgres hangs forever. 
self._drop_schemas() - with self.test_connection(): - self.adapter.drop_schema(self.default_database, self.external_schema) + self.adapter.drop_schema(self.default_database, self.external_schema, + model_name='__test') super(TestExternalReference, self).tearDown() @use_profile('postgres') @@ -39,7 +39,6 @@ def test__postgres__external_reference(self): # running it again should succeed self.assertEquals(len(self.run_dbt()), 1) - # The opposite of the test above -- check that external relations that # depend on a dbt model do not create issues with caching class TestExternalDependency(DBTIntegrationTest): @@ -55,8 +54,8 @@ def tearDown(self): # This has to happen before we drop the external schema, because # otherwise postgres hangs forever. self._drop_schemas() - with self.test_connection(): - self.adapter.drop_schema(self.default_database, self.external_schema) + self.adapter.drop_schema(self.default_database, self.external_schema, + model_name='__test') super(TestExternalDependency, self).tearDown() @use_profile('postgres') diff --git a/test/integration/040_override_database_test/test_override_database.py b/test/integration/040_override_database_test/test_override_database.py index 1071eb2bd31..a3319441d6b 100644 --- a/test/integration/040_override_database_test/test_override_database.py +++ b/test/integration/040_override_database_test/test_override_database.py @@ -1,4 +1,5 @@ -from test.integration.base import DBTIntegrationTest, use_profile +from nose.plugins.attrib import attr +from test.integration.base import DBTIntegrationTest class BaseOverrideDatabase(DBTIntegrationTest): @@ -41,11 +42,11 @@ def run_database_override(self): (func('view_4'), self.unique_schema(), self.alternative_database), ]) - @use_profile('bigquery') + @attr(type='bigquery') def test_bigquery_database_override(self): self.run_database_override() - @use_profile('snowflake') + @attr(type='snowflake') def test_snowflake_database_override(self): self.run_database_override() @@ -81,11 +82,11 @@ def run_database_override(self): (func('view_4'), self.unique_schema(), self.alternative_database), ]) - @use_profile('bigquery') + @attr(type='bigquery') def test_bigquery_database_override(self): self.run_database_override() - @use_profile('snowflake') + @attr(type='snowflake') def test_snowflake_database_override(self): self.run_database_override() @@ -111,10 +112,10 @@ def run_database_override(self): (func('view_4'), self.unique_schema(), self.alternative_database), ]) - @use_profile('bigquery') + @attr(type='bigquery') def test_bigquery_database_override(self): self.run_database_override() - @use_profile('snowflake') + @attr(type='snowflake') def test_snowflake_database_override(self): self.run_database_override() diff --git a/test/integration/042_sources_test/macros/macro.sql b/test/integration/042_sources_test/macros/macro.sql deleted file mode 100644 index a400a94f625..00000000000 --- a/test/integration/042_sources_test/macros/macro.sql +++ /dev/null @@ -1,7 +0,0 @@ -{% macro override_me() -%} - {{ exceptions.raise_compiler_error('this is a bad macro') }} -{%- endmacro %} - -{% macro happy_little_macro() -%} - {{ override_me() }} -{%- endmacro %} diff --git a/test/integration/042_sources_test/models/ephemeral_model.sql b/test/integration/042_sources_test/models/ephemeral_model.sql deleted file mode 100644 index 8de35cd3e21..00000000000 --- a/test/integration/042_sources_test/models/ephemeral_model.sql +++ /dev/null @@ -1,3 +0,0 @@ -{{ config(materialized='ephemeral') }} - -select 1 as id diff --git 
a/test/integration/042_sources_test/models/schema.yml b/test/integration/042_sources_test/models/schema.yml index 00b32bc6dcd..894dbfc03f4 100644 --- a/test/integration/042_sources_test/models/schema.yml +++ b/test/integration/042_sources_test/models/schema.yml @@ -14,8 +14,6 @@ sources: warn_after: {count: 10, period: hour} error_after: {count: 1, period: day} schema: "{{ var(env_var('DBT_TEST_SCHEMA_NAME_VARIABLE')) }}" - quoting: - identifier: True tables: - name: test_table identifier: source @@ -48,8 +46,6 @@ sources: identifier: other_table - name: other_source schema: "{{ var('test_run_schema') }}" - quoting: - identifier: True tables: - name: test_table identifier: other_source_table diff --git a/test/integration/042_sources_test/test_sources.py b/test/integration/042_sources_test/test_sources.py index ab56d077443..0c4aa17538a 100644 --- a/test/integration/042_sources_test/test_sources.py +++ b/test/integration/042_sources_test/test_sources.py @@ -1,19 +1,11 @@ +from nose.plugins.attrib import attr +from datetime import datetime, timedelta import json -import multiprocessing import os -import socket -import sys -import time -import unittest -from base64 import standard_b64encode as b64 -from datetime import datetime, timedelta - -import requests from dbt.exceptions import CompilationException from test.integration.base import DBTIntegrationTest, use_profile, AnyFloat, \ AnyStringWith -from dbt.main import handle_and_check class BaseSourcesTest(DBTIntegrationTest): @@ -121,14 +113,13 @@ def test_source_childrens_parents(self): ) self.assertTableDoesNotExist('nonsource_descendant') - class TestSourceFreshness(BaseSourcesTest): def setUp(self): super(TestSourceFreshness, self).setUp() self.maxDiff = None self._id = 100 # this is the db initial value - self.last_inserted_time = "2016-09-19T14:45:51+00:00" + self.last_inserted_time = "2016-09-19T14:45:51+00:00Z" # test_source.test_table should have a loaded_at field of `updated_at` # and a freshness of warn_after: 10 hours, error_after: 18 hours @@ -153,7 +144,7 @@ def _set_updated_at_to(self, delta): 'source': self.adapter.quote('source'), } ) - self.last_inserted_time = insert_time.strftime("%Y-%m-%dT%H:%M:%S+00:00") + self.last_inserted_time = insert_time.strftime("%Y-%m-%dT%H:%M:%S+00:00Z") def _assert_freshness_results(self, path, state): self.assertTrue(os.path.exists(path)) @@ -169,7 +160,7 @@ def _assert_freshness_results(self, path, state): last_inserted_time = self.last_inserted_time if last_inserted_time is None: - last_inserted_time = "2016-09-19T14:45:51+00:00" + last_inserted_time = "2016-09-19T14:45:51+00:00Z" self.assertEqual(data['sources'], { 'source.test.test_source.test_table': { @@ -265,594 +256,3 @@ def test_postgres_malformed_schema_nonstrict_will_not_break_run(self): def test_postgres_malformed_schema_strict_will_break_run(self): with self.assertRaises(CompilationException): self.run_dbt_with_vars(['run'], strict=True) - - -class ServerProcess(multiprocessing.Process): - def __init__(self, cli_vars=None): - self.port = 22991 - handle_and_check_args = [ - '--strict', 'rpc', '--log-cache-events', - '--port', str(self.port), - ] - if cli_vars: - handle_and_check_args.extend(['--vars', cli_vars]) - super(ServerProcess, self).__init__( - target=handle_and_check, - args=(handle_and_check_args,), - name='ServerProcess') - - def is_up(self): - sock = socket.socket() - try: - sock.connect(('localhost', self.port)) - except socket.error: - return False - sock.close() - return True - - def start(self): - 
super(ServerProcess, self).start() - for _ in range(10): - if self.is_up(): - break - time.sleep(0.5) - if not self.is_up(): - self.terminate() - raise Exception('server never appeared!') - - -def query_url(url, query): - headers = {'content-type': 'application/json'} - return requests.post(url, headers=headers, data=json.dumps(query)) - - -class BackgroundQueryProcess(multiprocessing.Process): - def __init__(self, query, url, group=None, name=None): - parent, child = multiprocessing.Pipe() - self.parent_pipe = parent - self.child_pipe = child - self.query = query - self.url = url - super(BackgroundQueryProcess, self).__init__(group=group, name=name) - - def run(self): - try: - result = query_url(self.url, self.query).json() - except Exception as exc: - self.child_pipe.send(('error', str(exc))) - else: - self.child_pipe.send(('result', result)) - - def wait_result(self): - result_type, result = self.parent_pipe.recv() - self.join() - if result_type == 'error': - raise Exception(result) - else: - return result - -_select_from_ephemeral = '''with __dbt__CTE__ephemeral_model as ( - - -select 1 as id -)select * from __dbt__CTE__ephemeral_model''' - - -@unittest.skipIf(os.name == 'nt', 'Windows not supported for now') -class TestRPCServer(BaseSourcesTest): - def setUp(self): - super(TestRPCServer, self).setUp() - self._server = ServerProcess( - cli_vars='{{test_run_schema: {}}}'.format(self.unique_schema()) - ) - self._server.start() - - def tearDown(self): - self._server.terminate() - super(TestRPCServer, self).tearDown() - - @property - def project_config(self): - return { - 'data-paths': ['test/integration/042_sources_test/data'], - 'quoting': {'database': True, 'schema': True, 'identifier': True}, - 'macro-paths': ['test/integration/042_sources_test/macros'], - } - - def build_query(self, method, kwargs, sql=None, test_request_id=1, - macros=None): - body_data = '' - if sql is not None: - body_data += sql - - if macros is not None: - body_data += macros - - if sql is not None or macros is not None: - kwargs['sql'] = b64(body_data.encode('utf-8')).decode('utf-8') - - return { - 'jsonrpc': '2.0', - 'method': method, - 'params': kwargs, - 'id': test_request_id - } - - @property - def url(self): - return 'http://localhost:{}/jsonrpc'.format(self._server.port) - - def query(self, _method, _sql=None, _test_request_id=1, macros=None, **kwargs): - built = self.build_query(_method, kwargs, _sql, _test_request_id, macros) - return query_url(self.url, built) - - def handle_result(self, bg_query, pipe): - result_type, result = pipe.recv() - bg_query.join() - if result_type == 'error': - raise result - else: - return result - - def background_query(self, _method, _sql=None, _test_request_id=1, - _block=False, macros=None, **kwargs): - built = self.build_query(_method, kwargs, _sql, _test_request_id, - macros) - - url = 'http://localhost:{}/jsonrpc'.format(self._server.port) - name = _method - if 'name' in kwargs: - name += ' ' + kwargs['name'] - bg_query = BackgroundQueryProcess(built, url, name=name) - bg_query.start() - return bg_query - - def assertResultHasTimings(self, result, *names): - self.assertIn('timing', result) - timings = result['timing'] - self.assertEqual(len(timings), len(names)) - for expected_name, timing in zip(names, timings): - self.assertIn('name', timing) - self.assertEqual(timing['name'], expected_name) - self.assertIn('started_at', timing) - self.assertIn('completed_at', timing) - datetime.strptime(timing['started_at'], '%Y-%m-%dT%H:%M:%S.%fZ') - 
datetime.strptime(timing['completed_at'], '%Y-%m-%dT%H:%M:%S.%fZ') - - def assertIsResult(self, data): - self.assertEqual(data['id'], 1) - self.assertEqual(data['jsonrpc'], '2.0') - self.assertIn('result', data) - self.assertNotIn('error', data) - return data['result'] - - def assertIsError(self, data, id_=1): - self.assertEqual(data['id'], id_) - self.assertEqual(data['jsonrpc'], '2.0') - self.assertIn('error', data) - self.assertNotIn('result', data) - return data['error'] - - def assertIsErrorWithCode(self, data, code, id_=1): - error = self.assertIsError(data, id_) - self.assertIn('code', error) - self.assertIn('message', error) - self.assertEqual(error['code'], code) - return error - - def assertIsErrorWith(self, data, code, message, error_data): - error = self.assertIsErrorWithCode(data, code) - if message is not None: - self.assertEqual(error['message'], message) - - if error_data is not None: - return self.assertHasErrorData(error, error_data) - else: - return error.get('data') - - def assertResultHasSql(self, data, raw_sql, compiled_sql=None): - if compiled_sql is None: - compiled_sql = raw_sql - result = self.assertIsResult(data) - self.assertIn('logs', result) - self.assertTrue(len(result['logs']) > 0) - self.assertIn('raw_sql', result) - self.assertIn('compiled_sql', result) - self.assertEqual(result['raw_sql'], raw_sql) - self.assertEqual(result['compiled_sql'], compiled_sql) - return result - - def assertSuccessfulCompilationResult(self, data, raw_sql, compiled_sql=None): - result = self.assertResultHasSql(data, raw_sql, compiled_sql) - self.assertNotIn('table', result) - # compile results still have an 'execute' timing, it just represents - # the time to construct a result object. - self.assertResultHasTimings(result, 'compile', 'execute') - - def assertSuccessfulRunResult(self, data, raw_sql, compiled_sql=None, table=None): - result = self.assertResultHasSql(data, raw_sql, compiled_sql) - self.assertIn('table', result) - if table is not None: - self.assertEqual(result['table'], table) - self.assertResultHasTimings(result, 'compile', 'execute') - - @use_profile('postgres') - def test_compile_postgres(self): - trivial = self.query( - 'compile', - 'select 1 as id', - name='foo' - ).json() - self.assertSuccessfulCompilationResult( - trivial, 'select 1 as id' - ) - - ref = self.query( - 'compile', - 'select * from {{ ref("descendant_model") }}', - name='foo' - ).json() - self.assertSuccessfulCompilationResult( - ref, - 'select * from {{ ref("descendant_model") }}', - compiled_sql='select * from "{}"."{}"."descendant_model"'.format( - self.default_database, - self.unique_schema()) - ) - - source = self.query( - 'compile', - 'select * from {{ source("test_source", "test_table") }}', - name='foo' - ).json() - self.assertSuccessfulCompilationResult( - source, - 'select * from {{ source("test_source", "test_table") }}', - compiled_sql='select * from "{}"."{}"."source"'.format( - self.default_database, - self.unique_schema()) - ) - - macro = self.query( - 'compile', - 'select {{ my_macro() }}', - name='foo', - macros='{% macro my_macro() %}1 as id{% endmacro %}' - ).json() - self.assertSuccessfulCompilationResult( - macro, - 'select {{ my_macro() }}', - compiled_sql='select 1 as id' - ) - - macro_override = self.query( - 'compile', - 'select {{ happy_little_macro() }}', - name='foo', - macros='{% macro override_me() %}2 as id{% endmacro %}' - ).json() - self.assertSuccessfulCompilationResult( - macro_override, - 'select {{ happy_little_macro() }}', - compiled_sql='select 2 as id' - ) 
- - macro_override_with_if_statement = self.query( - 'compile', - '{% if True %}select {{ happy_little_macro() }}{% endif %}', - name='foo', - macros='{% macro override_me() %}2 as id{% endmacro %}' - ).json() - self.assertSuccessfulCompilationResult( - macro_override_with_if_statement, - '{% if True %}select {{ happy_little_macro() }}{% endif %}', - compiled_sql='select 2 as id' - ) - - ephemeral = self.query( - 'compile', - 'select * from {{ ref("ephemeral_model") }}', - name='foo' - ).json() - self.assertSuccessfulCompilationResult( - ephemeral, - 'select * from {{ ref("ephemeral_model") }}', - compiled_sql=_select_from_ephemeral - ) - - @use_profile('postgres') - def test_run_postgres(self): - # seed + run dbt to make models before using them! - self.run_dbt_with_vars(['seed']) - self.run_dbt_with_vars(['run']) - data = self.query( - 'run', - 'select 1 as id', - name='foo' - ).json() - self.assertSuccessfulRunResult( - data, 'select 1 as id', table={'column_names': ['id'], 'rows': [[1.0]]} - ) - - ref = self.query( - 'run', - 'select * from {{ ref("descendant_model") }} order by updated_at limit 1', - name='foo' - ).json() - self.assertSuccessfulRunResult( - ref, - 'select * from {{ ref("descendant_model") }} order by updated_at limit 1', - compiled_sql='select * from "{}"."{}"."descendant_model" order by updated_at limit 1'.format( - self.default_database, - self.unique_schema()), - table={ - 'column_names': ['favorite_color', 'id', 'first_name', 'email', 'ip_address', 'updated_at'], - 'rows': [['blue', 38.0, 'Gary', 'gray11@statcounter.com', "'40.193.124.56'", '1970-01-27T10:04:51']], - } - ) - - source = self.query( - 'run', - 'select * from {{ source("test_source", "test_table") }} order by updated_at limit 1', - name='foo' - ).json() - self.assertSuccessfulRunResult( - source, - 'select * from {{ source("test_source", "test_table") }} order by updated_at limit 1', - compiled_sql='select * from "{}"."{}"."source" order by updated_at limit 1'.format( - self.default_database, - self.unique_schema()), - table={ - 'column_names': ['favorite_color', 'id', 'first_name', 'email', 'ip_address', 'updated_at'], - 'rows': [['blue', 38.0, 'Gary', 'gray11@statcounter.com', "'40.193.124.56'", '1970-01-27T10:04:51']], - } - ) - - macro = self.query( - 'run', - 'select {{ my_macro() }}', - name='foo', - macros='{% macro my_macro() %}1 as id{% endmacro %}' - ).json() - self.assertSuccessfulRunResult( - macro, - raw_sql='select {{ my_macro() }}', - compiled_sql='select 1 as id', - table={'column_names': ['id'], 'rows': [[1.0]]} - ) - - macro_override = self.query( - 'run', - 'select {{ happy_little_macro() }}', - name='foo', - macros='{% macro override_me() %}2 as id{% endmacro %}' - ).json() - self.assertSuccessfulRunResult( - macro_override, - raw_sql='select {{ happy_little_macro() }}', - compiled_sql='select 2 as id', - table={'column_names': ['id'], 'rows': [[2.0]]} - ) - - macro_override_with_if_statement = self.query( - 'run', - '{% if True %}select {{ happy_little_macro() }}{% endif %}', - name='foo', - macros='{% macro override_me() %}2 as id{% endmacro %}' - ).json() - self.assertSuccessfulRunResult( - macro_override_with_if_statement, - '{% if True %}select {{ happy_little_macro() }}{% endif %}', - compiled_sql='select 2 as id', - table={'column_names': ['id'], 'rows': [[2.0]]} - ) - - macro_with_raw_statement = self.query( - 'run', - '{% raw %}select 1 as{% endraw %}{{ test_macros() }}{% macro test_macros() %} id{% endmacro %}', - name='foo' - ).json() - self.assertSuccessfulRunResult( - 
macro_with_raw_statement, - '{% raw %}select 1 as{% endraw %}{{ test_macros() }}', - compiled_sql='select 1 as id', - table={'column_names': ['id'], 'rows': [[1.0]]} - ) - - macro_with_comment = self.query( - 'run', - '{% raw %}select 1 {% endraw %}{{ test_macros() }} {# my comment #}{% macro test_macros() -%} as{% endmacro %} id{# another comment #}', - name='foo' - ).json() - self.assertSuccessfulRunResult( - macro_with_comment, - '{% raw %}select 1 {% endraw %}{{ test_macros() }} {# my comment #} id{# another comment #}', - compiled_sql='select 1 as id', - table={'column_names': ['id'], 'rows': [[1.0]]} - ) - - ephemeral = self.query( - 'run', - 'select * from {{ ref("ephemeral_model") }}', - name='foo' - ).json() - self.assertSuccessfulRunResult( - ephemeral, - raw_sql='select * from {{ ref("ephemeral_model") }}', - compiled_sql=_select_from_ephemeral, - table={'column_names': ['id'], 'rows': [[1.0]]} - ) - - @use_profile('postgres') - def test_ps_kill_postgres(self): - done_query = self.query('compile', 'select 1 as id', name='done').json() - self.assertIsResult(done_query) - pg_sleeper, sleep_task_id, request_id = self._get_sleep_query() - - empty_ps_result = self.query('ps', completed=False, active=False).json() - result = self.assertIsResult(empty_ps_result) - self.assertEqual(len(result['rows']), 0) - - sleeper_ps_result = self.query('ps', completed=False, active=True).json() - result = self.assertIsResult(sleeper_ps_result) - self.assertEqual(len(result['rows']), 1) - rowdict = result['rows'] - self.assertEqual(rowdict[0]['request_id'], request_id) - self.assertEqual(rowdict[0]['method'], 'run') - self.assertEqual(rowdict[0]['state'], 'running') - self.assertEqual(rowdict[0]['timeout'], None) - - complete_ps_result = self.query('ps', completed=True, active=False).json() - result = self.assertIsResult(complete_ps_result) - self.assertEqual(len(result['rows']), 1) - rowdict = result['rows'] - self.assertEqual(rowdict[0]['request_id'], 1) - self.assertEqual(rowdict[0]['method'], 'compile') - self.assertEqual(rowdict[0]['state'], 'finished') - self.assertEqual(rowdict[0]['timeout'], None) - - all_ps_result = self.query('ps', completed=True, active=True).json() - result = self.assertIsResult(all_ps_result) - self.assertEqual(len(result['rows']), 2) - rowdict = result['rows'] - rowdict.sort(key=lambda r: r['start']) - self.assertEqual(rowdict[0]['request_id'], 1) - self.assertEqual(rowdict[0]['method'], 'compile') - self.assertEqual(rowdict[0]['state'], 'finished') - self.assertEqual(rowdict[0]['timeout'], None) - self.assertEqual(rowdict[1]['request_id'], request_id) - self.assertEqual(rowdict[1]['method'], 'run') - self.assertEqual(rowdict[1]['state'], 'running') - self.assertEqual(rowdict[1]['timeout'], None) - - self.kill_and_assert(pg_sleeper, sleep_task_id, request_id) - - def kill_and_assert(self, pg_sleeper, task_id, request_id): - kill_result = self.query('kill', task_id=task_id).json() - kill_time = time.time() - result = self.assertIsResult(kill_result) - self.assertTrue(result['killed']) - - sleeper_result = pg_sleeper.wait_result() - result_time = time.time() - error = self.assertIsErrorWithCode(sleeper_result, 10009, request_id) - self.assertEqual(error['message'], 'RPC process killed') - self.assertIn('data', error) - error_data = error['data'] - self.assertEqual(error_data['signum'], 2) - self.assertEqual(error_data['message'], 'RPC process killed by signal 2') - self.assertIn('logs', error_data) - # it should take less than 5s to kill the process if things are working 
- # properly. On python 2.x, things do not work properly. - if sys.version_info.major > 2: - self.assertLess(result_time, kill_time + 5) - return error_data - - def _get_sleep_query(self): - request_id = 90890 - pg_sleeper = self.background_query( - 'run', - 'select pg_sleep(15)', - _test_request_id=request_id, - name='sleeper', - ) - - for _ in range(20): - time.sleep(0.2) - sleeper_ps_result = self.query('ps', completed=False, active=True).json() - result = self.assertIsResult(sleeper_ps_result) - rows = result['rows'] - for row in rows: - if row['request_id'] == request_id and row['state'] == 'running': - return pg_sleeper, row['task_id'], request_id - - self.assertTrue(False, 'request ID never found running!') - - @use_profile('postgres') - def test_ps_kill_longwait_postgres(self): - pg_sleeper, sleep_task_id, request_id = self._get_sleep_query() - - # the test above frequently kills the process during parsing of the - # requested node. That's also a useful test, but we should test that - # we cancel the in-progress sleep query. - time.sleep(3) - - error_data = self.kill_and_assert(pg_sleeper, sleep_task_id, request_id) - # we should have logs if we did anything - self.assertTrue(len(error_data['logs']) > 0) - - @use_profile('postgres') - def test_invalid_requests_postgres(self): - data = self.query( - 'xxxxxnotamethodxxxxx', - 'hi this is not sql' - ).json() - self.assertIsErrorWith(data, -32601, 'Method not found', None) - - data = self.query( - 'compile', - 'select * from {{ reff("nonsource_descendant") }}', - name='mymodel' - ).json() - error_data = self.assertIsErrorWith(data, 10004, 'Compilation Error', { - 'type': 'CompilationException', - 'message': "Compilation Error in rpc mymodel (from remote system)\n 'reff' is undefined", - 'compiled_sql': None, - 'raw_sql': 'select * from {{ reff("nonsource_descendant") }}', - }) - self.assertIn('logs', error_data) - self.assertTrue(len(error_data['logs']) > 0) - - data = self.query( - 'run', - 'hi this is not sql', - name='foo' - ).json() - error_data = self.assertIsErrorWith(data, 10003, 'Database Error', { - 'type': 'DatabaseException', - 'message': 'Database Error in rpc foo (from remote system)\n syntax error at or near "hi"\n LINE 1: hi this is not sql\n ^', - 'compiled_sql': 'hi this is not sql', - 'raw_sql': 'hi this is not sql', - }) - self.assertIn('logs', error_data) - self.assertTrue(len(error_data['logs']) > 0) - - macro_no_override = self.query( - 'run', - 'select {{ happy_little_macro() }}', - name='foo', - ).json() - error_data = self.assertIsErrorWith(macro_no_override, 10004, 'Compilation Error', { - 'type': 'CompilationException', - 'raw_sql': 'select {{ happy_little_macro() }}', - 'compiled_sql': None - }) - self.assertIn('logs', error_data) - self.assertTrue(len(error_data['logs']) > 0) - - def assertHasErrorData(self, error, expected_error_data): - self.assertIn('data', error) - error_data = error['data'] - for key, value in expected_error_data.items(): - self.assertIn(key, error_data) - self.assertEqual(error_data[key], value) - return error_data - - @use_profile('postgres') - def test_timeout_postgres(self): - data = self.query( - 'run', - 'select from pg_sleep(5)', - name='foo', - timeout=1 - ).json() - error = self.assertIsErrorWithCode(data, 10008) - self.assertEqual(error['message'], 'RPC timeout error') - self.assertIn('data', error) - error_data = error['data'] - self.assertIn('timeout', error_data) - self.assertEqual(error_data['timeout'], 1) - self.assertIn('message', error_data) - 
self.assertEqual(error_data['message'], 'RPC timed out after 1s') - self.assertIn('logs', error_data) - self.assertTrue(len(error_data['logs']) > 0) diff --git a/test/integration/043_custom_aliases_test/macros/macros.sql b/test/integration/043_custom_aliases_test/macros/macros.sql deleted file mode 100644 index 4bc6a3990e0..00000000000 --- a/test/integration/043_custom_aliases_test/macros/macros.sql +++ /dev/null @@ -1,21 +0,0 @@ - -{% macro generate_alias_name(node, custom_alias_name=none) -%} - {%- if custom_alias_name is none -%} - {{ node.name }} - {%- else -%} - custom_{{ custom_alias_name | trim }} - {%- endif -%} -{%- endmacro %} - - -{% macro string_literal(s) -%} - {{ adapter_macro('test.string_literal', s) }} -{%- endmacro %} - -{% macro default__string_literal(s) %} - '{{ s }}'::text -{% endmacro %} - -{% macro bigquery__string_literal(s) %} - cast('{{ s }}' as string) -{% endmacro %} diff --git a/test/integration/043_custom_aliases_test/models/model1.sql b/test/integration/043_custom_aliases_test/models/model1.sql deleted file mode 100644 index 000ce2ed6c5..00000000000 --- a/test/integration/043_custom_aliases_test/models/model1.sql +++ /dev/null @@ -1,3 +0,0 @@ -{{ config(materialized='table', alias='alias') }} - -select {{ string_literal(this.name) }} as model_name diff --git a/test/integration/043_custom_aliases_test/models/model2.sql b/test/integration/043_custom_aliases_test/models/model2.sql deleted file mode 100644 index a2de8f099ea..00000000000 --- a/test/integration/043_custom_aliases_test/models/model2.sql +++ /dev/null @@ -1,3 +0,0 @@ -{{ config(materialized='table') }} - -select {{ string_literal(this.name) }} as model_name diff --git a/test/integration/043_custom_aliases_test/models/schema.yml b/test/integration/043_custom_aliases_test/models/schema.yml deleted file mode 100644 index 4d43836e482..00000000000 --- a/test/integration/043_custom_aliases_test/models/schema.yml +++ /dev/null @@ -1,15 +0,0 @@ -version: 2 - -models: - - name: model1 - columns: - - name: model_name - tests: - - accepted_values: - values: ['custom_alias'] - - name: model2 - columns: - - name: model_name - tests: - - accepted_values: - values: ['model2'] diff --git a/test/integration/043_custom_aliases_test/test_custom_aliases.py b/test/integration/043_custom_aliases_test/test_custom_aliases.py deleted file mode 100644 index 131941f5e74..00000000000 --- a/test/integration/043_custom_aliases_test/test_custom_aliases.py +++ /dev/null @@ -1,23 +0,0 @@ -from test.integration.base import DBTIntegrationTest, use_profile - - -class TestAliases(DBTIntegrationTest): - @property - def schema(self): - return "custom_aliases_043" - - @property - def models(self): - return "test/integration/043_custom_aliases_test/models" - - @property - def project_config(self): - return { - "macro-paths": ['test/integration/043_custom_aliases_test/macros'], - } - - @use_profile('postgres') - def test_postgres_customer_alias_name(self): - results = self.run_dbt(['run']) - self.assertEqual(len(results), 2) - self.run_dbt(['test']) diff --git a/test/integration/044_run_operations_test/macros/happy_macros.sql b/test/integration/044_run_operations_test/macros/happy_macros.sql deleted file mode 100644 index 6170ebc7657..00000000000 --- a/test/integration/044_run_operations_test/macros/happy_macros.sql +++ /dev/null @@ -1,24 +0,0 @@ -{% macro no_args() %} - {% if execute %} - {% call statement(auto_begin=True) %} - create table "{{ schema }}"."no_args" (id int); - commit; - {% endcall %} - {% endif %} -{% endmacro %} - - -{% 
macro table_name_args(table_name) %} - {% if execute %} - {% call statement(auto_begin=True) %} - create table "{{ schema }}"."{{ table_name }}" (id int); - commit; - {% endcall %} - {% endif %} -{% endmacro %} - -{% macro vacuum(table_name) %} - {% call statement(auto_begin=false) %} - vacuum "{{ schema }}"."{{ table_name }}" - {% endcall %} -{% endmacro %} diff --git a/test/integration/044_run_operations_test/macros/sad_macros.sql b/test/integration/044_run_operations_test/macros/sad_macros.sql deleted file mode 100644 index 4f2c80bc40f..00000000000 --- a/test/integration/044_run_operations_test/macros/sad_macros.sql +++ /dev/null @@ -1,7 +0,0 @@ -{% macro syntax_error() %} - {% if execute %} - {% call statement() %} - select NOPE NOT A VALID QUERY - {% endcall %} - {% endif %} -{% endmacro %} diff --git a/test/integration/044_run_operations_test/models/model.sql b/test/integration/044_run_operations_test/models/model.sql deleted file mode 100644 index 43258a71464..00000000000 --- a/test/integration/044_run_operations_test/models/model.sql +++ /dev/null @@ -1 +0,0 @@ -select 1 as id diff --git a/test/integration/044_run_operations_test/test_run_operations.py b/test/integration/044_run_operations_test/test_run_operations.py deleted file mode 100644 index c66de6d8af5..00000000000 --- a/test/integration/044_run_operations_test/test_run_operations.py +++ /dev/null @@ -1,58 +0,0 @@ -from test.integration.base import DBTIntegrationTest, use_profile -import yaml - - -class TestOperations(DBTIntegrationTest): - @property - def schema(self): - return "run_operations_044" - - @property - def models(self): - return "test/integration/044_run_operations_test/models" - - @property - def project_config(self): - return { - "macro-paths": ['test/integration/044_run_operations_test/macros'], - } - - def run_operation(self, macro, expect_pass=True, extra_args=None, **kwargs): - args = ['run-operation'] - if macro: - args.extend(('--macro', macro)) - if kwargs: - args.extend(('--args', yaml.safe_dump(kwargs))) - if extra_args: - args.extend(extra_args) - return self.run_dbt(args, expect_pass=expect_pass) - - @use_profile('postgres') - def test__postgres_macro_noargs(self): - self.run_operation('no_args') - self.assertTableDoesExist('no_args') - - @use_profile('postgres') - def test__postgres_macro_args(self): - self.run_operation('table_name_args', table_name='my_fancy_table') - self.assertTableDoesExist('my_fancy_table') - - @use_profile('postgres') - def test__postgres_macro_exception(self): - self.run_operation('syntax_error', False) - - @use_profile('postgres') - def test__postgres_macro_missing(self): - self.run_operation('this_macro_does_not_exist', False) - - @use_profile('postgres') - def test__postgres_cannot_connect(self): - self.run_operation('no_args', - extra_args=['--target', 'noaccess'], - expect_pass=False) - - @use_profile('postgres') - def test__postgres_vacuum(self): - self.run_dbt(['run']) - # this should succeed - self.run_operation('vacuum', table_name='model') diff --git a/test/integration/base.py b/test/integration/base.py index e1c77954b37..8f0c7838eae 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -1,7 +1,6 @@ import unittest import dbt.main as dbt -import os -import shutil +import os, shutil import yaml import random import time @@ -9,8 +8,7 @@ from datetime import datetime from functools import wraps -import pytest -from mock import patch +from nose.plugins.attrib import attr import dbt.flags as flags @@ -18,9 +16,6 @@ from dbt.clients.jinja import 
template_cache from dbt.config import RuntimeConfig from dbt.compat import basestring -from dbt.context import common - -from contextlib import contextmanager from dbt.logger import GLOBAL_LOGGER as logger import logging @@ -76,12 +71,6 @@ class DBTIntegrationTest(unittest.TestCase): prefix = "test{}{:04}".format(int(time.time()), random.randint(0, 9999)) setup_alternate_db = False - @property - def database_host(self): - if os.name == 'nt': - return 'localhost' - return 'database' - def postgres_profile(self): return { 'config': { @@ -92,7 +81,7 @@ def postgres_profile(self): 'default2': { 'type': 'postgres', 'threads': 4, - 'host': self.database_host, + 'host': 'database', 'port': 5432, 'user': 'root', 'pass': 'password', @@ -102,7 +91,7 @@ def postgres_profile(self): 'noaccess': { 'type': 'postgres', 'threads': 4, - 'host': self.database_host, + 'host': 'database', 'port': 5432, 'user': 'noaccess', 'pass': 'password', @@ -377,7 +366,7 @@ def _get_schema_fqn(self, database, schema): def _create_schema_named(self, database, schema): if self.adapter_type == 'bigquery': - self.adapter.create_schema(database, schema) + self.adapter.create_schema(database, schema, '__test') else: schema_fqn = self._get_schema_fqn(database, schema) self.run_sql(self.CREATE_SCHEMA_STATEMENT.format(schema_fqn)) @@ -386,7 +375,7 @@ def _create_schema_named(self, database, schema): def _drop_schema_named(self, database, schema): if self.adapter_type == 'bigquery' or self.adapter_type == 'presto': self.adapter.drop_schema( - database, schema + database, schema, '__test' ) else: schema_fqn = self._get_schema_fqn(database, schema) @@ -394,10 +383,9 @@ def _drop_schema_named(self, database, schema): def _create_schemas(self): schema = self.unique_schema() - with self.adapter.connection_named('__test'): - self._create_schema_named(self.default_database, schema) - if self.setup_alternate_db and self.adapter_type == 'snowflake': - self._create_schema_named(self.alternative_database, schema) + self._create_schema_named(self.default_database, schema) + if self.setup_alternate_db and self.adapter_type == 'snowflake': + self._create_schema_named(self.alternative_database, schema) def _drop_schemas_adapter(self): schema = self.unique_schema() @@ -427,11 +415,10 @@ def _drop_schemas_sql(self): self._created_schemas.clear() def _drop_schemas(self): - with self.adapter.connection_named('__test'): - if self.adapter_type == 'bigquery' or self.adapter_type == 'presto': - self._drop_schemas_adapter() - else: - self._drop_schemas_sql() + if self.adapter_type == 'bigquery' or self.adapter_type == 'presto': + self._drop_schemas_adapter() + else: + self._drop_schemas_sql() @property def project_config(self): @@ -441,25 +428,16 @@ def project_config(self): def profile_config(self): return {} - def run_dbt(self, args=None, expect_pass=True, strict=True, parser=True): + def run_dbt(self, args=None, expect_pass=True, strict=True): if args is None: args = ["run"] - final_args = [] - if strict: - final_args.append('--strict') - if parser: - final_args.append('--test-new-parser') - if os.getenv('DBT_TEST_SINGLE_THREADED') in ('y', 'Y', '1'): - final_args.append('--single-threaded') - - final_args.extend(args) - final_args.append('--log-cache-events') - - logger.info("Invoking dbt with {}".format(final_args)) + args = ["--strict"] + args + args.append('--log-cache-events') + logger.info("Invoking dbt with {}".format(args)) - res, success = dbt.handle_and_check(final_args) + res, success = dbt.handle_and_check(args) self.assertEqual( success, 
expect_pass, "dbt exit state did not match expected") @@ -513,7 +491,8 @@ def run_sql_bigquery(self, sql, fetch): else: return list(res) - def run_sql_presto(self, sql, fetch, conn): + def run_sql_presto(self, sql, fetch, connection_name=None): + conn = self.adapter.acquire_connection(connection_name) cursor = conn.handle.cursor() try: cursor.execute(sql) @@ -534,24 +513,6 @@ def run_sql_presto(self, sql, fetch, conn): conn.handle.commit() conn.transaction_open = False - def run_sql_common(self, sql, fetch, conn): - with conn.handle.cursor() as cursor: - try: - cursor.execute(sql) - conn.handle.commit() - if fetch == 'one': - return cursor.fetchone() - elif fetch == 'all': - return cursor.fetchall() - else: - return - except BaseException as e: - conn.handle.rollback() - print(sql) - print(e) - raise e - finally: - conn.transaction_open = False def run_sql(self, query, fetch='None', kwargs=None, connection_name=None): if connection_name is None: @@ -561,15 +522,30 @@ def run_sql(self, query, fetch='None', kwargs=None, connection_name=None): return sql = self.transform_sql(query, kwargs=kwargs) - - with self.test_connection(connection_name) as conn: - logger.debug('test connection "{}" executing: {}'.format(conn.name, sql)) - if self.adapter_type == 'bigquery': - return self.run_sql_bigquery(sql, fetch) - elif self.adapter_type == 'presto': - return self.run_sql_presto(sql, fetch, conn) - else: - return self.run_sql_common(sql, fetch, conn) + if self.adapter_type == 'bigquery': + return self.run_sql_bigquery(sql, fetch) + elif self.adapter_type == 'presto': + return self.run_sql_presto(sql, fetch, connection_name) + + conn = self.adapter.acquire_connection(connection_name) + with conn.handle.cursor() as cursor: + logger.debug('test connection "{}" executing: {}'.format(connection_name, sql)) + try: + cursor.execute(sql) + conn.handle.commit() + if fetch == 'one': + return cursor.fetchone() + elif fetch == 'all': + return cursor.fetchall() + else: + return + except BaseException as e: + conn.handle.rollback() + print(query) + print(e) + raise e + finally: + conn.transaction_open = False def _ilike(self, target, value): # presto has this regex substitution monstrosity instead of 'ilike' @@ -630,23 +606,11 @@ def filter_many_columns(self, column): char_size = 16777216 return (table_name, column_name, data_type, char_size) - @contextmanager - def test_connection(self, name=None): - """Create a test connection context where all executed macros, etc will - get self.adapter as the adapter. 
- - This allows tests to run normal adapter macros as if reset_adapters() - were not called by handle_and_check (for asserts, etc) - """ - if name is None: - name = '__test' - with patch.object(common, 'get_adapter', return_value=self.adapter): - with self.adapter.connection_named(name) as conn: - yield conn - def get_relation_columns(self, relation): - with self.test_connection(): - columns = self.adapter.get_columns_in_relation(relation) + columns = self.adapter.get_columns_in_relation( + relation, + model_name='__test' + ) return sorted(((c.name, c.dtype, c.char_size) for c in columns), key=lambda x: x[0]) @@ -811,8 +775,7 @@ def assertManyRelationsEqual(self, relations, default_schema=None, default_datab specs.append(relation) - with self.test_connection(): - column_specs = self.get_many_relation_columns(specs) + column_specs = self.get_many_relation_columns(specs) # make sure everyone has equal column definitions first_columns = None @@ -998,7 +961,7 @@ def test_snowflake_thing(self): self.assertEqual(self.adapter_type, 'snowflake') """ def outer(wrapped): - @getattr(pytest.mark, 'profile_'+profile_name) + @attr(type=profile_name) @wraps(wrapped) def func(self, *args, **kwargs): return wrapped(self, *args, **kwargs) diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index f982d40833f..b667cf38a6f 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -69,7 +69,7 @@ def get_adapter(self, target): profile=profile, ) adapter = BigQueryAdapter(config) - inject_adapter(adapter) + inject_adapter('bigquery', adapter) return adapter @@ -109,14 +109,14 @@ def test_cancel_open_connections_empty(self): def test_cancel_open_connections_master(self): adapter = self.get_adapter('oauth') - adapter.connections.thread_connections[0] = object() + adapter.connections.in_use['master'] = object() self.assertEqual(adapter.cancel_open_connections(), None) def test_cancel_open_connections_single(self): adapter = self.get_adapter('oauth') - adapter.connections.thread_connections.update({ - 0: object(), - 1: object(), + adapter.connections.in_use.update({ + 'master': object(), + 'model': object(), }) # actually does nothing self.assertEqual(adapter.cancel_open_connections(), None) diff --git a/test/unit/test_config.py b/test/unit/test_config.py index ce3971a89c4..1163468a535 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -42,6 +42,7 @@ def temp_cd(path): 'sort': 'timestamp', 'materialized': 'incremental', 'dist': 'user_id', + 'sql_where': 'created_at > (select max(created_at) from {{ this }})', 'unique_key': 'id' }, 'base': { @@ -356,6 +357,7 @@ def from_args(self, project_profile_name='default', **kwargs): kw = { 'args': self.args, 'project_profile_name': project_profile_name, + 'cli_vars': {}, } kw.update(kwargs) return dbt.config.Profile.from_args(**kw) @@ -482,7 +484,7 @@ def test_cli_and_env_vars(self): self.args.target = 'cli-and-env-vars' self.args.vars = '{"cli_value_host": "cli-postgres-host"}' with mock.patch.dict(os.environ, self.env_override): - profile = self.from_args() + profile = self.from_args(cli_vars=None) from_raw = self.from_raw_profile_info( target_override='cli-and-env-vars', cli_vars={'cli_value_host': 'cli-postgres-host'}, @@ -754,7 +756,7 @@ def test_no_project(self): def test_invalid_version(self): self.default_project_data['require-dbt-version'] = 'hello!' 
- with self.assertRaises(dbt.exceptions.DbtProjectError): + with self.assertRaises(dbt.exceptions.DbtProjectError) as exc: dbt.config.Project.from_project_config(self.default_project_data) def test_unsupported_version(self): @@ -798,18 +800,16 @@ def test__get_unused_resource_config_paths_empty(self): ))}, []) self.assertEqual(len(unused), 0) - def test__warn_for_unused_resource_config_paths_empty(self): + @mock.patch.object(dbt.config.project, 'logger') + def test__warn_for_unused_resource_config_paths_empty(self, mock_logger): project = dbt.config.Project.from_project_config( self.default_project_data ) - dbt.flags.WARN_ERROR = True - try: - unused = project.warn_for_unused_resource_config_paths({'models': frozenset(( - ('my_test_project', 'foo', 'bar'), - ('my_test_project', 'foo', 'baz'), - ))}, []) - finally: - dbt.flags.WARN_ERROR = False + unused = project.warn_for_unused_resource_config_paths({'models': frozenset(( + ('my_test_project', 'foo', 'bar'), + ('my_test_project', 'foo', 'baz'), + ))}, []) + mock_logger.info.assert_not_called() def test_none_values(self): self.default_project_data.update({ @@ -892,7 +892,8 @@ def test__warn_for_unused_resource_config_paths(self, warn_or_error): unused = project.warn_for_unused_resource_config_paths(self.used, []) warn_or_error.assert_called_once() - def test__warn_for_unused_resource_config_paths_disabled(self): + @mock.patch.object(dbt.config.project, 'logger') + def test__warn_for_unused_resource_config_paths_disabled(self, mock_logger): project = dbt.config.Project.from_project_config( self.default_project_data ) @@ -922,7 +923,7 @@ def test_from_project_root(self): def test_with_invalid_package(self): self.write_packages({'invalid': ['not a package of any kind']}) - with self.assertRaises(dbt.exceptions.DbtProjectError): + with self.assertRaises(dbt.exceptions.DbtProjectError) as exc: dbt.config.Project.from_project_root(self.project_dir, {}) @@ -970,7 +971,8 @@ def from_parts(self, exc=None): if exc is None: return dbt.config.RuntimeConfig.from_parts(project, profile, self.args) - with self.assertRaises(exc) as err: + with self.assertRaises(exc) as raised: + err = raised dbt.config.RuntimeConfig.from_parts(project, profile, self.args) return err diff --git a/test/unit/test_deps.py b/test/unit/test_deps.py index 06eb163e38d..1e18856b7f0 100644 --- a/test/unit/test_deps.py +++ b/test/unit/test_deps.py @@ -151,7 +151,8 @@ def test_resolve_missing_package(self): package='fishtown-analytics-test/a', version='0.1.2' ) - with self.assertRaises(dbt.exceptions.DependencyException) as exc: + with self.assertRaises(dbt.exceptions.DependencyException) as e: + exc = e a.resolve_version() msg = 'Package fishtown-analytics-test/a was not found in the package index' @@ -168,7 +169,8 @@ def test_resolve_missing_version(self): package='fishtown-analytics-test/a', version='0.1.2' ) - with self.assertRaises(dbt.exceptions.DependencyException) as exc: + with self.assertRaises(dbt.exceptions.DependencyException) as e: + exc = e a.resolve_version() msg = ( "Could not find a matching version for package " @@ -193,7 +195,8 @@ def test_resolve_conflict(self): version='0.1.3' ) c = a.incorporate(b) - with self.assertRaises(dbt.exceptions.DependencyException) as exc: + with self.assertRaises(dbt.exceptions.DependencyException) as e: + exc = e c.resolve_version() msg = ( "Version error for package fishtown-analytics-test/a: Could not " diff --git a/test/unit/test_docs_blocks.py b/test/unit/test_docs_blocks.py index 104ae251af4..17fd0e9fd38 100644 --- 
a/test/unit/test_docs_blocks.py +++ b/test/unit/test_docs_blocks.py @@ -1,4 +1,3 @@ -import os import mock import unittest @@ -55,15 +54,6 @@ class DocumentationParserTest(unittest.TestCase): def setUp(self): - if os.name == 'nt': - self.root_path = 'C:\\test_root' - self.subdir_path = 'C:\\test_root\\test_subdir' - self.testfile_path = 'C:\\test_root\\test_subdir\\test_file.md' - else: - self.root_path = '/test_root' - self.subdir_path = '/test_root/test_subdir' - self.testfile_path = '/test_root/test_subdir/test_file.md' - profile_data = { 'outputs': { 'test': { @@ -82,14 +72,14 @@ def setUp(self): 'name': 'root', 'version': '0.1', 'profile': 'test', - 'project-root': self.root_path, + 'project-root': '/test_root', } subdir_project = { 'name': 'some_package', 'version': '0.1', 'profile': 'test', - 'project-root': self.subdir_path, + 'project-root': '/test_root/test_subdir', 'quoting': {}, } self.root_project_config = config_from_parts_or_dicts( @@ -98,33 +88,35 @@ def setUp(self): self.subdir_project_config = config_from_parts_or_dicts( project=subdir_project, profile=profile_data ) + + @mock.patch('dbt.clients.system') def test_load_file(self, system): system.load_file_contents.return_value = TEST_DOCUMENTATION_FILE system.find_matching.return_value = [{ 'relative_path': 'test_file.md', - 'absolute_path': self.testfile_path, - 'searched_path': self.subdir_path, + 'absolute_path': '/test_root/test_subdir/test_file.md', + 'searched_path': '/test_root/test_subdir', }] results = list(docs.DocumentationParser.load_file( - 'some_package', self.root_path, ['test_subdir']) + 'some_package', '/test_root', ['test_subdir']) ) self.assertEqual(len(results), 1) result = results[0] self.assertEqual(result.package_name, 'some_package') self.assertEqual(result.file_contents, TEST_DOCUMENTATION_FILE) self.assertEqual(result.original_file_path, - self.testfile_path) - self.assertEqual(result.root_path, self.root_path) + '/test_root/test_subdir/test_file.md') + self.assertEqual(result.root_path, '/test_root') self.assertEqual(result.resource_type, NodeType.Documentation) self.assertEqual(result.path, 'test_file.md') def test_parse(self): docfile = UnparsedDocumentationFile( - root_path=self.root_path, + root_path='/test_root', resource_type=NodeType.Documentation, path='test_file.md', - original_file_path=self.testfile_path, + original_file_path='/test_root/test_subdir/test_file.md', package_name='some_package', file_contents=TEST_DOCUMENTATION_FILE ) diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index 5d34c3d97d5..88f3f15e694 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -32,7 +32,6 @@ def tearDown(self): self.load_projects_patcher.stop() self.find_matching_patcher.stop() self.load_file_contents_patcher.stop() - self.get_adapter_patcher.stop() def setUp(self): dbt.flags.STRICT_MODE = True @@ -42,8 +41,6 @@ def setUp(self): self.load_projects_patcher = patch('dbt.loader._load_projects') self.find_matching_patcher = patch('dbt.clients.system.find_matching') self.load_file_contents_patcher = patch('dbt.clients.system.load_file_contents') - self.get_adapter_patcher = patch('dbt.context.parser.get_adapter') - self.factory = self.get_adapter_patcher.start() def mock_write_gpickle(graph, outfile): self.graph_result = graph @@ -55,7 +52,7 @@ def mock_write_gpickle(graph, outfile): 'test': { 'type': 'postgres', 'threads': 4, - 'host': 'thishostshouldnotexist', + 'host': 'database', 'port': 5432, 'user': 'root', 'pass': 'password', @@ -208,6 +205,7 @@ def 
test__model_incremental(self): "test_models_compile": { "model_one": { "materialized": "incremental", + "sql_where": "created_at", "unique_key": "id" }, } diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py index 5bcab016666..21abd573b5b 100644 --- a/test/unit/test_jinja.py +++ b/test/unit/test_jinja.py @@ -1,9 +1,6 @@ import unittest from dbt.clients.jinja import get_template -from dbt.clients.jinja import extract_toplevel_blocks -from dbt.exceptions import CompilationException - class TestJinja(unittest.TestCase): def test_do(self): @@ -12,316 +9,3 @@ def test_do(self): template = get_template(s, {}) mod = template.make_module() self.assertEqual(mod.my_dict, {'a': 1}) - - -class TestBlockLexer(unittest.TestCase): - def test_basic(self): - body = '{{ config(foo="bar") }}\r\nselect * from this.that\r\n' - block_data = ' \n\r\t{%- mytype foo %}'+body+'{%endmytype -%}' - blocks = extract_toplevel_blocks(block_data) - self.assertEqual(len(blocks), 1) - self.assertEqual(blocks[0].block_type_name, 'mytype') - self.assertEqual(blocks[0].block_name, 'foo') - self.assertEqual(blocks[0].contents, body) - self.assertEqual(blocks[0].full_block, block_data) - - def test_multiple(self): - body_one = '{{ config(foo="bar") }}\r\nselect * from this.that\r\n' - body_two = ( - '{{ config(bar=1)}}\r\nselect * from {% if foo %} thing ' - '{% else %} other_thing {% endif %}' - ) - - block_data = ( - ' {% mytype foo %}' + body_one + '{% endmytype %}' + - '\r\n{% othertype bar %}' + body_two + '{% endothertype %}' - ) - all_blocks = extract_toplevel_blocks(block_data) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 2) - - def test_comments(self): - body = '{{ config(foo="bar") }}\r\nselect * from this.that\r\n' - comment = '{# my comment #}' - block_data = ' \n\r\t{%- mytype foo %}'+body+'{%endmytype -%}' - all_blocks = extract_toplevel_blocks(comment+block_data) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 1) - self.assertEqual(blocks[0].block_type_name, 'mytype') - self.assertEqual(blocks[0].block_name, 'foo') - self.assertEqual(blocks[0].contents, body) - self.assertEqual(blocks[0].full_block, block_data) - - def test_evil_comments(self): - body = '{{ config(foo="bar") }}\r\nselect * from this.that\r\n' - comment = '{# external comment {% othertype bar %} select * from thing.other_thing{% endothertype %} #}' - block_data = ' \n\r\t{%- mytype foo %}'+body+'{%endmytype -%}' - all_blocks = extract_toplevel_blocks(comment+block_data) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 1) - self.assertEqual(blocks[0].block_type_name, 'mytype') - self.assertEqual(blocks[0].block_name, 'foo') - self.assertEqual(blocks[0].contents, body) - self.assertEqual(blocks[0].full_block, block_data) - - def test_nested_comments(self): - body = '{# my comment #} {{ config(foo="bar") }}\r\nselect * from {# my other comment embedding {% endmytype %} #} this.that\r\n' - block_data = ' \n\r\t{%- mytype foo %}'+body+'{% endmytype -%}' - comment = '{# external comment {% othertype bar %} select * from thing.other_thing{% endothertype %} #}' - all_blocks = extract_toplevel_blocks(comment+block_data) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 1) - self.assertEqual(blocks[0].block_type_name, 'mytype') - self.assertEqual(blocks[0].block_name, 'foo') - self.assertEqual(blocks[0].contents, body) - 
self.assertEqual(blocks[0].full_block, block_data) - - def test_complex_file(self): - all_blocks = extract_toplevel_blocks(complex_archive_file) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 3) - self.assertEqual(blocks[0].block_type_name, 'mytype') - self.assertEqual(blocks[0].block_name, 'foo') - self.assertEqual(blocks[0].full_block, '{% mytype foo %} some stuff {% endmytype %}') - self.assertEqual(blocks[0].contents, ' some stuff ') - self.assertEqual(blocks[1].block_type_name, 'mytype') - self.assertEqual(blocks[1].block_name, 'bar') - self.assertEqual(blocks[1].full_block, bar_block) - self.assertEqual(blocks[1].contents, bar_block[16:-15].rstrip()) - self.assertEqual(blocks[2].block_type_name, 'myothertype') - self.assertEqual(blocks[2].block_name, 'x') - self.assertEqual(blocks[2].full_block, x_block.strip()) - self.assertEqual(blocks[2].contents, x_block[len('\n{% myothertype x %}'):-len('{% endmyothertype %}\n')]) - - def test_peaceful_macro_coexistence(self): - body = '{# my macro #} {% macro foo(a, b) %} do a thing {%- endmacro %} {# my model #} {% a b %} {% enda %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 2) - self.assertEqual(blocks[0].block_type_name, 'macro') - self.assertEqual(blocks[0].block_name, 'foo') - self.assertEqual(blocks[0].contents, ' do a thing') - self.assertEqual(blocks[1].block_type_name, 'a') - self.assertEqual(blocks[1].block_name, 'b') - self.assertEqual(blocks[1].contents, ' ') - - def test_macro_with_crazy_args(self): - body = '''{% macro foo(a, b=asdf("cool this is 'embedded'" * 3) + external_var, c)%}cool{# block comment with {% endmacro %} in it #} stuff here {% endmacro %}''' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 1) - self.assertEqual(blocks[0].block_type_name, 'macro') - self.assertEqual(blocks[0].block_name, 'foo') - self.assertEqual(blocks[0].contents, 'cool{# block comment with {% endmacro %} in it #} stuff here ') - - def test_materialization_parse(self): - body = '{% materialization xxx, default %} ... {% endmaterialization %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 1) - self.assertEqual(blocks[0].block_type_name, 'materialization') - self.assertEqual(blocks[0].block_name, 'xxx') - self.assertEqual(blocks[0].full_block, body) - - body = '{% materialization xxx, adapter="other" %} ... 
{% endmaterialization %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 1) - self.assertEqual(blocks[0].block_type_name, 'materialization') - self.assertEqual(blocks[0].block_name, 'xxx') - self.assertEqual(blocks[0].full_block, body) - - def test_nested_failure(self): - # we don't allow nesting same blocks - # ideally we would not allow nesting any, but that's much harder - body = '{% myblock a %} {% myblock b %} {% endmyblock %} {% endmyblock %}' - with self.assertRaises(CompilationException): - extract_toplevel_blocks(body) - - def test_incomplete_block_failure(self): - fullbody = '{% myblock foo %} {% endblock %}' - for length in range(1, len(fullbody)-1): - body = fullbody[:length] - with self.assertRaises(CompilationException): - extract_toplevel_blocks(body) - - def test_wrong_end_failure(self): - body = '{% myblock foo %} {% endotherblock %}' - with self.assertRaises(CompilationException): - extract_toplevel_blocks(body) - - def test_comment_no_end_failure(self): - body = '{# ' - with self.assertRaises(CompilationException): - extract_toplevel_blocks(body) - - def test_comment_only(self): - body = '{# myblock #}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 0) - - def test_comment_block_self_closing(self): - # test the case where a comment start looks a lot like it closes itself - # (but it doesn't in jinja!) - body = '{#} {% myblock foo %} {#}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 0) - - def test_embedded_self_closing_comment_block(self): - body = '{% myblock foo %} {#}{% endmyblock %} {#}{% endmyblock %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 1) - self.assertEqual(blocks[0].full_block, body) - self.assertEqual(blocks[0].contents, ' {#}{% endmyblock %} {#}') - - def test_set_statement(self): - body = '{% set x = 1 %}{% myblock foo %}hi{% endmyblock %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 2) - self.assertEqual(blocks[0].full_block, '{% set x = 1 %}') - self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') - - def test_set_block(self): - body = '{% set x %}1{% endset %}{% myblock foo %}hi{% endmyblock %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 2) - self.assertEqual(blocks[0].contents, '1') - self.assertEqual(blocks[0].block_type_name, 'set') - self.assertEqual(blocks[0].block_name, 'x') - self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') - - def test_crazy_set_statement(self): - body = '{% set x = (thing("{% myblock foo %}")) %}{% otherblock bar %}x{% endotherblock %}{% set y = otherthing("{% myblock foo %}") %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 3) - self.assertEqual(blocks[0].full_block, '{% set x = (thing("{% myblock foo %}")) %}') - self.assertEqual(blocks[0].block_type_name, 'set') - self.assertEqual(blocks[1].full_block, '{% otherblock bar %}x{% endotherblock %}') - 
self.assertEqual(blocks[1].block_type_name, 'otherblock') - self.assertEqual(blocks[2].full_block, '{% set y = otherthing("{% myblock foo %}") %}') - self.assertEqual(blocks[2].block_type_name, 'set') - - def test_do_statement(self): - body = '{% do thing.update() %}{% myblock foo %}hi{% endmyblock %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 2) - self.assertEqual(blocks[0].full_block, '{% do thing.update() %}') - self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') - - def test_deceptive_do_statement(self): - body = '{% do thing %}{% myblock foo %}hi{% endmyblock %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 2) - self.assertEqual(blocks[0].full_block, '{% do thing %}') - self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') - - def test_do_block(self): - body = '{% do %}thing.update(){% enddo %}{% myblock foo %}hi{% endmyblock %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 2) - self.assertEqual(blocks[0].contents, 'thing.update()') - self.assertEqual(blocks[0].block_type_name, 'do') - self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') - - def test_crazy_do_statement(self): - body = '{% do (thing("{% myblock foo %}")) %}{% otherblock bar %}x{% endotherblock %}{% do otherthing("{% myblock foo %}") %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 3) - self.assertEqual(blocks[0].full_block, '{% do (thing("{% myblock foo %}")) %}') - self.assertEqual(blocks[0].block_type_name, 'do') - self.assertEqual(blocks[1].full_block, '{% otherblock bar %}x{% endotherblock %}') - self.assertEqual(blocks[1].block_type_name, 'otherblock') - self.assertEqual(blocks[2].full_block, '{% do otherthing("{% myblock foo %}") %}') - self.assertEqual(blocks[2].block_type_name, 'do') - - def test_awful_jinja(self): - all_blocks = extract_toplevel_blocks(if_you_do_this_you_are_awful) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 4) - self.assertEqual(blocks[0].block_type_name, 'do') - self.assertEqual(blocks[0].full_block, '''{% do\n set('foo="bar"')\n%}''') - self.assertEqual(blocks[1].block_type_name, 'set') - self.assertEqual(blocks[1].full_block, '''{% set x = ("100" + "hello'" + '%}') %}''') - self.assertEqual(blocks[2].block_type_name, 'archive') - self.assertEqual(blocks[2].contents, '\n '.join([ - '''{% set x = ("{% endarchive %}" + (40 * '%})')) %}''', - '{# {% endarchive %} #}', - '{% embedded %}', - ' some block data right here', - '{% endembedded %}' - ])) - self.assertEqual(blocks[3].block_type_name, 'materialization') - self.assertEqual(blocks[3].contents, '\nhi\n') - - def test_quoted_endblock_within_block(self): - body = '{% myblock something -%} {% set x = ("{% endmyblock %}") %} {% endmyblock %}' - all_blocks = extract_toplevel_blocks(body) - blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] - self.assertEqual(len(blocks), 1) - self.assertEqual(blocks[0].block_type_name, 'myblock') - self.assertEqual(blocks[0].contents, '{% set x = ("{% endmyblock %}") %} ') - -bar_block = '''{% mytype bar %} -{# a comment - that inside it has 
- {% mytype baz %} -{% endmyothertype %} -{% endmytype %} -{% endmytype %} - {# -{% endmytype %}#} - -some other stuff - -{%- endmytype%}''' - -x_block = ''' -{% myothertype x %} -before -{##} -and after -{% endmyothertype %} -''' - -complex_archive_file = ''' -{#some stuff {% mytype foo %} #} -{% mytype foo %} some stuff {% endmytype %} - -'''+bar_block+x_block - - -if_you_do_this_you_are_awful = ''' -{#} here is a comment with a block inside {% block x %} asdf {% endblock %} {#} -{% do - set('foo="bar"') -%} -{% set x = ("100" + "hello'" + '%}') %} -{% archive something -%} - {% set x = ("{% endarchive %}" + (40 * '%})')) %} - {# {% endarchive %} #} - {% embedded %} - some block data right here - {% endembedded %} -{%- endarchive %} - -{% raw %} - {% set x = SYNTAX ERROR} -{% endraw %} - - -{% materialization whatever, adapter='thing' %} -hi -{% endmaterialization %} -''' - - diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index 854d607dd4e..7cce0ca75e4 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -54,6 +54,7 @@ def setUp(self): 'project-root': os.path.abspath('.'), } + self.root_project_config = config_from_parts_or_dicts( project=root_project, profile=profile_data, @@ -75,11 +76,8 @@ def setUp(self): 'root': self.root_project_config, 'snowplow': self.snowplow_project_config } - self.patcher = mock.patch('dbt.context.parser.get_adapter') - self.factory = self.patcher.start() - def tearDown(self): - self.patcher.stop() + class SourceConfigTest(BaseParserTest): def test__source_config_single_call(self): @@ -208,11 +206,7 @@ def setUp(self): database='test', schema='foo', identifier='bar', - resource_type='source', - quoting={ - 'schema': True, - 'identifier': False, - } + resource_type='source' ) self._expected_source_tests = [ @@ -474,9 +468,6 @@ def test__source_schema(self): - name: my_source loader: some_loader description: my source description - quoting: - schema: True - identifier: True freshness: warn_after: count: 10 @@ -485,7 +476,7 @@ def test__source_schema(self): count: 20 period: hour loaded_at_field: something - schema: '{{ var("test_schema_name") }}' + schema: foo tables: - name: my_table description: "my table description" @@ -494,8 +485,6 @@ def test__source_schema(self): warn_after: count: 7 period: hour - quoting: - identifier: False columns: - name: id description: user ID @@ -597,8 +586,6 @@ def test__model_schema(self): def test__mixed_schema(self): test_yml = yaml.safe_load(''' version: 2 - quoting: - database: True models: - name: model_one description: blah blah @@ -622,9 +609,6 @@ def test__mixed_schema(self): - name: my_source loader: some_loader description: my source description - quoting: - schema: True - identifier: True freshness: warn_after: count: 10 @@ -642,8 +626,6 @@ def test__mixed_schema(self): warn_after: count: 7 period: hour - quoting: - identifier: False columns: - name: id description: user ID @@ -699,9 +681,6 @@ def test__source_schema_invalid_test_strict(self): - name: my_source loader: some_loader description: my source description - quoting: - schema: True - identifier: True freshness: warn_after: count: 10 @@ -719,8 +698,6 @@ def test__source_schema_invalid_test_strict(self): warn_after: count: 7 period: hour - quoting: - identifier: False columns: - name: id description: user ID @@ -761,9 +738,6 @@ def test__source_schema_invalid_test_not_strict(self): - name: my_source loader: some_loader description: my source description - quoting: - schema: True - identifier: True freshness: warn_after: 
count: 10 @@ -781,8 +755,6 @@ def test__source_schema_invalid_test_not_strict(self): warn_after: count: 7 period: hour - quoting: - identifier: False columns: - name: id description: user ID @@ -851,8 +823,8 @@ def test__schema_v2_as_v1(self, mock_logger, find_schema_yml): parser.load_and_parse( 'test', root_dir, relative_dirs ) - self.assertIn('https://docs.getdbt.com/docs/schemayml-files', - str(cm.exception)) + self.assertIn('https://docs.getdbt.com/v0.11/docs/schemayml-files', + str(cm.exception)) @mock.patch.object(SchemaParser, 'find_schema_yml') @mock.patch.object(dbt.parser.schemas, 'logger') @@ -874,8 +846,8 @@ def test__schema_v1_version_model(self, mock_logger, find_schema_yml): parser.load_and_parse( 'test', root_dir, relative_dirs ) - self.assertIn('https://docs.getdbt.com/docs/schemayml-files', - str(cm.exception)) + self.assertIn('https://docs.getdbt.com/v0.11/docs/schemayml-files', + str(cm.exception)) @mock.patch.object(SchemaParser, 'find_schema_yml') @mock.patch.object(dbt.parser.schemas, 'logger') @@ -897,15 +869,11 @@ def test__schema_v1_version_1(self, mock_logger, find_schema_yml): parser.load_and_parse( 'test', root_dir, relative_dirs ) - self.assertIn('https://docs.getdbt.com/docs/schemayml-files', - str(cm.exception)) + self.assertIn('https://docs.getdbt.com/v0.11/docs/schemayml-files', + str(cm.exception)) class ParserTest(BaseParserTest): - def _assert_parsed_sql_nodes(self, parse_result, parsed, disabled): - self.assertEqual(parse_result.parsed, parsed) - self.assertEqual(parse_result.disabled, disabled) - def find_input_by_name(self, models, name): return next( @@ -957,9 +925,9 @@ def test__single_model(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -986,8 +954,7 @@ def test__single_model(self): description='', columns={} ) - }, - [] + }, []) ) def test__single_model__nested_configuration(self): @@ -1022,9 +989,9 @@ def test__single_model__nested_configuration(self): self.all_projects, self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -1051,8 +1018,7 @@ def test__single_model__nested_configuration(self): description='', columns={} ) - }, - [] + }, []) ) def test__empty_model(self): @@ -1073,9 +1039,9 @@ def test__empty_model(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -1102,8 +1068,7 @@ def test__empty_model(self): description='', columns={} ) - }, - [] + }, []) ) def test__simple_dependency(self): @@ -1131,9 +1096,9 @@ def test__simple_dependency(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.root.base': ParsedNode( alias='base', name='base', @@ -1187,8 +1152,7 @@ def test__simple_dependency(self): description='', columns={} ) - }, - [] + }, []) ) def test__multiple_dependencies(self): @@ -1244,9 +1208,9 @@ def test__multiple_dependencies(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.root.events': ParsedNode( alias='events', name='events', @@ -1377,8 +1341,7 @@ def test__multiple_dependencies(self): description='', columns={} ), - }, - [] + }, []) ) def 
test__multiple_dependencies__packages(self): @@ -1436,9 +1399,9 @@ def test__multiple_dependencies__packages(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.snowplow.events': ParsedNode( alias='events', name='events', @@ -1554,7 +1517,7 @@ def test__multiple_dependencies__packages(self): empty=False, package_name='root', refs=[['snowplow', 'sessions_tx'], - ['snowplow', 'events_tx']], + ['snowplow', 'events_tx']], sources=[], depends_on={ 'nodes': [], @@ -1570,8 +1533,7 @@ def test__multiple_dependencies__packages(self): description='', columns={} ), - }, - [] + }, []) ) def test__process_refs__packages(self): @@ -1768,9 +1730,9 @@ def test__in_model_config(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -1797,8 +1759,7 @@ def test__in_model_config(self): description='', columns={} ) - }, - [] + }, []) ) def test__root_project_config(self): @@ -1858,9 +1819,9 @@ def test__root_project_config(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.root.table': ParsedNode( alias='table', name='table', @@ -1939,8 +1900,7 @@ def test__root_project_config(self): description='', columns={} ), - }, - [] + }, []) ) def test__other_project_config(self): @@ -2067,9 +2027,9 @@ def test__other_project_config(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - parsed={ + ({ 'model.root.table': ParsedNode( alias='table', name='table', @@ -2175,7 +2135,7 @@ def test__other_project_config(self): columns={} ), }, - disabled=[ + [ ParsedNode( name='disabled', resource_type='model', @@ -2224,7 +2184,7 @@ def test__other_project_config(self): fqn=['snowplow', 'views', 'package'], columns={} ) - ] + ]) ) def test__simple_data_test(self): @@ -2244,9 +2204,9 @@ def test__simple_data_test(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(tests), - { + ({ 'test.root.no_events': ParsedNode( alias='no_events', name='no_events', @@ -2273,8 +2233,7 @@ def test__simple_data_test(self): description='', columns={} ) - }, - [] + }, []) ) def test__simple_macro(self): @@ -2366,9 +2325,9 @@ def test__simple_macro_used_in_model(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -2395,8 +2354,7 @@ def test__simple_macro_used_in_model(self): description='', columns={} ) - }, - [] + }, []) ) def test__macro_no_explicit_project_used_in_model(self): @@ -2416,9 +2374,9 @@ def test__macro_no_explicit_project_used_in_model(self): self.macro_manifest ) - self._assert_parsed_sql_nodes( + self.assertEqual( parser.parse_sql_nodes(models), - { + ({ 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -2445,6 +2403,5 @@ def test__macro_no_explicit_project_used_in_model(self): description='', columns={} ) - }, - [] + }, []) ) diff --git a/test/unit/test_postgres_adapter.py b/test/unit/test_postgres_adapter.py index 1998ec59a71..1677d9e2140 100644 --- a/test/unit/test_postgres_adapter.py +++ b/test/unit/test_postgres_adapter.py @@ -8,10 +8,9 @@ from dbt.exceptions import ValidationException from dbt.logger import GLOBAL_LOGGER as logger # noqa from 
psycopg2 import extensions as psycopg2_extensions -from psycopg2 import DatabaseError, Error import agate -from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection +from .utils import config_from_parts_or_dicts, inject_adapter class TestPostgresAdapter(unittest.TestCase): @@ -30,7 +29,7 @@ def setUp(self): 'type': 'postgres', 'dbname': 'postgres', 'user': 'root', - 'host': 'thishostshouldnotexist', + 'host': 'database', 'pass': 'password', 'port': 5432, 'schema': 'public' @@ -46,45 +45,40 @@ def setUp(self): def adapter(self): if self._adapter is None: self._adapter = PostgresAdapter(self.config) - inject_adapter(self._adapter) + inject_adapter('postgres', self._adapter) return self._adapter - @mock.patch('dbt.adapters.postgres.connections.psycopg2') - def test_acquire_connection_validations(self, psycopg2): + def test_acquire_connection_validations(self): try: connection = self.adapter.acquire_connection('dummy') + self.assertEquals(connection.type, 'postgres') except ValidationException as e: self.fail('got ValidationException: {}'.format(str(e))) except BaseException as e: - self.fail('acquiring connection failed with unknown exception: {}' + self.fail('validation failed with unknown exception: {}' .format(str(e))) - self.assertEquals(connection.type, 'postgres') - psycopg2.connect.assert_called_once() - @mock.patch('dbt.adapters.postgres.connections.psycopg2') - def test_acquire_connection(self, psycopg2): + def test_acquire_connection(self): connection = self.adapter.acquire_connection('dummy') self.assertEquals(connection.state, 'open') self.assertNotEquals(connection.handle, None) - psycopg2.connect.assert_called_once() def test_cancel_open_connections_empty(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_master(self): - key = self.adapter.connections.get_thread_identifier() - self.adapter.connections.thread_connections[key] = mock_connection('master') + self.adapter.connections.in_use['master'] = mock.MagicMock() self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_single(self): - master = mock_connection('master') - model = mock_connection('model') - key = self.adapter.connections.get_thread_identifier() + master = mock.MagicMock() + model = mock.MagicMock() model.handle.get_backend_pid.return_value = 42 - self.adapter.connections.thread_connections.update({ - key: master, - 1: model, + + self.adapter.connections.in_use.update({ + 'master': master, + 'model': model, }) with mock.patch.object(self.adapter.connections, 'add_query') as add_query: query_result = mock.MagicMock() @@ -92,7 +86,7 @@ def test_cancel_open_connections_single(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 1) - add_query.assert_called_once_with('select pg_terminate_backend(42)') + add_query.assert_called_once_with('select pg_terminate_backend(42)', 'master') master.handle.get_backend_pid.assert_not_called() @@ -104,7 +98,7 @@ def test_default_keepalive(self, psycopg2): psycopg2.connect.assert_called_once_with( dbname='postgres', user='root', - host='thishostshouldnotexist', + host='database', password='password', port=5432, connect_timeout=10) @@ -119,7 +113,7 @@ def test_changed_keepalive(self, psycopg2): psycopg2.connect.assert_called_once_with( dbname='postgres', user='root', - host='thishostshouldnotexist', + host='database', password='password', port=5432, connect_timeout=10, @@ -135,7 +129,7 @@ def test_set_zero_keepalive(self, psycopg2): 
psycopg2.connect.assert_called_once_with( dbname='postgres', user='root', - host='thishostshouldnotexist', + host='database', password='password', port=5432, connect_timeout=10) @@ -174,7 +168,7 @@ def setUp(self): 'type': 'postgres', 'dbname': 'postgres', 'user': 'root', - 'host': 'thishostshouldnotexist', + 'host': 'database', 'pass': 'password', 'port': 5432, 'schema': 'public' @@ -200,14 +194,10 @@ def setUp(self): self.mock_execute = self.cursor.execute self.patcher = mock.patch('dbt.adapters.postgres.connections.psycopg2') self.psycopg2 = self.patcher.start() - # there must be a better way to do this... - self.psycopg2.DatabaseError = DatabaseError - self.psycopg2.Error = Error self.psycopg2.connect.return_value = self.handle self.adapter = PostgresAdapter(self.config) - self.adapter.acquire_connection() - inject_adapter(self.adapter) + inject_adapter('postgres', self.adapter) def tearDown(self): # we want a unique self.handle every time. diff --git a/test/unit/test_redshift_adapter.py b/test/unit/test_redshift_adapter.py index 63d9dec822b..5611a2a6efc 100644 --- a/test/unit/test_redshift_adapter.py +++ b/test/unit/test_redshift_adapter.py @@ -9,7 +9,7 @@ from dbt.exceptions import ValidationException, FailedToConnectException from dbt.logger import GLOBAL_LOGGER as logger # noqa -from .utils import config_from_parts_or_dicts, mock_connection +from .utils import config_from_parts_or_dicts @classmethod @@ -30,7 +30,7 @@ def setUp(self): 'type': 'redshift', 'dbname': 'redshift', 'user': 'root', - 'host': 'thishostshouldnotexist', + 'host': 'database', 'pass': 'password', 'port': 5439, 'schema': 'public' @@ -106,19 +106,17 @@ def test_cancel_open_connections_empty(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_master(self): - key = self.adapter.connections.get_thread_identifier() - self.adapter.connections.thread_connections[key] = mock_connection('master') + self.adapter.connections.in_use['master'] = mock.MagicMock() self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_single(self): - master = mock_connection('master') - model = mock_connection('model') + master = mock.MagicMock() + model = mock.MagicMock() model.handle.get_backend_pid.return_value = 42 - key = self.adapter.connections.get_thread_identifier() - self.adapter.connections.thread_connections.update({ - key: master, - 1: model, + self.adapter.connections.in_use.update({ + 'master': master, + 'model': model, }) with mock.patch.object(self.adapter.connections, 'add_query') as add_query: query_result = mock.MagicMock() @@ -126,7 +124,7 @@ def test_cancel_open_connections_single(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 1) - add_query.assert_called_once_with('select pg_terminate_backend(42)') + add_query.assert_called_once_with('select pg_terminate_backend(42)', 'master') master.handle.get_backend_pid.assert_not_called() @@ -137,7 +135,7 @@ def test_default_keepalive(self, psycopg2): psycopg2.connect.assert_called_once_with( dbname='redshift', user='root', - host='thishostshouldnotexist', + host='database', password='password', port=5439, connect_timeout=10, @@ -154,7 +152,7 @@ def test_changed_keepalive(self, psycopg2): psycopg2.connect.assert_called_once_with( dbname='redshift', user='root', - host='thishostshouldnotexist', + host='database', password='password', port=5439, connect_timeout=10, @@ -170,7 +168,7 @@ def test_set_zero_keepalive(self, psycopg2): 
psycopg2.connect.assert_called_once_with( dbname='redshift', user='root', - host='thishostshouldnotexist', + host='database', password='password', port=5439, connect_timeout=10) diff --git a/test/unit/test_base_column.py b/test/unit/test_schema.py similarity index 87% rename from test/unit/test_base_column.py rename to test/unit/test_schema.py index e48917af839..ec411568331 100644 --- a/test/unit/test_base_column.py +++ b/test/unit/test_schema.py @@ -1,13 +1,12 @@ import unittest import decimal - -from dbt.adapters.base import Column +import dbt.schema class TestStringType(unittest.TestCase): def test__character_type(self): - col = Column( + col = dbt.schema.Column( 'fieldname', 'character', char_size=10 @@ -19,7 +18,7 @@ def test__character_type(self): class TestNumericType(unittest.TestCase): def test__numeric_type(self): - col = Column( + col = dbt.schema.Column( 'fieldname', 'numeric', numeric_precision=decimal.Decimal('12'), @@ -29,7 +28,7 @@ def test__numeric_type(self): def test__numeric_type_with_no_precision(self): # PostgreSQL, at least, will allow empty numeric precision - col = Column( + col = dbt.schema.Column( 'fieldname', 'numeric', numeric_precision=None) diff --git a/test/unit/test_snowflake_adapter.py b/test/unit/test_snowflake_adapter.py index caba79ea2e2..0ee65d05759 100644 --- a/test/unit/test_snowflake_adapter.py +++ b/test/unit/test_snowflake_adapter.py @@ -11,7 +11,7 @@ from dbt.logger import GLOBAL_LOGGER as logger # noqa from snowflake import connector as snowflake_connector -from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection +from .utils import config_from_parts_or_dicts, inject_adapter class TestSnowflakeAdapter(unittest.TestCase): @@ -54,8 +54,8 @@ def setUp(self): self.snowflake.return_value = self.handle self.adapter = SnowflakeAdapter(self.config) - self.adapter.acquire_connection() - inject_adapter(self.adapter) + # patch our new adapter into the factory so macros behave + inject_adapter('snowflake', self.adapter) def tearDown(self): # we want a unique self.handle every time. 
@@ -69,7 +69,7 @@ def test_quoting_on_drop_schema(self): ) self.mock_execute.assert_has_calls([ - mock.call('drop schema if exists test_database."test_schema" cascade', None) + mock.call('drop schema if exists "test_database"."test_schema" cascade', None) ]) def test_quoting_on_drop(self): @@ -84,7 +84,7 @@ def test_quoting_on_drop(self): self.mock_execute.assert_has_calls([ mock.call( - 'drop table if exists test_database."test_schema".test_table cascade', + 'drop table if exists "test_database"."test_schema".test_table cascade', None ) ]) @@ -100,7 +100,7 @@ def test_quoting_on_truncate(self): self.adapter.truncate_relation(relation) self.mock_execute.assert_has_calls([ - mock.call('truncate table test_database."test_schema".test_table', None) + mock.call('truncate table "test_database"."test_schema".test_table', None) ]) def test_quoting_on_rename(self): @@ -125,7 +125,7 @@ def test_quoting_on_rename(self): ) self.mock_execute.assert_has_calls([ mock.call( - 'alter table test_database."test_schema".table_a rename to test_database."test_schema".table_b', + 'alter table "test_database"."test_schema".table_a rename to table_b', None ) ]) @@ -134,19 +134,17 @@ def test_cancel_open_connections_empty(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_master(self): - key = self.adapter.connections.get_thread_identifier() - self.adapter.connections.thread_connections[key] = mock_connection('master') + self.adapter.connections.in_use['master'] = mock.MagicMock() self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_single(self): - master = mock_connection('master') - model = mock_connection('model') + master = mock.MagicMock() + model = mock.MagicMock() model.handle.session_id = 42 - key = self.adapter.connections.get_thread_identifier() - self.adapter.connections.thread_connections.update({ - key: master, - 1: model, + self.adapter.connections.in_use.update({ + 'master': master, + 'model': model, }) with mock.patch.object(self.adapter.connections, 'add_query') as add_query: query_result = mock.MagicMock() @@ -155,10 +153,11 @@ def test_cancel_open_connections_single(self): self.assertEqual( len(list(self.adapter.cancel_open_connections())), 1) - add_query.assert_called_once_with('select system$abort_session(42)') + add_query.assert_called_once_with( + 'select system$abort_session(42)', 'master') def test_client_session_keep_alive_false_by_default(self): - self.adapter.connections.set_connection_name(name='new_connection_with_new_config') + self.adapter.connections.get(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( account='test_account', autocommit=False, @@ -171,7 +170,7 @@ def test_client_session_keep_alive_true(self): self.config.credentials = self.config.credentials.incorporate( client_session_keep_alive=True) self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.set_connection_name(name='new_connection_with_new_config') + self.adapter.connections.get(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( @@ -185,7 +184,7 @@ def test_user_pass_authentication(self): self.config.credentials = self.config.credentials.incorporate( password='test_password') self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.set_connection_name(name='new_connection_with_new_config') + self.adapter.connections.get(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( @@ 
-199,7 +198,7 @@ def test_authenticator_user_pass_authentication(self): self.config.credentials = self.config.credentials.incorporate( password='test_password', authenticator='test_sso_url') self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.set_connection_name(name='new_connection_with_new_config') + self.adapter.connections.get(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( @@ -214,7 +213,7 @@ def test_authenticator_externalbrowser_authentication(self): self.config.credentials = self.config.credentials.incorporate( authenticator='externalbrowser') self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.set_connection_name(name='new_connection_with_new_config') + self.adapter.connections.get(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( @@ -232,7 +231,7 @@ def test_authenticator_private_key_authentication(self, mock_get_private_key): private_key_passphrase='p@ssphr@se') self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.set_connection_name(name='new_connection_with_new_config') + self.adapter.connections.get(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( diff --git a/test/unit/test_system_client.py b/test/unit/test_system_client.py index 0fc8e3dcfb7..282b957df63 100644 --- a/test/unit/test_system_client.py +++ b/test/unit/test_system_client.py @@ -92,12 +92,7 @@ def test__not_exe(self): dbt.clients.system.run_cmd(self.run_dir, [self.empty_file]) msg = str(exc.exception).lower() - if os.name == 'nt': - # on windows, this means it's not an executable at all! - self.assertIn('not executable', msg) - else: - # on linux, this means you don't have executable permissions on it - self.assertIn('permissions', msg) + self.assertIn('permissions', msg) self.assertIn(self.empty_file.lower(), msg) def test__cwd_does_not_exist(self): diff --git a/test/unit/utils.py b/test/unit/utils.py index 48a753c2ee1..fafb89484e7 100644 --- a/test/unit/utils.py +++ b/test/unit/utils.py @@ -3,19 +3,11 @@ Note that all imports should be inside the functions to avoid import/mocking issues. """ -import mock - class Obj(object): which = 'blah' -def mock_connection(name): - conn = mock.MagicMock() - conn.name = name - return conn - - def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): from dbt.config import Project, Profile, RuntimeConfig from dbt.utils import parse_cli_vars @@ -37,12 +29,10 @@ def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): ) -def inject_adapter(value): +def inject_adapter(key, value): """Inject the given adapter into the adapter factory, so your hand-crafted artisanal adapter will be available from get_adapter() as if dbt loaded it. 
""" from dbt.adapters import factory - from dbt.adapters.base.connections import BaseConnectionManager - key = value.type() factory._ADAPTERS[key] = value factory.ADAPTER_TYPES[key] = type(value) diff --git a/tox.ini b/tox.ini index 2134d39fb3c..ab55c312b26 100644 --- a/tox.ini +++ b/tox.ini @@ -1,23 +1,23 @@ [tox] skipsdist = True -envlist = unit-py27, unit-py36, integration-postgres-py27, integration-postgres-py36, integration-redshift-py27, integration-redshift-py36, integration-snowflake-py27, integration-snowflake-py36, flake8, integration-bigquery-py27, integration-bigquery-py36 +envlist = unit-py27, unit-py36, integration-postgres-py27, integration-postgres-py36, integration-redshift-py27, integration-redshift-py36, integration-snowflake-py27, integration-snowflake-py36, pep8, integration-bigquery-py27, integration-bigquery-py36 -[testenv:flake8] +[testenv:pep8] basepython = python3.6 -commands = /bin/bash -c '$(which flake8) --select=E,W,F --ignore=W504 core/dbt plugins/*/dbt' +commands = /bin/bash -c '$(which pep8) core/dbt plugins/*/dbt' deps = -r{toxinidir}/dev_requirements.txt [testenv:unit-py27] basepython = python2.7 -commands = /bin/bash -c '{envpython} -m pytest -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit' +commands = /bin/bash -c '$(which nosetests) -v {posargs} test/unit' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt [testenv:unit-py36] basepython = python3.6 -commands = /bin/bash -c '{envpython} -m pytest -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit' +commands = /bin/bash -c '{envpython} $(which nosetests) -v {posargs} test/unit' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -27,7 +27,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_postgres {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=postgres {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -38,7 +38,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=snowflake {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/snowflake @@ -49,7 +49,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=bigquery {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/bigquery @@ -60,7 +60,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=redshift {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov 
test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -72,7 +72,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_presto {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=presto {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/presto @@ -83,7 +83,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_postgres --cov=dbt --cov-branch --cov-report html:htmlcov {posargs} test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=postgres --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov {posargs} test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -94,7 +94,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=snowflake {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/snowflake @@ -105,7 +105,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=bigquery {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/bigquery @@ -116,7 +116,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=redshift {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -128,7 +128,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v -m profile_presto {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=presto {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/presto @@ -139,7 +139,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v {posargs}' +commands = /bin/bash -c '{envpython} $(which nosetests) -v {posargs}' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -149,7 +149,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} -m pytest -v {posargs}' +commands = /bin/bash -c '{envpython} $(which nosetests) -v {posargs}' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -160,71 +160,7 @@ passenv = * setenv = 
DBT_CONFIG_DIR = ~/.dbt DBT_INVOCATION_ENV = ci-appveyor -commands = pytest -v -m 'profile_postgres or profile_snowflake or profile_bigquery or profile_redshift' --cov=dbt --cov-branch --cov-report html:htmlcov test/integration test/unit +commands = nosetests -v -a type=postgres -a type=snowflake -a type=bigquery --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration test/unit deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt - -[testenv:pywin-unit] -basepython = python.exe -passenv = * -setenv = - DBT_CONFIG_DIR = ~/.dbt - DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit -deps = - -r{toxinidir}/requirements.txt - -r{toxinidir}/dev_requirements.txt - - -[testenv:pywin-postgres] -basepython = python.exe -passenv = * -setenv = - DBT_CONFIG_DIR = ~/.dbt - DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v -m profile_postgres {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration -deps = - -e {toxinidir}/core - -e {toxinidir}/plugins/postgres - -r{toxinidir}/dev_requirements.txt - - -[testenv:pywin-snowflake] -basepython = python.exe -passenv = * -setenv = - DBT_CONFIG_DIR = ~/.dbt - DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration -deps = - -e {toxinidir}/core - -e {toxinidir}/plugins/snowflake - -r{toxinidir}/dev_requirements.txt - - -[testenv:pywin-bigquery] -basepython = python.exe -passenv = * -setenv = - DBT_CONFIG_DIR = ~/.dbt - DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration -deps = - -e {toxinidir}/core - -e {toxinidir}/plugins/bigquery - -r{toxinidir}/dev_requirements.txt - - -[testenv:pywin-redshift] -basepython = python.exe -passenv = * -setenv = - DBT_CONFIG_DIR = ~/.dbt - DBT_INVOCATION_ENV = ci-appveyor -commands = python -m pytest -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration -deps = - -e {toxinidir}/core - -e {toxinidir}/plugins/postgres - -e {toxinidir}/plugins/redshift - -r{toxinidir}/dev_requirements.txt From 08820a2061b9f330b46dd15277f3c4c6dc748ae3 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 27 Apr 2019 08:15:23 -0700 Subject: [PATCH 20/29] fixing my jetlagged introduced bugs --- .../macros/materializations/incremental.sql | 16 ++++------------ .../snowflake/macros/materializations/table.sql | 4 ++-- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index f5323d80c43..2d30564f945 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -8,29 +8,21 @@ {%- set identifier = model['alias'] -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} - {%- set target_relation = api.Relation.create(database=database, identifier=identifier, schema=schema, type='table') -%} - {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} - {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} - {%- set force_create = 
full_refresh_mode -%} - -- setup - {% set source_sql = sql -%} - {# -- wrap sql in parens to make it a subquery -- + {% set source_sql -%} + -- wrap sql in parens to make it a subquery -- ( select * from ( {{ sql }} ) - {% if sql_where %} - where ({{ sql_where }}) or ({{ sql_where }}) is null - {% endif %} ) - {%- endset -%} #} + {%- endset -%} {{ run_hooks(pre_hooks, inside_transaction=False) }} @@ -49,7 +41,7 @@ {{ adapter.drop_relation(old_relation) }} {% endif %} {# -- now create or replace the table because we're in full-refresh #} - {{create_table_as(target_relation, source_sql)}} + {{create_table_as(false, target_relation, source_sql)}} {%- endcall -%} {%- else -%} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index 31f2139ae62..939eb855e20 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -17,7 +17,7 @@ -- drop the temp relations if they exists for some reason - {{ adapter.drop_relation(intermediate_relation) }} + {# {{ adapter.drop_relation(intermediate_relation) }} #} --- FIXME: Do we want to put this block all together? I think it serves no purpose, but need to check -- setup: if the target relation already exists, truncate or drop it (if it's a view) @@ -48,7 +48,7 @@ {{ drop_relation_if_exists(old_relation) }} {% endif %} - {{ create_or_replace_table_as(target_relation, sql) }} + {{ create_table_as(create_as_temporary, target_relation, sql) }} {%- endcall %} {{ run_hooks(post_hooks, inside_transaction=True) }} From 0432c1d7e381f19bbcb087577844251aff4277f7 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 27 Apr 2019 10:11:59 -0700 Subject: [PATCH 21/29] conflic resolve --- .../include/snowflake/macros/materializations/incremental.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 2d30564f945..6a1cebaebaf 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -10,7 +10,9 @@ {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(database=database, identifier=identifier, schema=schema, type='table') -%} {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} + {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} + {%- set force_create = full_refresh_mode -%} -- setup From 90f8e0b70e736e8cad1577b85ea58311371aea96 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 27 Apr 2019 10:18:30 -0700 Subject: [PATCH 22/29] Revert "Revert "Merge branch 'dev/wilt-chamberlain' into snowflake_create_or_replace"" This reverts commit 4f62978de56b2e7f09778dedd9d0f6c440dd59c7. 
--- .bumpversion.cfg | 4 +- .circleci/config.yml | 2 +- .coveragerc | 2 +- CHANGELOG.md | 77 ++- CONTRIBUTING.md | 4 +- Makefile | 2 +- RELEASE.md | 17 +- appveyor.yml | 64 -- azure-pipelines.yml | 125 ++++ core/dbt/adapters/base/__init__.py | 13 +- core/dbt/adapters/base/connections.py | 262 ++++---- core/dbt/adapters/base/impl.py | 327 +++++++--- core/dbt/adapters/base/meta.py | 27 +- core/dbt/adapters/base/plugin.py | 2 - core/dbt/adapters/base/relation.py | 141 +++- core/dbt/adapters/factory.py | 4 +- core/dbt/adapters/sql/__init__.py | 5 +- core/dbt/adapters/sql/connections.py | 53 +- core/dbt/adapters/sql/impl.py | 92 +-- core/dbt/api/object.py | 4 +- core/dbt/clients/_jinja_blocks.py | 481 ++++++++++++++ core/dbt/clients/jinja.py | 10 +- core/dbt/clients/registry.py | 4 + core/dbt/clients/system.py | 12 +- core/dbt/compat.py | 9 +- core/dbt/compilation.py | 35 +- core/dbt/config/__init__.py | 27 +- core/dbt/config/profile.py | 8 +- core/dbt/config/project.py | 17 +- core/dbt/config/runtime.py | 17 +- core/dbt/context/common.py | 57 +- core/dbt/context/parser.py | 18 +- core/dbt/context/runtime.py | 5 +- core/dbt/contracts/connection.py | 2 - core/dbt/contracts/graph/compiled.py | 3 - core/dbt/contracts/graph/manifest.py | 16 +- core/dbt/contracts/graph/parsed.py | 95 ++- core/dbt/contracts/graph/unparsed.py | 28 +- core/dbt/contracts/project.py | 4 + core/dbt/contracts/results.py | 88 ++- core/dbt/deprecations.py | 12 - core/dbt/exceptions.py | 120 +++- core/dbt/flags.py | 5 +- core/dbt/graph/selector.py | 1 - core/dbt/hooks.py | 2 +- .../global_project/macros/adapters/common.sql | 22 +- .../macros/etc/get_custom_alias.sql | 26 + .../materializations/archive/archive.sql | 251 +++++-- .../macros/materializations/helpers.sql | 4 +- .../incremental/incremental.sql | 17 +- .../macros/materializations/table/table.sql | 4 +- .../macros/materializations/view/view.sql | 4 +- core/dbt/linker.py | 42 +- core/dbt/loader.py | 25 +- core/dbt/logger.py | 115 +++- core/dbt/main.py | 304 +++++---- core/dbt/node_runners.py | 264 +++++--- core/dbt/node_types.py | 3 + core/dbt/parser/__init__.py | 2 + core/dbt/parser/analysis.py | 5 + core/dbt/parser/archives.py | 97 ++- core/dbt/parser/base.py | 71 +- core/dbt/parser/base_sql.py | 84 ++- core/dbt/parser/docs.py | 14 +- core/dbt/parser/hooks.py | 4 +- core/dbt/parser/macros.py | 2 - core/dbt/parser/schemas.py | 59 +- core/dbt/parser/source_config.py | 5 +- core/dbt/parser/util.py | 195 +++--- core/dbt/rpc.py | 395 ++++++++++++ core/dbt/schema.py | 177 ----- core/dbt/semver.py | 12 +- core/dbt/ssh_forward.py | 10 - core/dbt/task/archive.py | 7 +- core/dbt/task/base.py | 127 ++++ core/dbt/task/base_task.py | 13 - core/dbt/task/clean.py | 4 +- core/dbt/task/compile.py | 145 ++++- core/dbt/task/debug.py | 8 +- core/dbt/task/deps.py | 4 +- core/dbt/task/freshness.py | 4 +- core/dbt/task/generate.py | 8 +- core/dbt/task/init.py | 4 +- core/dbt/task/rpc_server.py | 83 +++ core/dbt/task/run.py | 47 +- core/dbt/task/run_operation.py | 64 ++ core/dbt/task/runnable.py | 121 +++- core/dbt/task/serve.py | 6 +- core/dbt/task/test.py | 17 +- core/dbt/ui/printer.py | 15 +- core/dbt/utils.py | 26 +- core/dbt/version.py | 3 +- core/setup.py | 6 +- dev_requirements.txt | 7 +- docker-compose.yml | 2 +- etc/dbt-horizontal.png | Bin 18063 -> 8968 bytes .../invocation_env_context.json | 0 .../invocation_event.json | 0 .../platform_context.json | 0 .../run_model_context.json | 0 .../dbt/adapters/bigquery/__init__.py | 5 +- .../dbt/adapters/bigquery/connections.py | 72 ++- 
.../bigquery/dbt/adapters/bigquery/impl.py | 118 ++-- .../dbt/adapters/bigquery/relation.py | 102 ++- .../dbt/include/bigquery/macros/adapters.sql | 14 +- .../macros/materializations/archive.sql | 11 +- .../macros/materializations/incremental.sql | 8 +- plugins/bigquery/setup.py | 2 +- .../dbt/adapters/postgres/__init__.py | 3 +- .../dbt/adapters/postgres/connections.py | 15 +- .../postgres/dbt/adapters/postgres/impl.py | 29 +- .../dbt/include/postgres/macros/adapters.sql | 12 +- .../dbt/include/postgres/macros/catalog.sql | 8 +- plugins/postgres/setup.py | 2 +- .../dbt/adapters/redshift/__init__.py | 2 +- .../dbt/adapters/redshift/connections.py | 20 +- .../redshift/dbt/adapters/redshift/impl.py | 7 +- .../redshift/dbt/include/redshift/__init__.py | 2 +- .../dbt/include/redshift/macros/adapters.sql | 12 +- .../dbt/include/redshift/macros/catalog.sql | 14 +- plugins/redshift/setup.py | 2 +- .../dbt/adapters/snowflake/__init__.py | 4 +- .../dbt/adapters/snowflake/connections.py | 34 +- .../snowflake/dbt/adapters/snowflake/impl.py | 4 - .../dbt/adapters/snowflake/relation.py | 3 +- .../dbt/include/snowflake/macros/adapters.sql | 21 +- .../dbt/include/snowflake/macros/catalog.sql | 9 +- plugins/snowflake/setup.py | 2 +- scripts/build-sdists.sh | 23 + setup.py | 2 +- test.env.sample | 2 +- .../001_simple_copy_test/test_simple_copy.py | 1 - .../invalidate_bigquery.sql | 2 +- .../invalidate_postgres.sql | 25 +- .../invalidate_snowflake.sql | 10 +- .../models/ref_archive.sql | 1 + .../004_simple_archive_test/seed.sql | 225 +++++-- .../004_simple_archive_test/seed_bq.sql | 82 +-- .../test-archives-bq/archive.sql | 14 + .../test-archives-invalid/archive.sql | 12 + .../test-archives-pg/archive.sql | 14 + .../test-archives-select/archives.sql | 45 ++ .../test-check-col-archives-bq/archive.sql | 27 + .../test-check-col-archives/archive.sql | 28 + .../test_simple_archive.py | 310 +++++++-- .../004_simple_archive_test/update.sql | 278 ++++++-- .../004_simple_archive_test/update_bq.sql | 94 +-- .../test_seed_type_override.py | 9 +- .../005_simple_seed_test/test_simple_seed.py | 15 +- .../test_local_dependency.py | 13 +- .../test_simple_dependency.py | 11 +- .../test_simple_dependency_with_configs.py | 11 +- .../models/users_rollup_dependency.sql | 5 + .../test_graph_selection.py | 91 +-- .../test_schema_test_graph_selection.py | 31 +- .../test_tag_selection.py | 10 +- .../ephemeral/ephemeral.sql | 4 + .../ephemeral/schema.yml | 8 + .../test_schema_v2_tests.py | 41 +- .../009_data_tests_test/test_data_tests.py | 7 +- .../010_permission_tests/test_permissions.py | 7 +- .../test_invalid_models.py | 7 +- .../models/sql_where.sql | 3 - .../test_deprecations.py | 4 +- .../test_context_vars.py | 15 +- .../014_hook_tests/test_model_hooks.py | 19 +- .../014_hook_tests/test_model_hooks_bq.py | 7 +- .../014_hook_tests/test_run_hooks.py | 9 +- .../014_hook_tests/test_run_hooks_bq.py | 7 +- .../test_cli_invocation.py | 11 +- .../016_macro_tests/test_macros.py | 9 +- .../test_runtime_materialization.py | 13 +- .../018_adapter_ddl_tests/test_adapter_ddl.py | 5 +- .../019_analysis_tests/test_analyses.py | 5 +- .../020_ephemeral_test/test_ephemeral.py | 9 +- .../021_concurrency_test/test_concurrency.py | 7 +- .../test_bigquery_adapter_functions.py | 5 +- .../test_bigquery_date_partitioning.py | 5 +- .../023_exit_codes_test/test_exit_codes.py | 25 +- .../024_custom_schema_test/models/view_3.sql | 2 +- .../test_custom_schema.py | 49 +- .../test_duplicate_model.py | 16 +- .../025_timezones_test/test_timezones.py | 7 +- 
.../026_aliases_test/test_aliases.py | 13 +- .../integration/027_cycle_test/test_cycles.py | 7 +- .../028_cli_vars/test_cli_var_override.py | 7 +- .../integration/028_cli_vars/test_cli_vars.py | 9 +- .../ref_models/schema.yml | 5 + .../test_docs_generate.py | 132 ++-- .../test_concurrent_transaction.py | 19 +- .../033_event_tracking_test/test_events.py | 39 +- .../test_late_binding_view.py | 1 - .../test_external_reference.py | 9 +- .../test_override_database.py | 15 +- .../042_sources_test/macros/macro.sql | 7 + .../models/ephemeral_model.sql | 3 + .../042_sources_test/models/schema.yml | 4 + .../042_sources_test/test_sources.py | 610 +++++++++++++++++- .../043_custom_aliases_test/macros/macros.sql | 21 + .../043_custom_aliases_test/models/model1.sql | 3 + .../043_custom_aliases_test/models/model2.sql | 3 + .../043_custom_aliases_test/models/schema.yml | 15 + .../test_custom_aliases.py | 23 + .../macros/happy_macros.sql | 24 + .../macros/sad_macros.sql | 7 + .../044_run_operations_test/models/model.sql | 1 + .../test_run_operations.py | 58 ++ test/integration/base.py | 137 ++-- .../{test_schema.py => test_base_column.py} | 9 +- test/unit/test_bigquery_adapter.py | 10 +- test/unit/test_config.py | 30 +- test/unit/test_deps.py | 9 +- test/unit/test_docs_blocks.py | 30 +- test/unit/test_graph.py | 6 +- test/unit/test_jinja.py | 316 +++++++++ test/unit/test_parser.py | 139 ++-- test/unit/test_postgres_adapter.py | 50 +- test/unit/test_redshift_adapter.py | 26 +- test/unit/test_snowflake_adapter.py | 43 +- test/unit/test_system_client.py | 7 +- test/unit/utils.py | 12 +- tox.ini | 100 ++- 222 files changed, 6949 insertions(+), 2481 deletions(-) delete mode 100644 appveyor.yml create mode 100644 azure-pipelines.yml create mode 100644 core/dbt/clients/_jinja_blocks.py create mode 100644 core/dbt/include/global_project/macros/etc/get_custom_alias.sql create mode 100644 core/dbt/rpc.py delete mode 100644 core/dbt/schema.py delete mode 100644 core/dbt/ssh_forward.py create mode 100644 core/dbt/task/base.py delete mode 100644 core/dbt/task/base_task.py create mode 100644 core/dbt/task/rpc_server.py create mode 100644 core/dbt/task/run_operation.py rename {core/events => events}/schemas/com.fishtownanalytics/invocation_env_context.json (100%) rename {core/events => events}/schemas/com.fishtownanalytics/invocation_event.json (100%) rename {core/events => events}/schemas/com.fishtownanalytics/platform_context.json (100%) rename {core/events => events}/schemas/com.fishtownanalytics/run_model_context.json (100%) create mode 100755 scripts/build-sdists.sh create mode 100644 test/integration/004_simple_archive_test/models/ref_archive.sql create mode 100644 test/integration/004_simple_archive_test/test-archives-bq/archive.sql create mode 100644 test/integration/004_simple_archive_test/test-archives-invalid/archive.sql create mode 100644 test/integration/004_simple_archive_test/test-archives-pg/archive.sql create mode 100644 test/integration/004_simple_archive_test/test-archives-select/archives.sql create mode 100644 test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql create mode 100644 test/integration/004_simple_archive_test/test-check-col-archives/archive.sql create mode 100644 test/integration/007_graph_selection_tests/models/users_rollup_dependency.sql create mode 100644 test/integration/008_schema_tests_test/ephemeral/ephemeral.sql create mode 100644 test/integration/008_schema_tests_test/ephemeral/schema.yml delete mode 100644 
test/integration/012_deprecation_tests/models/sql_where.sql create mode 100644 test/integration/042_sources_test/macros/macro.sql create mode 100644 test/integration/042_sources_test/models/ephemeral_model.sql create mode 100644 test/integration/043_custom_aliases_test/macros/macros.sql create mode 100644 test/integration/043_custom_aliases_test/models/model1.sql create mode 100644 test/integration/043_custom_aliases_test/models/model2.sql create mode 100644 test/integration/043_custom_aliases_test/models/schema.yml create mode 100644 test/integration/043_custom_aliases_test/test_custom_aliases.py create mode 100644 test/integration/044_run_operations_test/macros/happy_macros.sql create mode 100644 test/integration/044_run_operations_test/macros/sad_macros.sql create mode 100644 test/integration/044_run_operations_test/models/model.sql create mode 100644 test/integration/044_run_operations_test/test_run_operations.py rename test/unit/{test_schema.py => test_base_column.py} (87%) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 00c2666d1f2..1afdbf592fd 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.13.0a1 +current_version = 0.13.0 parse = (?P\d+) \.(?P\d+) \.(?P\d+) @@ -20,6 +20,8 @@ values = [bumpversion:part:num] first_value = 1 +[bumpversion:file:setup.py] + [bumpversion:file:core/setup.py] [bumpversion:file:core/dbt/version.py] diff --git a/.circleci/config.yml b/.circleci/config.yml index fa3942ffeef..6d0474d57e6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -19,7 +19,7 @@ jobs: PGUSER: root PGPASSWORD: password PGDATABASE: postgres - - run: tox -e pep8,unit-py27,unit-py36 + - run: tox -e flake8,unit-py27,unit-py36 integration-postgres-py36: docker: *test_and_postgres steps: diff --git a/.coveragerc b/.coveragerc index 18244411816..5233b856876 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,4 +1,4 @@ [report] include = core/dbt/* - plugins/adapters/dbt/* + plugins/*/dbt/* diff --git a/CHANGELOG.md b/CHANGELOG.md index 41a5fa0db2c..4400f7af66e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,20 +1,71 @@ -## dbt dev/stephen-girard (0.13.0? - To be released) +## dbt 0.13.0 - Stephen Girard (March 21, 2019) -## Overview +### Overview -This release makes dbt and its adapters into a core-and-plugin architecture. +This release provides [a stable API for building new adapters](https://docs.getdbt.com/v0.13/docs/building-a-new-adapter) and reimplements dbt's adapters as "plugins". Additionally, a new adapter for [Presto](https://github.com/fishtown-analytics/dbt-presto) was added using this architecture. Beyond adapters, this release of dbt also includes [Sources](https://docs.getdbt.com/v0.13/docs/using-sources) which can be used to document and test source data tables. See the full list of features added in 0.13.0 below. 
### Breaking Changes -- '{{this}}' is no longer respected in hooks [#1176](https://github.com/fishtown-analytics/dbt/pull/1176), implementing [#878](https://github.com/fishtown-analytics/dbt/issues/878) -- A number of widely-used adapter methods previously available in macros/materializations have changed in breaking ways: - - get_missing_columns - takes Relations instead of schemas and identifiers - - get_columns_in_table - deprecated in favor of get_columns_in_relation (takes a Relation instead of schema, identifier) - - expand_target_column_types - takes a Relation instead of schema, identifier - - query_for_existing - removed - - get_relation - database parameter added - - create_schema - database parameter added - - drop_schema - database parameter added - - already_exists - deprecated in favor of get_relation (takes a relation) +- version 1 schema.yml specs are no longer implemented. Please use the version 2 spec instead ([migration guide](https://docs.getdbt.com/docs/upgrading-from-0-10-to-0-11#section-schema-yml-v2-syntax)) +- `{{this}}` is no longer implemented for `on-run-start` and `on-run-end` hooks. Use `{{ target }}` or an [`on-run-end` context variable](https://docs.getdbt.com/reference#schemas) instead ([#1176](https://github.com/fishtown-analytics/dbt/pull/1176), implementing [#878](https://github.com/fishtown-analytics/dbt/issues/878)) +- A number of materialization-specific adapter methods have changed in breaking ways. If you use these adapter methods in your macros or materializations, you may need to update your code accordingly. + - query_for_existing - **removed**, use [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) instead. + - [get_missing_columns](https://docs.getdbt.com/v0.13/reference#adapter-get-missing-columns) - changed to take `Relation`s instead of schemas and identifiers + - [expand_target_column_types](https://docs.getdbt.com/v0.13/reference#adapter-expand-target-column-types) - changed to take a `Relation` instead of schema, identifier + - [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) - added a `database` argument + - [create_schema](https://docs.getdbt.com/v0.13/reference#adapter-create-schema) - added a `database` argument + - [drop_schema](https://docs.getdbt.com/v0.13/reference#adapter-drop-schema) - added a `database` argument + +### Deprecations +- The following adapter methods are now deprecated, and will be removed in a future release: + - get_columns_in_table - deprecated in favor of [get_columns_in_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-columns-in-relation) + - already_exists - deprecated in favor of [get_relation](https://docs.getdbt.com/v0.13/reference#adapter-get-relation) + +### Features +- Add `source`s to dbt, use them to calculate source data freshness ([docs](https://docs.getdbt.com/v0.13/docs/using-sources) ) ([#814](https://github.com/fishtown-analytics/dbt/issues/814), [#1240](https://github.com/fishtown-analytics/dbt/issues/1240)) +- Add support for Presto ([docs](https://docs.getdbt.com/v0.13/docs/profile-presto), [repo](https://github.com/fishtown-analytics/dbt-presto)) ([#1106](https://github.com/fishtown-analytics/dbt/issues/1106)) +- Add `require-dbt-version` option to `dbt_project.yml` to state the supported versions of dbt for packages ([docs](https://docs.getdbt.com/v0.13/docs/requiring-dbt-versions)) ([#581](https://github.com/fishtown-analytics/dbt/issues/581)) +- Add an output line indicating the installed version of dbt to every run 
([#1134](https://github.com/fishtown-analytics/dbt/issues/1134)) +- Add a new model selector (`@`) which build models, their children, and their children's parents ([docs](https://docs.getdbt.com/v0.13/reference#section-the-at-operator)) ([#1156](https://github.com/fishtown-analytics/dbt/issues/1156)) +- Add support for Snowflake Key Pair Authentication ([docs](https://docs.getdbt.com/v0.13/docs/profile-snowflake#section-key-pair-authentication)) ([#1232](https://github.com/fishtown-analytics/dbt/pull/1232)) +- Support SSO Authentication for Snowflake ([docs](https://docs.getdbt.com/v0.13/docs/profile-snowflake#section-sso-authentication)) ([#1172](https://github.com/fishtown-analytics/dbt/issues/1172)) +- Add support for Snowflake's transient tables ([docs](https://docs.getdbt.com/v0.13/docs/snowflake-configs#section-transient-tables)) ([#946](https://github.com/fishtown-analytics/dbt/issues/946)) +- Capture build timing data in `run_results.json` to visualize project performance ([#1179](https://github.com/fishtown-analytics/dbt/issues/1179)) +- Add CLI flag to toggle warnings as errors ([docs](https://docs.getdbt.com/v0.13/reference#section-treat-warnings-as-errors)) ([#1243](https://github.com/fishtown-analytics/dbt/issues/1243)) +- Add tab completion script for Bash ([docs](https://github.com/fishtown-analytics/dbt-completion.bash)) ([#1197](https://github.com/fishtown-analytics/dbt/issues/1197)) +- Added docs on how to build a new adapter ([docs](https://docs.getdbt.com/v0.13/docs/building-a-new-adapter)) ([#560](https://github.com/fishtown-analytics/dbt/issues/560)) +- Use new logo ([#1349](https://github.com/fishtown-analytics/dbt/pull/1349)) + +### Fixes +- Fix for Postgres character columns treated as string types ([#1194](https://github.com/fishtown-analytics/dbt/issues/1194)) +- Fix for hard to reach edge case in which dbt could hang ([#1223](https://github.com/fishtown-analytics/dbt/issues/1223)) +- Fix for `dbt deps` in non-English shells ([#1222](https://github.com/fishtown-analytics/dbt/issues/1222)) +- Fix for over eager schema creation when models are run with `--models` ([#1239](https://github.com/fishtown-analytics/dbt/issues/1239)) +- Fix for `dbt seed --show` ([#1288](https://github.com/fishtown-analytics/dbt/issues/1288)) +- Fix for `is_incremental()` which should only return `True` if the target relation is a `table` ([#1292](https://github.com/fishtown-analytics/dbt/issues/1292)) +- Fix for error in Snowflake table materializations with custom schemas ([#1316](https://github.com/fishtown-analytics/dbt/issues/1316)) +- Fix errored out concurrent transactions on Redshift and Postgres ([#1356](https://github.com/fishtown-analytics/dbt/pull/1356)) +- Fix out of order execution on model select ([#1354](https://github.com/fishtown-analytics/dbt/issues/1354), [#1355](https://github.com/fishtown-analytics/dbt/pull/1355)) +- Fix adapter macro namespace issue ([#1352](https://github.com/fishtown-analytics/dbt/issues/1352), [#1353](https://github.com/fishtown-analytics/dbt/pull/1353)) +- Re-add CLI flag to toggle warnings as errors ([#1347](https://github.com/fishtown-analytics/dbt/pull/1347)) +- Fix release candidate regression that runs run hooks on test invocations ([#1346](https://github.com/fishtown-analytics/dbt/pull/1346)) +- Fix Snowflake source quoting ([#1338](https://github.com/fishtown-analytics/dbt/pull/1338), [#1317](https://github.com/fishtown-analytics/dbt/issues/1317), [#1332](https://github.com/fishtown-analytics/dbt/issues/1332)) +- Handle unexpected 
max_loaded_at types ([#1330](https://github.com/fishtown-analytics/dbt/pull/1330)) + +### Under the hood +- Replace all SQL in Python code with Jinja in macros ([#1204](https://github.com/fishtown-analytics/dbt/issues/1204)) +- Loosen restrictions of boto3 dependency ([#1234](https://github.com/fishtown-analytics/dbt/issues/1234)) +- Rewrote Postgres introspective queries to be faster on large databases ([#1192](https://github.com/fishtown-analytics/dbt/issues/1192) + + +### Contributors: +Thanks for your contributions to dbt! + +- [@patrickgoss](https://github.com/patrickgoss) [#1193](https://github.com/fishtown-analytics/dbt/issues/1193) +- [@brianhartsock](https://github.com/brianhartsock) [#1191](https://github.com/fishtown-analytics/dbt/pull/1191) +- [@alexyer](https://github.com/alexyer) [#1232](https://github.com/fishtown-analytics/dbt/pull/1232) +- [@adriank-convoy](https://github.com/adriank-convoy) [#1224](https://github.com/fishtown-analytics/dbt/pull/1224) +- [@mikekaminsky](https://github.com/mikekaminsky) [#1216](https://github.com/fishtown-analytics/dbt/pull/1216) +- [@vijaykiran](https://github.com/vijaykiran) [#1198](https://github.com/fishtown-analytics/dbt/pull/1198), [#1199](https://github.com/fishtown-analytics/dbt/pull/1199) ## dbt 0.12.2 - Grace Kelly (January 8, 2019) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8cbfc4facaf..cf58e691b67 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -76,8 +76,8 @@ A short list of tools used in dbt testing that will be helpful to your understan - [tox](https://tox.readthedocs.io/en/latest/) to manage virtualenvs across python versions - [nosetests](http://nose.readthedocs.io/en/latest) to discover/run tests - [make](https://users.cs.duke.edu/~ola/courses/programming/Makefiles/Makefiles.html) - but don't worry too much, nobody _really_ understands how make works and our Makefile is super simple -- [pep8](https://pep8.readthedocs.io/en/release-1.7.x/) for code linting -- [CircleCI](https://circleci.com/product/) and [Appveyor](https://www.appveyor.com/docs/) +- [flake8](https://gitlab.com/pycqa/flake8) for code linting +- [CircleCI](https://circleci.com/product/) and [Azure Pipelines](https://azure.microsoft.com/en-us/services/devops/pipelines/) If you're unfamiliar with any or all of these, that's fine! You really do not have to have a deep understanding of any of these to get by. diff --git a/Makefile b/Makefile index 4350de0d140..e0e9a176360 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ test: test-unit: @echo "Unit test run starting..." - @time docker-compose run test tox -e unit-py27,unit-py36,pep8 + @time docker-compose run test tox -e unit-py27,unit-py36,flake8 test-integration: @echo "Integration test run starting..." diff --git a/RELEASE.md b/RELEASE.md index 99321e4eb15..22c8cbf3b97 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -11,21 +11,22 @@ dbt has three types of branches: #### Git & PyPI 1. Update CHANGELOG.md with the most recent changes -2. If this is a release candidate, you want to create it off of your release branch. If it's an actual release, you must first merge to a master branch. Open a Pull Request in Github to merge it. +2. If this is a release candidate, you want to create it off of your release branch. If it's an actual release, you must first merge to a master branch. Open a Pull Request in Github to merge it into the appropriate trunk (`X.X.latest`) 3. Bump the version using `bumpversion`: - Dry run first by running `bumpversion --new-version ` and checking the diff. 
If it looks correct, clean up the chanages and move on: - Alpha releases: `bumpversion --commit --tag --new-version 0.10.2a1 num` - Patch releases: `bumpversion --commit --tag --new-version 0.10.2 patch` - Minor releases: `bumpversion --commit --tag --new-version 0.11.0 minor` - Major releases: `bumpversion --commit --tag --new-version 1.0.0 major` -4. (If this is a not a release candidate) Merge to x.x.latest and master. +4. (If this is a not a release candidate) Merge to `x.x.latest` and (optionally) `master`. 5. Update the default branch to the next dev release branch. -6. Deploy to pypi - - `python setup.py sdist upload -r pypi` -7. Deploy to homebrew (see below) -8. Deploy to conda-forge (see below) -9. Git release notes (points to changelog) -10. Post to slack (point to changelog) +6. Build source distributions for all packages by running `./scripts/build-sdists.sh`. Note that this will clean out your `dist/` folder, so if you have important stuff in there, don't run it!!! +7. Deploy to pypi + - `twine upload dist/*` +8. Deploy to homebrew (see below) +9. Deploy to conda-forge (see below) +10. Git release notes (points to changelog) +11. Post to slack (point to changelog) After releasing a new version, it's important to merge the changes back into the other outstanding release branches. This avoids merge conflicts moving forward. diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index f44c1569dbe..00000000000 --- a/appveyor.yml +++ /dev/null @@ -1,64 +0,0 @@ -version: 1.0.{build}-{branch} - -environment: - # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the - # /E:ON and /V:ON options are not enabled in the batch script intepreter - # See: http://stackoverflow.com/a/13751649/163740 - CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd" - TOX_ENV: "pywin" - - matrix: - - PYTHON: "C:\\Python35" - PYTHON_VERSION: "3.5.2" - PYTHON_ARCH: "32" - - #- PYTHON: "C:\\Python35" - # PYTHON_VERSION: "3.5.2" - # PYTHON_ARCH: "32" - - PGUSER: postgres - PGPASSWORD: Password12! - -services: - - postgresql94 - -hosts: - database: 127.0.0.1 - -init: - - PATH=C:\Program Files\PostgreSQL\9.4\bin\;%PATH% - - ps: Set-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all ::1/128 trust" - - ps: Add-Content "c:\program files\postgresql\9.4\data\pg_hba.conf" "host all all 127.0.0.1/32 trust" - -install: - # Download setup scripts and unzip - - ps: "wget https://github.com/cloudify-cosmo/appveyor-utils/archive/master.zip -OutFile ./master.zip" - - "7z e master.zip */appveyor/* -oappveyor" - - # Install Python (from the official .msi of http://python.org) and pip when - # not already installed. - - "powershell ./appveyor/install.ps1" - - # Prepend newly installed Python to the PATH of this build (this cannot be - # done from inside the powershell script as it would require to restart - # the parent CMD process). - - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - - # Check that we have the expected version and architecture for Python - - "python --version" - - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" - -build: false # Not a C# project, build stuff at the test step instead. - -before_test: - - "%CMD_IN_ENV% python -m pip install --upgrade pip" # sometimes on appveyor we get pip 7.x! 
- - "%CMD_IN_ENV% pip install psycopg2==2.6.2" - - "%CMD_IN_ENV% pip install tox" - -test_script: - - "bash test/setup_db.sh" - - # this is generally a bad idea TODO - - git config --system http.sslverify false - - - "%CMD_IN_ENV% tox -e %TOX_ENV%" diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 00000000000..b523a14c47d --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,125 @@ +# Python package +# Create and test a Python package on multiple Python versions. +# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: +# https://docs.microsoft.com/azure/devops/pipelines/languages/python + +trigger: + branches: + include: + - master + - dev/* + - pr/* + +jobs: +- job: UnitTest + pool: + vmImage: 'vs2017-win2016' + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.5' + architecture: 'x64' + + - script: python -m pip install --upgrade pip && pip install tox + displayName: 'Install dependencies' + + - script: python -m tox -e pywin-unit + displayName: Run unit tests + +- job: PostgresIntegrationTest + pool: + vmImage: 'vs2017-win2016' + dependsOn: UnitTest + + steps: + - pwsh: | + choco install postgresql --params '/Password:password' --params-global --version 10.6 + Set-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all ::1/128 trust" + Add-Content "c:\program files\postgresql\10\data\pg_hba.conf" "host all all 127.0.0.1/32 trust" + # the service name is "postgresql-x64-10", conveniently it's both the display name and the actual name + Restart-Service postgresql-x64-10 + + & "C:\program files\postgresql\10\bin\createdb.exe" -U postgres dbt + & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "CREATE ROLE root WITH PASSWORD 'password';" + & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "ALTER ROLE root WITH LOGIN;" + & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "GRANT CREATE, CONNECT ON DATABASE dbt TO root WITH GRANT OPTION;" + & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "CREATE ROLE noaccess WITH PASSWORD 'password' NOSUPERUSER;" + & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "ALTER ROLE noaccess WITH LOGIN;" + & "C:\program files\postgresql\10\bin\psql.exe" -U postgres -c "GRANT CONNECT ON DATABASE dbt TO noaccess;" + displayName: Install postgresql and set up database + + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.5' + architecture: 'x64' + + - script: python -m pip install --upgrade pip && pip install tox + displayName: 'Install dependencies' + + - script: python -m tox -e pywin-postgres + displayName: Run integration tests + +# These three are all similar except secure environment variables, which MUST be passed along to their tasks, +# but there's probably a better way to do this! 
+- job: SnowflakeIntegrationTest + pool: + vmImage: 'vs2017-win2016' + dependsOn: PostgresIntegrationTest + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.5' + architecture: 'x64' + + - script: python -m pip install --upgrade pip && pip install tox + displayName: 'Install dependencies' + + - script: python -m tox -e pywin-snowflake + env: + SNOWFLAKE_TEST_ACCOUNT: $(SNOWFLAKE_TEST_ACCOUNT) + SNOWFLAKE_TEST_PASSWORD: $(SNOWFLAKE_TEST_PASSWORD) + SNOWFLAKE_TEST_USER: $(SNOWFLAKE_TEST_USER) + SNOWFLAKE_TEST_WAREHOUSE: $(SNOWFLAKE_TEST_WAREHOUSE) + displayName: Run integration tests + +- job: BigQueryIntegrationTest + pool: + vmImage: 'vs2017-win2016' + dependsOn: PostgresIntegrationTest + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.5' + architecture: 'x64' + + - script: python -m pip install --upgrade pip && pip install tox + displayName: 'Install dependencies' + - script: python -m tox -e pywin-bigquery + env: + BIGQUERY_SERVICE_ACCOUNT_JSON: $(BIGQUERY_SERVICE_ACCOUNT_JSON) + displayName: Run integration tests + +- job: RedshiftIntegrationTest + pool: + vmImage: 'vs2017-win2016' + dependsOn: PostgresIntegrationTest + + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.5' + architecture: 'x64' + + - script: python -m pip install --upgrade pip && pip install tox + displayName: 'Install dependencies' + + - script: python -m tox -e pywin-redshift + env: + REDSHIFT_TEST_DBNAME: $(REDSHIFT_TEST_DBNAME) + REDSHIFT_TEST_PASS: $(REDSHIFT_TEST_PASS) + REDSHIFT_TEST_USER: $(REDSHIFT_TEST_USER) + REDSHIFT_TEST_PORT: $(REDSHIFT_TEST_PORT) + REDSHIFT_TEST_HOST: $(REDSHIFT_TEST_HOST) + displayName: Run integration tests diff --git a/core/dbt/adapters/base/__init__.py b/core/dbt/adapters/base/__init__.py index 98f96abe1b4..5edf237447b 100644 --- a/core/dbt/adapters/base/__init__.py +++ b/core/dbt/adapters/base/__init__.py @@ -1,5 +1,8 @@ -from dbt.adapters.base.meta import available -from dbt.adapters.base.relation import BaseRelation -from dbt.adapters.base.connections import BaseConnectionManager, Credentials -from dbt.adapters.base.impl import BaseAdapter -from dbt.adapters.base.plugin import AdapterPlugin +# these are all just exports, #noqa them so flake8 will be happy +from dbt.adapters.base.meta import available # noqa +from dbt.adapters.base.relation import BaseRelation # noqa +from dbt.adapters.base.relation import Column # noqa +from dbt.adapters.base.connections import BaseConnectionManager # noqa +from dbt.adapters.base.connections import Credentials # noqa +from dbt.adapters.base.impl import BaseAdapter # noqa +from dbt.adapters.base.plugin import AdapterPlugin # noqa diff --git a/core/dbt/adapters/base/connections.py b/core/dbt/adapters/base/connections.py index c65e932454e..8a29e7d9ff7 100644 --- a/core/dbt/adapters/base/connections.py +++ b/core/dbt/adapters/base/connections.py @@ -1,12 +1,13 @@ import abc import multiprocessing +import os import six import dbt.exceptions import dbt.flags from dbt.api import APIObject -from dbt.compat import abstractclassmethod +from dbt.compat import abstractclassmethod, get_ident from dbt.contracts.connection import Connection from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import translate_aliases @@ -71,6 +72,7 @@ class BaseConnectionManager(object): - open - begin - commit + - clear_transaction - execute You must also set the 'TYPE' class attribute with a class-unique constant @@ -80,83 +82,95 @@ class BaseConnectionManager(object): def __init__(self, profile): self.profile = profile 
- self.in_use = {} - self.available = [] + self.thread_connections = {} self.lock = multiprocessing.RLock() - self._set_initial_connections() - - def _set_initial_connections(self): - self.available = [] - # set up the array of connections in the 'init' state. - # we add a magic number, 2 because there are overhead connections, - # one for pre- and post-run hooks and other misc operations that occur - # before the run starts, and one for integration tests. - for idx in range(self.profile.threads + 2): - self.available.append(self._empty_connection()) - - def _empty_connection(self): - return Connection( - type=self.TYPE, - name=None, - state='init', - transaction_open=False, - handle=None, - credentials=self.profile.credentials - ) + + @staticmethod + def get_thread_identifier(): + # note that get_ident() may be re-used, but we should never experience + # that within a single process + return (os.getpid(), get_ident()) + + def get_thread_connection(self): + key = self.get_thread_identifier() + with self.lock: + if key not in self.thread_connections: + raise RuntimeError( + 'connection never acquired for thread {}, have {}' + .format(key, list(self.thread_connections)) + ) + return self.thread_connections[key] + + def get_if_exists(self): + key = self.get_thread_identifier() + with self.lock: + return self.thread_connections.get(key) + + def clear_thread_connection(self): + key = self.get_thread_identifier() + with self.lock: + if key in self.thread_connections: + del self.thread_connections[key] + + def clear_transaction(self): + """Clear any existing transactions.""" + conn = self.get_thread_connection() + if conn is not None: + if conn.transaction_open: + self._rollback(conn) + self.begin() + self.commit() @abc.abstractmethod - def exception_handler(self, sql, connection_name='master'): + def exception_handler(self, sql): """Create a context manager that handles exceptions caused by database interactions. :param str sql: The SQL string that the block inside the context manager is executing. - :param str connection_name: The name of the connection being used :return: A context manager that handles exceptions raised by the underlying database. """ raise dbt.exceptions.NotImplementedException( '`exception_handler` is not implemented for this adapter!') - def get(self, name=None): - """This is thread-safe as long as two threads don't use the same - "name". - """ + def set_connection_name(self, name=None): if name is None: # if a name isn't specified, we'll re-use a single handle # named 'master' name = 'master' - with self.lock: - if name in self.in_use: - return self.in_use[name] + conn = self.get_if_exists() + thread_id_key = self.get_thread_identifier() - logger.debug('Acquiring new {} connection "{}".' - .format(self.TYPE, name)) + if conn is None: + conn = Connection( + type=self.TYPE, + name=None, + state='init', + transaction_open=False, + handle=None, + credentials=self.profile.credentials + ) + self.thread_connections[thread_id_key] = conn - if not self.available: - raise dbt.exceptions.InternalException( - 'Tried to request a new connection "{}" but ' - 'the maximum number of connections are already ' - 'allocated!'.format(name) - ) + if conn.name == name and conn.state == 'open': + return conn - connection = self.available.pop() - # connection is temporarily neither in use nor available, but both - # collections are in a sane state, so we can release the lock. + logger.debug('Acquiring new {} connection "{}".' 
+ .format(self.TYPE, name)) - # this potentially calls open(), but does so without holding the lock - connection = self.assign(connection, name) - - with self.lock: - if name in self.in_use: - raise dbt.exceptions.InternalException( - 'Two threads concurrently tried to get the same name: {}' - .format(name) - ) - self.in_use[name] = connection + if conn.state == 'open': + logger.debug( + 'Re-using an available connection from the pool (formerly {}).' + .format(conn.name)) + else: + logger.debug('Opening a new connection, currently in state {}' + .format(conn.state)) + self.open(conn) - return connection + conn.name = name + return conn @abc.abstractmethod def cancel_open(self): @@ -183,81 +197,39 @@ def open(cls, connection): '`open` is not implemented for this adapter!' ) - def assign(self, conn, name): - """Open a connection if it's not already open, and assign it name - regardless. - - The caller is responsible for putting the assigned connection into the - in_use collection. - - :param Connection conn: A connection, in any state. - :param str name: The name of the connection to set. - """ - if name is None: - name = 'master' - - conn.name = name - - if conn.state == 'open': - logger.debug('Re-using an available connection from the pool.') - else: - logger.debug('Opening a new connection, currently in state {}' - .format(conn.state)) - conn = self.open(conn) - - return conn - - def _release_connection(self, conn): - if conn.state == 'open': - if conn.transaction_open is True: - self._rollback(conn) - conn.name = None - else: - self.close(conn) - - def release(self, name): + def release(self): with self.lock: - if name not in self.in_use: + conn = self.get_if_exists() + if conn is None: return - to_release = self.in_use.pop(name) - # to_release is temporarily neither in use nor available, but both - # collections are in a sane state, so we can release the lock. - try: - self._release_connection(to_release) - except: - # if rollback or close failed, replace our busted connection with - # a new one - to_release = self._empty_connection() + if conn.state == 'open': + if conn.transaction_open is True: + self._rollback(conn) + else: + self.close(conn) + except Exception: + # if rollback or close failed, remove our busted connection + self.clear_thread_connection() raise - finally: - # now that this connection has been rolled back and the name reset, - # or the connection has been closed, put it back on the available - # list - with self.lock: - self.available.append(to_release) def cleanup_all(self): with self.lock: - for name, connection in self.in_use.items(): - if connection.state != 'closed': + for connection in self.thread_connections.values(): + if connection.state not in {'closed', 'init'}: logger.debug("Connection '{}' was left open." - .format(name)) + .format(connection.name)) else: logger.debug("Connection '{}' was properly closed." - .format(name)) - - conns_in_use = list(self.in_use.values()) - for conn in conns_in_use + self.available: - self.close(conn) + .format(connection.name)) + self.close(connection) # garbage collect these connections - self.in_use.clear() - self._set_initial_connections() + self.thread_connections.clear() @abc.abstractmethod - def begin(self, name): + def begin(self): """Begin a transaction. (passable) :param str name: The name of the connection to use. @@ -266,34 +238,32 @@ def begin(self, name): '`begin` is not implemented for this adapter!' 
) - def get_if_exists(self, name): - if name is None: - name = 'master' - - if self.in_use.get(name) is None: - return - - return self.get(name) - @abc.abstractmethod - def commit(self, connection): - """Commit a transaction. (passable) - - :param str name: The name of the connection to use. - """ + def commit(self): + """Commit a transaction. (passable)""" raise dbt.exceptions.NotImplementedException( '`commit` is not implemented for this adapter!' ) - def _rollback_handle(self, connection): + @classmethod + def _rollback_handle(cls, connection): """Perform the actual rollback operation.""" connection.handle.rollback() - def _rollback(self, connection): - """Roll back the given connection. + @classmethod + def _close_handle(cls, connection): + """Perform the actual close operation.""" + # On windows, sometimes connection handles don't have a close() attr. + if hasattr(connection.handle, 'close'): + logger.debug('On {}: Close'.format(connection.name)) + connection.handle.close() + else: + logger.debug('On {}: No close available on handle' + .format(connection.name)) - The connection does not have to be in in_use or available, so this - operation does not require the lock. + @classmethod + def _rollback(cls, connection): + """Roll back the given connection. """ if dbt.flags.STRICT_MODE: assert isinstance(connection, Connection) @@ -304,7 +274,7 @@ def _rollback(self, connection): 'it does not have one open!'.format(connection.name)) logger.debug('On {}: ROLLBACK'.format(connection.name)) - self._rollback_handle(connection) + cls._rollback_handle(connection) connection.transaction_open = False @@ -320,40 +290,28 @@ def close(cls, connection): return connection if connection.transaction_open and connection.handle: - connection.handle.rollback() + cls._rollback_handle(connection) connection.transaction_open = False - # On windows, sometimes connection handles don't have a close() attr. - if hasattr(connection.handle, 'close'): - connection.handle.close() - else: - logger.debug('On {}: No close available on handle' - .format(connection.name)) - + cls._close_handle(connection) connection.state = 'closed' return connection - def commit_if_has_connection(self, name): + def commit_if_has_connection(self): """If the named connection exists, commit the current transaction. :param str name: The name of the connection to use. """ - connection = self.in_use.get(name) + connection = self.get_if_exists() if connection: - self.commit(connection) - - def clear_transaction(self, conn_name='master'): - conn = self.begin(conn_name) - self.commit(conn) - return conn_name + self.commit() @abc.abstractmethod - def execute(self, sql, name=None, auto_begin=False, fetch=False): + def execute(self, sql, auto_begin=False, fetch=False): """Execute the given SQL. :param str sql: The sql to execute. - :param Optional[str] name: The name to use for the connection. :param bool auto_begin: If set, and dbt is not currently inside a transaction, automatically begin one. :param bool fetch: If set, fetch results. 
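
The connections.py diff above replaces the fixed pool of named connections with per-thread connection tracking: each thread owns at most one connection, keyed by a `(pid, thread ident)` tuple, and `set_connection_name` / `get_thread_connection` / `release` all resolve the current thread's entry under a lock. The following is a minimal, self-contained sketch of that pattern only, not dbt's actual `ConnectionManager`; the `FakeConnection` stand-in and the behavior of `release()` here are assumptions made for illustration.

```python
import os
import threading


class FakeConnection:
    """Stand-in for a real DB-API connection handle (assumption for this sketch)."""

    def __init__(self, name):
        self.name = name
        self.state = 'open'


class ThreadKeyedConnections:
    """Sketch of per-thread connection tracking keyed by (pid, thread ident)."""

    def __init__(self):
        self.thread_connections = {}
        self.lock = threading.RLock()

    @staticmethod
    def get_thread_identifier():
        # thread idents can be re-used, so include the pid to keep keys unique
        return (os.getpid(), threading.get_ident())

    def set_connection_name(self, name='master'):
        key = self.get_thread_identifier()
        with self.lock:
            conn = self.thread_connections.get(key)
            if conn is None:
                conn = FakeConnection(name)
                self.thread_connections[key] = conn
            conn.name = name
            return conn

    def get_thread_connection(self):
        key = self.get_thread_identifier()
        with self.lock:
            if key not in self.thread_connections:
                raise RuntimeError(
                    'connection never acquired for thread {}'.format(key))
            return self.thread_connections[key]

    def release(self):
        # this sketch simply closes and forgets the connection; the real
        # manager is more careful about open transactions and re-use
        key = self.get_thread_identifier()
        with self.lock:
            conn = self.thread_connections.pop(key, None)
            if conn is not None:
                conn.state = 'closed'


if __name__ == '__main__':
    manager = ThreadKeyedConnections()

    def worker(n):
        manager.set_connection_name('model_{}'.format(n))
        print(manager.get_thread_identifier(),
              manager.get_thread_connection().name)
        manager.release()

    threads = [threading.Thread(target=worker, args=(i,)) for i in range(3)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
```

One consequence of keying by thread instead of by name is that two tasks on the same thread share a handle while two threads can never collide on one, which removes the need for the old "maximum number of connections" bookkeeping.
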
diff --git a/core/dbt/adapters/base/impl.py b/core/dbt/adapters/base/impl.py index 1f579fb9ab9..1207645fa83 100644 --- a/core/dbt/adapters/base/impl.py +++ b/core/dbt/adapters/base/impl.py @@ -1,7 +1,5 @@ import abc -import copy -import multiprocessing -import time +from contextlib import contextmanager import agate import pytz @@ -9,19 +7,17 @@ import dbt.exceptions import dbt.flags -import dbt.schema import dbt.clients.agate_helper from dbt.compat import abstractclassmethod, classmethod -from dbt.contracts.connection import Connection +from dbt.node_types import NodeType from dbt.loader import GraphLoader from dbt.logger import GLOBAL_LOGGER as logger -from dbt.schema import Column -from dbt.utils import filter_null_values, translate_aliases +from dbt.utils import filter_null_values -from dbt.adapters.base.meta import AdapterMeta, available, available_raw, \ - available_deprecated +from dbt.adapters.base.meta import AdapterMeta, available, available_deprecated from dbt.adapters.base import BaseRelation +from dbt.adapters.base import Column from dbt.adapters.cache import RelationsCache @@ -69,16 +65,76 @@ def test(row): return test -def _utc(dt): +def _utc(dt, source, field_name): """If dt has a timezone, return a new datetime that's in UTC. Otherwise, assume the datetime is already for UTC and add the timezone. """ - if dt.tzinfo: + if dt is None: + raise dbt.exceptions.raise_database_error( + "Expected a non-null value when querying field '{}' of table " + " {} but received value 'null' instead".format( + field_name, + source)) + + elif not hasattr(dt, 'tzinfo'): + raise dbt.exceptions.raise_database_error( + "Expected a timestamp value when querying field '{}' of table " + "{} but received value of type '{}' instead".format( + field_name, + source, + type(dt).__name__)) + + elif dt.tzinfo: return dt.astimezone(pytz.UTC) else: return dt.replace(tzinfo=pytz.UTC) +class SchemaSearchMap(dict): + """A utility class to keep track of what information_schema tables to + search for what schemas + """ + def add(self, relation): + key = relation.information_schema_only() + if key not in self: + self[key] = set() + self[key].add(relation.schema.lower()) + + def search(self): + for information_schema_name, schemas in self.items(): + for schema in schemas: + yield information_schema_name, schema + + def schemas_searched(self): + result = set() + for information_schema_name, schemas in self.items(): + result.update( + (information_schema_name.database, schema) + for schema in schemas + ) + return result + + def flatten(self): + new = self.__class__() + + database = None + # iterate once to look for a database name + seen = {r.database.lower() for r in self if r.database} + if len(seen) > 1: + dbt.exceptions.raise_compiler_error(str(seen)) + elif len(seen) == 1: + database = list(seen)[0] + + for information_schema_name, schema in self.search(): + new.add(information_schema_name.incorporate( + path={'database': database, 'schema': schema}, + quote_policy={'database': False}, + include_policy={'database': False}, + )) + + return new + + @six.add_metaclass(AdapterMeta) class BaseAdapter(object): """The BaseAdapter provides an abstract base class for adapters. 
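
The `SchemaSearchMap` introduced in the hunk above groups the schemas dbt needs to inspect under their `information_schema` relation, so the relations cache can be warmed with one pass per database rather than one query per schema. Below is a simplified, self-contained sketch of the same idea; plain database-name strings stand in for the information_schema `Relation` keys used by the real class, and the example data is invented.

```python
class SchemaSearchMap(dict):
    """Sketch: map an information_schema key to the set of schemas under it."""

    def add(self, database, schema):
        # the real class keys this on an information_schema-only Relation;
        # a plain database name stands in for that here
        if database not in self:
            self[database] = set()
        self[database].add(schema.lower())

    def search(self):
        # yield one (database, schema) pair per schema to be inspected
        for database, schemas in self.items():
            for schema in schemas:
                yield database, schema

    def schemas_searched(self):
        return {(database, schema) for database, schema in self.search()}


if __name__ == '__main__':
    mapping = SchemaSearchMap()
    mapping.add('analytics', 'DBT_DEV')      # lowercased on insert
    mapping.add('analytics', 'snapshots')
    mapping.add('raw_data', 'stripe')

    for database, schema in mapping.search():
        print(database, schema)
    print(mapping.schemas_searched())
```

The lowercasing on insert mirrors the docstring above: several quoted spellings of the same schema can coexist on the database side, but the cache only needs one lowercase entry per logical schema.
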
@@ -136,29 +192,40 @@ def __init__(self, config): ### # Methods that pass through to the connection manager ### - def acquire_connection(self, name): - return self.connections.get(name) + def acquire_connection(self, name=None): + return self.connections.set_connection_name(name) - def release_connection(self, name): - return self.connections.release(name) + def release_connection(self): + return self.connections.release() def cleanup_connections(self): return self.connections.cleanup_all() - def clear_transaction(self, conn_name='master'): - return self.connections.clear_transaction(conn_name) + def clear_transaction(self): + self.connections.clear_transaction() - def commit_if_has_connection(self, name): - return self.connections.commit_if_has_connection(name) + def commit_if_has_connection(self): + return self.connections.commit_if_has_connection() + + def nice_connection_name(self): + conn = self.connections.get_thread_connection() + if conn is None or conn.name is None: + return '' + return conn.name + + @contextmanager + def connection_named(self, name): + try: + yield self.acquire_connection(name) + finally: + self.release_connection() @available - def execute(self, sql, model_name=None, auto_begin=False, fetch=False): + def execute(self, sql, auto_begin=False, fetch=False): """Execute the given SQL. This is a thin wrapper around ConnectionManager.execute. :param str sql: The sql to execute. - :param Optional[str] model_name: The model name to use for the - connection. :param bool auto_begin: If set, and dbt is not currently inside a transaction, automatically begin one. :param bool fetch: If set, fetch results. @@ -167,7 +234,6 @@ def execute(self, sql, model_name=None, auto_begin=False, fetch=False): """ return self.connections.execute( sql=sql, - name=model_name, auto_begin=auto_begin, fetch=fetch ) @@ -201,14 +267,15 @@ def check_internal_manifest(self): ### # Caching methods ### - def _schema_is_cached(self, database, schema, model_name=None): + def _schema_is_cached(self, database, schema): """Check if the schema is cached, and by default logs if it is not.""" + if dbt.flags.USE_CACHE is False: return False elif (database, schema) not in self.cache: logger.debug( 'On "{}": cache miss for schema "{}.{}", this is inefficient' - .format(model_name or '', database, schema) + .format(self.nice_connection_name(), database, schema) ) return False else: @@ -222,6 +289,27 @@ def _relations_filter_table(cls, table, schemas): """ return table.where(_relations_filter_schemas(schemas)) + def _get_cache_schemas(self, manifest, exec_only=False): + """Get a mapping of each node's "information_schema" relations to a + set of all schemas expected in that information_schema. + + There may be keys that are technically duplicates on the database side, + for example all of '"foo", 'foo', '"FOO"' and 'FOO' could coexist as + databases, and values could overlap as appropriate. All values are + lowercase strings. 
+ """ + info_schema_name_map = SchemaSearchMap() + for node in manifest.nodes.values(): + if exec_only and node.resource_type not in NodeType.executable(): + continue + relation = self.Relation.create_from(self.config, node) + info_schema_name_map.add(relation) + # result is a map whose keys are information_schema Relations without + # identifiers that have appropriate database prefixes, and whose values + # are sets of lowercase schema names that are valid members of those + # schemas + return info_schema_name_map + def _relations_cache_for_schemas(self, manifest): """Populate the relations cache for the given schemas. Returns an iteratble of the schemas populated, as strings. @@ -229,17 +317,16 @@ def _relations_cache_for_schemas(self, manifest): if not dbt.flags.USE_CACHE: return - schemas = manifest.get_used_schemas() - - relations = [] - # add all relations - for db, schema in schemas: + info_schema_name_map = self._get_cache_schemas(manifest, + exec_only=True) + for db, schema in info_schema_name_map.search(): for relation in self.list_relations_without_caching(db, schema): self.cache.add(relation) + # it's possible that there were no relations in some schemas. We want # to insert the schemas we query into the cache's `.schemas` attribute # so we can check it later - self.cache.update_schemas(schemas) + self.cache.update_schemas(info_schema_name_map.schemas_searched()) def set_relations_cache(self, manifest, clear=False): """Run a query that gets a populated cache of the relations in the @@ -253,11 +340,12 @@ def set_relations_cache(self, manifest, clear=False): self.cache.clear() self._relations_cache_for_schemas(manifest) - def cache_new_relation(self, relation, model_name=None): + def cache_new_relation(self, relation): """Cache a new relation in dbt. It will show up in `list relations`.""" if relation is None: + name = self.nice_connection_name() dbt.exceptions.raise_compiler_error( - 'Attempted to cache a null relation for {}'.format(model_name) + 'Attempted to cache a null relation for {}'.format(name) ) if dbt.flags.USE_CACHE: self.cache.add(relation) @@ -287,11 +375,10 @@ def is_cancelable(cls): # Abstract methods about schemas ### @abc.abstractmethod - def list_schemas(self, database, model_name=None): + def list_schemas(self, database): """Get a list of existing schemas. :param str database: The name of the database to list under. - :param Optional[str] model_name: The name of the connection to query as :return: All schemas that currently exist in the database :rtype: List[str] """ @@ -299,7 +386,7 @@ def list_schemas(self, database, model_name=None): '`list_schemas` is not implemented for this adapter!' ) - def check_schema_exists(self, database, schema, model_name=None): + def check_schema_exists(self, database, schema): """Check if a schema exists. The default implementation of this is potentially unnecessarily slow, @@ -308,7 +395,7 @@ def check_schema_exists(self, database, schema, model_name=None): """ search = ( s.lower() for s in - self.list_schemas(database=database, model_name=model_name) + self.list_schemas(database=database) ) return schema.lower() in search @@ -317,14 +404,12 @@ def check_schema_exists(self, database, schema, model_name=None): ### @abc.abstractmethod @available - def drop_relation(self, relation, model_name=None): + def drop_relation(self, relation): """Drop the given relation. 
*Implementors must call self.cache.drop() to preserve cache state!* :param self.Relation relation: The relation to drop - :param Optional[str] model_name: The name of the model to use for the - connection. """ raise dbt.exceptions.NotImplementedException( '`drop_relation` is not implemented for this adapter!' @@ -332,27 +417,24 @@ def drop_relation(self, relation, model_name=None): @abc.abstractmethod @available - def truncate_relation(self, relation, model_name=None): + def truncate_relation(self, relation): """Truncate the given relation. :param self.Relation relation: The relation to truncate - :param Optional[str] model_name: The name of the model to use for the - connection.""" + """ raise dbt.exceptions.NotImplementedException( '`truncate_relation` is not implemented for this adapter!' ) @abc.abstractmethod @available - def rename_relation(self, from_relation, to_relation, model_name=None): + def rename_relation(self, from_relation, to_relation): """Rename the relation from from_relation to to_relation. Implementors must call self.cache.rename() to preserve cache state. :param self.Relation from_relation: The original relation name :param self.Relation to_relation: The new relation name - :param Optional[str] model_name: The name of the model to use for the - connection. """ raise dbt.exceptions.NotImplementedException( '`rename_relation` is not implemented for this adapter!' @@ -360,12 +442,10 @@ def rename_relation(self, from_relation, to_relation, model_name=None): @abc.abstractmethod @available - def get_columns_in_relation(self, relation, model_name=None): + def get_columns_in_relation(self, relation): """Get a list of the columns in the given Relation. :param self.Relation relation: The relation to query for. - :param Optional[str] model_name: The name of the model to use for the - connection. :return: Information about all columns in the given relation. :rtype: List[self.Column] """ @@ -374,7 +454,7 @@ def get_columns_in_relation(self, relation, model_name=None): ) @available_deprecated('get_columns_in_relation') - def get_columns_in_table(self, schema, identifier, model_name=None): + def get_columns_in_table(self, schema, identifier): """DEPRECATED: Get a list of the columns in the given table.""" relation = self.Relation.create( database=self.config.credentials.database, @@ -382,34 +462,30 @@ def get_columns_in_table(self, schema, identifier, model_name=None): identifier=identifier, quote_policy=self.config.quoting ) - return self.get_columns_in_relation(relation, model_name=model_name) + return self.get_columns_in_relation(relation) @abc.abstractmethod - def expand_column_types(self, goal, current, model_name=None): + def expand_column_types(self, goal, current): """Expand the current table's types to match the goal table. (passable) :param self.Relation goal: A relation that currently exists in the database with columns of the desired types. :param self.Relation current: A relation that currently exists in the database with columns of unspecified types. - :param Optional[str] model_name: The name of the model to use for the - connection. """ raise dbt.exceptions.NotImplementedException( '`expand_target_column_types` is not implemented for this adapter!' ) @abc.abstractmethod - def list_relations_without_caching(self, database, schema, - model_name=None): + def list_relations_without_caching(self, information_schema, schema): """List relations in the given schema, bypassing the cache. This is used as the underlying behavior to fill the cache. 
- :param str database: The name of the database to list relations from. + :param Relation information_schema: The information schema to list + relations from. :param str schema: The name of the schema to list relations from. - :param Optional[str] model_name: The name of the model to use for the - connection. :return: The relations in schema :retype: List[self.Relation] """ @@ -422,7 +498,7 @@ def list_relations_without_caching(self, database, schema, # Provided methods about relations ### @available - def get_missing_columns(self, from_relation, to_relation, model_name=None): + def get_missing_columns(self, from_relation, to_relation): """Returns dict of {column:type} for columns in from_table that are missing from to_relation """ @@ -442,12 +518,12 @@ def get_missing_columns(self, from_relation, to_relation, model_name=None): from_columns = { col.name: col for col in - self.get_columns_in_relation(from_relation, model_name=model_name) + self.get_columns_in_relation(from_relation) } to_columns = { col.name: col for col in - self.get_columns_in_relation(to_relation, model_name=model_name) + self.get_columns_in_relation(to_relation) } missing_columns = set(from_columns.keys()) - set(to_columns.keys()) @@ -458,8 +534,49 @@ def get_missing_columns(self, from_relation, to_relation, model_name=None): ] @available - def expand_target_column_types(self, temp_table, to_relation, - model_name=None): + def valid_archive_target(self, relation): + """Ensure that the target relation is valid, by making sure it has the + expected columns. + + :param Relation relation: The relation to check + :raises dbt.exceptions.CompilationException: If the columns are + incorrect. + """ + if not isinstance(relation, self.Relation): + dbt.exceptions.invalid_type_error( + method_name='is_existing_old_style_archive', + arg_name='relation', + got_value=relation, + expected_type=self.Relation) + + columns = self.get_columns_in_relation(relation) + names = set(c.name.lower() for c in columns) + expanded_keys = ('scd_id', 'valid_from', 'valid_to') + extra = [] + missing = [] + for legacy in expanded_keys: + desired = 'dbt_' + legacy + if desired not in names: + missing.append(desired) + if legacy in names: + extra.append(legacy) + + if missing: + if extra: + msg = ( + 'Archive target has ("{}") but not ("{}") - is it an ' + 'unmigrated previous version archive?' + .format('", "'.join(extra), '", "'.join(missing)) + ) + else: + msg = ( + 'Archive target is not an archive table (missing "{}")' + .format('", "'.join(missing)) + ) + dbt.exceptions.raise_compiler_error(msg) + + @available + def expand_target_column_types(self, temp_table, to_relation): if not isinstance(to_relation, self.Relation): dbt.exceptions.invalid_type_error( method_name='expand_target_column_types', @@ -474,20 +591,25 @@ def expand_target_column_types(self, temp_table, to_relation, type='table', quote_policy=self.config.quoting ) - self.expand_column_types(goal, to_relation, model_name) + self.expand_column_types(goal, to_relation) - def list_relations(self, database, schema, model_name=None): - if self._schema_is_cached(database, schema, model_name): + def list_relations(self, database, schema): + if self._schema_is_cached(database, schema): return self.cache.get_relations(database, schema) + information_schema = self.Relation.create( + database=database, + schema=schema, + model_name='').information_schema() + # we can't build the relations cache because we don't have a # manifest so we can't run any operations. 
relations = self.list_relations_without_caching( - database, schema, model_name=model_name + information_schema, schema ) - logger.debug('with schema={}, model_name={}, relations={}' - .format(schema, model_name, relations)) + logger.debug('with database={}, schema={}, relations={}' + .format(database, schema, relations)) return relations def _make_match_kwargs(self, database, schema, identifier): @@ -520,8 +642,8 @@ def _make_match(self, relations_list, database, schema, identifier): return matches @available - def get_relation(self, database, schema, identifier, model_name=None): - relations_list = self.list_relations(database, schema, model_name) + def get_relation(self, database, schema, identifier): + relations_list = self.list_relations(database, schema) matches = self._make_match(relations_list, database, schema, identifier) @@ -542,11 +664,10 @@ def get_relation(self, database, schema, identifier, model_name=None): return None @available_deprecated('get_relation') - def already_exists(self, schema, name, model_name=None): + def already_exists(self, schema, name): """DEPRECATED: Return if a model already exists in the database""" database = self.config.credentials.database - relation = self.get_relation(database, schema, name, - model_name=model_name) + relation = self.get_relation(database, schema, name) return relation is not None ### @@ -555,30 +676,26 @@ def already_exists(self, schema, name, model_name=None): ### @abc.abstractmethod @available - def create_schema(self, database, schema, model_name=None): + def create_schema(self, database, schema): """Create the given schema if it does not exist. :param str schema: The schema name to create. - :param Optional[str] model_name: The name of the model to use for the - connection. """ raise dbt.exceptions.NotImplementedException( '`create_schema` is not implemented for this adapter!' ) @abc.abstractmethod - def drop_schema(self, database, schema, model_name=None): + def drop_schema(self, database, schema): """Drop the given schema (and everything in it) if it exists. :param str schema: The schema name to drop. - :param Optional[str] model_name: The name of the model to use for the - connection. """ raise dbt.exceptions.NotImplementedException( '`drop_schema` is not implemented for this adapter!' ) - @available_raw + @available @abstractclassmethod def quote(cls, identifier): """Quote the given identifier, as appropriate for the database. @@ -592,7 +709,7 @@ def quote(cls, identifier): ) @available - def quote_as_configured(self, identifier, quote_key, model_name=None): + def quote_as_configured(self, identifier, quote_key): """Quote or do not quote the given identifer as configured in the project config for the quote key. @@ -687,7 +804,7 @@ def convert_time_type(cls, agate_table, col_idx): raise dbt.exceptions.NotImplementedException( '`convert_time_type` is not implemented for this adapter!') - @available_raw + @available @classmethod def convert_type(cls, agate_table, col_idx): return cls.convert_agate_type(agate_table, col_idx) @@ -711,8 +828,7 @@ def convert_agate_type(cls, agate_table, col_idx): # Operations involving the manifest ### def execute_macro(self, macro_name, manifest=None, project=None, - context_override=None, kwargs=None, release=False, - connection_name=None): + context_override=None, kwargs=None, release=False): """Look macro_name up in the manifest and execute its results. :param str macro_name: The name of the macro to execute. 
@@ -726,8 +842,6 @@ def execute_macro(self, macro_name, manifest=None, project=None, :param Optional[dict] kwargs: An optional dict of keyword args used to pass to the macro. :param bool release: If True, release the connection after executing. - :param Optional[str] connection_name: The connection name to use, or - use the macro name. Return an an AttrDict with three attributes: 'table', 'data', and 'status'. 'table' is an agate.Table. @@ -736,27 +850,30 @@ def execute_macro(self, macro_name, manifest=None, project=None, kwargs = {} if context_override is None: context_override = {} - if connection_name is None: - connection_name = macro_name if manifest is None: manifest = self._internal_manifest macro = manifest.find_macro_by_name(macro_name, project) if macro is None: + if project is None: + package_name = 'any package' + else: + package_name = 'the "{}" package'.format(project) + + # The import of dbt.context.runtime below shadows 'dbt' + import dbt.exceptions raise dbt.exceptions.RuntimeException( - 'Could not find macro with name {} in project {}' - .format(macro_name, project) + 'dbt could not find a macro with the name "{}" in {}' + .format(macro_name, package_name) ) - # This causes a reference cycle, as dbt.context.runtime.generate() # ends up calling get_adapter, so the import has to be here. import dbt.context.runtime macro_context = dbt.context.runtime.generate_macro( macro, self.config, - manifest, - connection_name + manifest ) macro_context.update(context_override) @@ -766,7 +883,7 @@ def execute_macro(self, macro_name, manifest=None, project=None, result = macro_function(**kwargs) finally: if release: - self.release_connection(connection_name) + self.release_connection() return result @classmethod @@ -780,10 +897,11 @@ def get_catalog(self, manifest): """Get the catalog for this manifest by running the get catalog macro. Returns an agate.Table of catalog information. """ + information_schemas = list(self._get_cache_schemas(manifest).keys()) # make it a list so macros can index into it. - context = {'databases': list(manifest.get_used_databases())} + kwargs = {'information_schemas': information_schemas} table = self.execute_macro(GET_CATALOG_MACRO_NAME, - context_override=context, + kwargs=kwargs, release=True) results = self._catalog_filter_table(table, manifest) @@ -793,8 +911,7 @@ def cancel_open_connections(self): """Cancel all open connections.""" return self.connections.cancel_open() - def calculate_freshness(self, source, loaded_at_field, manifest=None, - connection_name=None): + def calculate_freshness(self, source, loaded_at_field, manifest=None): """Calculate the freshness of sources in dbt, and return it""" # in the future `source` will be a Relation instead of a string kwargs = { @@ -807,8 +924,7 @@ def calculate_freshness(self, source, loaded_at_field, manifest=None, FRESHNESS_MACRO_NAME, kwargs=kwargs, release=True, - manifest=manifest, - connection_name=connection_name + manifest=manifest ) # now we have a 1-row table of the maximum `loaded_at_field` value and # the current time according to the db. 
@@ -816,11 +932,12 @@ def calculate_freshness(self, source, loaded_at_field, manifest=None, dbt.exceptions.raise_compiler_error( 'Got an invalid result from "{}" macro: {}'.format( FRESHNESS_MACRO_NAME, [tuple(r) for r in table] - ), - node=node + ) ) - max_loaded_at, snapshotted_at = map(_utc, table[0]) + max_loaded_at = _utc(table[0][0], source, loaded_at_field) + snapshotted_at = _utc(table[0][1], source, loaded_at_field) + age = (snapshotted_at - max_loaded_at).total_seconds() return { 'max_loaded_at': max_loaded_at, diff --git a/core/dbt/adapters/base/meta.py b/core/dbt/adapters/base/meta.py index b7968fe06ba..14201c93563 100644 --- a/core/dbt/adapters/base/meta.py +++ b/core/dbt/adapters/base/meta.py @@ -9,17 +9,6 @@ def available(func): arguments. """ func._is_available_ = True - func._model_name_ = True - return func - - -def available_raw(func): - """A decorator to indicate that a method on the adapter will be exposed to - the database wrapper, and the model name will be injected into the - arguments. - """ - func._is_available_ = True - func._model_name_ = False return func @@ -57,24 +46,16 @@ def __new__(mcls, name, bases, namespace, **kwargs): # dict mapping the method name to whether the model name should be # injected into the arguments. All methods in here are exposed to the # context. - available_model = set() - available_raw = set() + available = set() # collect base class data first for base in bases: - available_model.update(getattr(base, '_available_model_', set())) - available_raw.update(getattr(base, '_available_raw_', set())) + available.update(getattr(base, '_available_', set())) # override with local data if it exists for name, value in namespace.items(): if getattr(value, '_is_available_', False): - if getattr(value, '_model_name_', False): - available_raw.discard(name) - available_model.add(name) - else: - available_model.discard(name) - available_raw.add(name) + available.add(name) - cls._available_model_ = frozenset(available_model) - cls._available_raw_ = frozenset(available_raw) + cls._available_ = frozenset(available) return cls diff --git a/core/dbt/adapters/base/plugin.py b/core/dbt/adapters/base/plugin.py index 523b8a43fa9..a1961a35b98 100644 --- a/core/dbt/adapters/base/plugin.py +++ b/core/dbt/adapters/base/plugin.py @@ -1,5 +1,3 @@ -import os - from dbt.config.project import Project diff --git a/core/dbt/adapters/base/relation.py b/core/dbt/adapters/base/relation.py index 0a40346b5be..2192fed466c 100644 --- a/core/dbt/adapters/base/relation.py +++ b/core/dbt/adapters/base/relation.py @@ -1,5 +1,6 @@ from dbt.api import APIObject from dbt.utils import filter_null_values +from dbt.node_types import NodeType import dbt.exceptions @@ -30,7 +31,7 @@ class BaseRelation(APIObject): 'database': True, 'schema': True, 'identifier': True - } + }, } PATH_SCHEMA = { @@ -38,7 +39,7 @@ class BaseRelation(APIObject): 'properties': { 'database': {'type': ['string', 'null']}, 'schema': {'type': ['string', 'null']}, - 'identifier': {'type': 'string'}, + 'identifier': {'type': ['string', 'null']}, }, 'required': ['database', 'schema', 'identifier'], } @@ -135,6 +136,36 @@ def include(self, database=None, schema=None, identifier=None): return self.incorporate(include_policy=policy) + def information_schema(self, identifier=None): + include_db = self.database is not None + include_policy = filter_null_values({ + 'database': include_db, + 'schema': True, + 'identifier': identifier is not None + }) + quote_policy = filter_null_values({ + 'database': 
self.quote_policy['database'], + 'schema': False, + 'identifier': False, + }) + + path_update = { + 'schema': 'information_schema', + 'identifier': identifier + } + + return self.incorporate( + quote_policy=quote_policy, + include_policy=include_policy, + path=path_update, + table_name=identifier) + + def information_schema_only(self): + return self.information_schema() + + def information_schema_table(self, identifier): + return self.information_schema(identifier) + def render(self, use_table_name=True): parts = [] @@ -174,15 +205,16 @@ def quoted(self, identifier): @classmethod def create_from_source(cls, source, **kwargs): + quote_policy = dbt.utils.deep_merge( + cls.DEFAULTS['quote_policy'], + source.quoting, + kwargs.get('quote_policy', {}) + ) return cls.create( database=source.database, schema=source.schema, identifier=source.identifier, - quote_policy={ - 'database': True, - 'schema': True, - 'identifier': True, - }, + quote_policy=quote_policy, **kwargs ) @@ -202,6 +234,13 @@ def create_from_node(cls, config, node, table_name=None, quote_policy=None, quote_policy=quote_policy, **kwargs) + @classmethod + def create_from(cls, config, node, **kwargs): + if node.resource_type == NodeType.Source: + return cls.create_from_source(node, **kwargs) + else: + return cls.create_from_node(config, node, **kwargs) + @classmethod def create(cls, database=None, schema=None, identifier=None, table_name=None, @@ -264,3 +303,91 @@ def is_cte(self): @property def is_view(self): return self.type == self.View + + +class Column(object): + TYPE_LABELS = { + 'STRING': 'TEXT', + 'TIMESTAMP': 'TIMESTAMP', + 'FLOAT': 'FLOAT', + 'INTEGER': 'INT' + } + + def __init__(self, column, dtype, char_size=None, numeric_precision=None, + numeric_scale=None): + self.column = column + self.dtype = dtype + self.char_size = char_size + self.numeric_precision = numeric_precision + self.numeric_scale = numeric_scale + + @classmethod + def translate_type(cls, dtype): + return cls.TYPE_LABELS.get(dtype.upper(), dtype) + + @classmethod + def create(cls, name, label_or_dtype): + column_type = cls.translate_type(label_or_dtype) + return cls(name, column_type) + + @property + def name(self): + return self.column + + @property + def quoted(self): + return '"{}"'.format(self.column) + + @property + def data_type(self): + if self.is_string(): + return Column.string_type(self.string_size()) + elif self.is_numeric(): + return Column.numeric_type(self.dtype, self.numeric_precision, + self.numeric_scale) + else: + return self.dtype + + def is_string(self): + return self.dtype.lower() in ['text', 'character varying', 'character', + 'varchar'] + + def is_numeric(self): + return self.dtype.lower() in ['numeric', 'number'] + + def string_size(self): + if not self.is_string(): + raise RuntimeError("Called string_size() on non-string field!") + + if self.dtype == 'text' or self.char_size is None: + # char_size should never be None. 
Handle it reasonably just in case + return 255 + else: + return int(self.char_size) + + def can_expand_to(self, other_column): + """returns True if this column can be expanded to the size of the + other column""" + if not self.is_string() or not other_column.is_string(): + return False + + return other_column.string_size() > self.string_size() + + def literal(self, value): + return "{}::{}".format(value, self.data_type) + + @classmethod + def string_type(cls, size): + return "character varying({})".format(size) + + @classmethod + def numeric_type(cls, dtype, precision, scale): + # This could be decimal(...), numeric(...), number(...) + # Just use whatever was fed in here -- don't try to get too clever + if precision is None or scale is None: + return dtype + else: + return "{}({},{})".format(dtype, precision, scale) + + def __repr__(self): + return "".format(self.name, self.data_type) diff --git a/core/dbt/adapters/factory.py b/core/dbt/adapters/factory.py index 2cbe2dc7ac6..39ba9d070c8 100644 --- a/core/dbt/adapters/factory.py +++ b/core/dbt/adapters/factory.py @@ -1,5 +1,3 @@ -from dbt.logger import GLOBAL_LOGGER as logger - import dbt.exceptions from importlib import import_module from dbt.include.global_project import PACKAGES @@ -30,7 +28,7 @@ def get_relation_class_by_name(adapter_name): def load_plugin(adapter_name): try: - mod = import_module('.'+adapter_name, 'dbt.adapters') + mod = import_module('.' + adapter_name, 'dbt.adapters') except ImportError: raise dbt.exceptions.RuntimeException( "Could not find adapter type {}!".format(adapter_name) diff --git a/core/dbt/adapters/sql/__init__.py b/core/dbt/adapters/sql/__init__.py index e73e49ff99d..3535806364d 100644 --- a/core/dbt/adapters/sql/__init__.py +++ b/core/dbt/adapters/sql/__init__.py @@ -1,2 +1,3 @@ -from dbt.adapters.sql.connections import SQLConnectionManager -from dbt.adapters.sql.impl import SQLAdapter +# these are all just exports, #noqa them so flake8 will be happy +from dbt.adapters.sql.connections import SQLConnectionManager # noqa +from dbt.adapters.sql.impl import SQLAdapter # noqa diff --git a/core/dbt/adapters/sql/connections.py b/core/dbt/adapters/sql/connections.py index a0c7bedf2ed..a6db10d1215 100644 --- a/core/dbt/adapters/sql/connections.py +++ b/core/dbt/adapters/sql/connections.py @@ -30,31 +30,30 @@ def cancel(self, connection): def cancel_open(self): names = [] + this_connection = self.get_if_exists() with self.lock: - for name, connection in self.in_use.items(): - if name == 'master': + for connection in self.thread_connections.values(): + if connection is this_connection: continue self.cancel(connection) - names.append(name) + names.append(connection.name) return names - def add_query(self, sql, name=None, auto_begin=True, bindings=None, + def add_query(self, sql, auto_begin=True, bindings=None, abridge_sql_log=False): - connection = self.get(name) - connection_name = connection.name - + connection = self.get_thread_connection() if auto_begin and connection.transaction_open is False: - self.begin(connection_name) + self.begin() logger.debug('Using {} connection "{}".' 
- .format(self.TYPE, connection_name)) + .format(self.TYPE, connection.name)) - with self.exception_handler(sql, connection_name): + with self.exception_handler(sql): if abridge_sql_log: - logger.debug('On %s: %s....', connection_name, sql[0:512]) + logger.debug('On %s: %s....', connection.name, sql[0:512]) else: - logger.debug('On %s: %s', connection_name, sql) + logger.debug('On %s: %s', connection.name, sql) pre = time.time() cursor = connection.handle.cursor() @@ -90,9 +89,8 @@ def get_result_from_cursor(cls, cursor): return dbt.clients.agate_helper.table_from_data(data, column_names) - def execute(self, sql, name=None, auto_begin=False, fetch=False): - self.get(name) - _, cursor = self.add_query(sql, name, auto_begin) + def execute(self, sql, auto_begin=False, fetch=False): + _, cursor = self.add_query(sql, auto_begin) status = self.get_status(cursor) if fetch: table = self.get_result_from_cursor(cursor) @@ -100,14 +98,14 @@ def execute(self, sql, name=None, auto_begin=False, fetch=False): table = dbt.clients.agate_helper.empty_table() return status, table - def add_begin_query(self, name): - return self.add_query('BEGIN', name, auto_begin=False) + def add_begin_query(self): + return self.add_query('BEGIN', auto_begin=False) - def add_commit_query(self, name): - return self.add_query('COMMIT', name, auto_begin=False) + def add_commit_query(self): + return self.add_query('COMMIT', auto_begin=False) - def begin(self, name): - connection = self.get(name) + def begin(self): + connection = self.get_thread_connection() if dbt.flags.STRICT_MODE: assert isinstance(connection, Connection) @@ -117,29 +115,24 @@ def begin(self, name): 'Tried to begin a new transaction on connection "{}", but ' 'it already had one open!'.format(connection.get('name'))) - self.add_begin_query(name) + self.add_begin_query() connection.transaction_open = True - self.in_use[name] = connection - return connection - def commit(self, connection): - + def commit(self): + connection = self.get_thread_connection() if dbt.flags.STRICT_MODE: assert isinstance(connection, Connection) - connection = self.get(connection.name) - if connection.transaction_open is False: raise dbt.exceptions.InternalException( 'Tried to commit transaction on connection "{}", but ' 'it does not have one open!'.format(connection.name)) logger.debug('On {}: COMMIT'.format(connection.name)) - self.add_commit_query(connection.name) + self.add_commit_query() connection.transaction_open = False - self.in_use[connection.name] = connection return connection diff --git a/core/dbt/adapters/sql/impl.py b/core/dbt/adapters/sql/impl.py index c7fa6d79b7f..245b812def1 100644 --- a/core/dbt/adapters/sql/impl.py +++ b/core/dbt/adapters/sql/impl.py @@ -1,15 +1,10 @@ -import abc -import time - import agate -import six import dbt.clients.agate_helper import dbt.exceptions import dbt.flags from dbt.adapters.base import BaseAdapter, available from dbt.logger import GLOBAL_LOGGER as logger -from dbt.compat import abstractclassmethod LIST_RELATIONS_MACRO_NAME = 'list_relations_without_caching' @@ -41,14 +36,12 @@ class SQLAdapter(BaseAdapter): - get_columns_in_relation """ @available - def add_query(self, sql, model_name=None, auto_begin=True, bindings=None, + def add_query(self, sql, auto_begin=True, bindings=None, abridge_sql_log=False): """Add a query to the current transaction. A thin wrapper around ConnectionManager.add_query. 
:param str sql: The SQL query to add - :param Optional[str] model_name: The name of the connection the - transaction is on :param bool auto_begin: If set and there is no transaction in progress, begin a new one. :param Optional[List[object]]: An optional list of bindings for the @@ -56,8 +49,8 @@ def add_query(self, sql, model_name=None, auto_begin=True, bindings=None, :param bool abridge_sql_log: If set, limit the raw sql logged to 512 characters """ - return self.connections.add_query(sql, model_name, auto_begin, - bindings, abridge_sql_log) + return self.connections.add_query(sql, auto_begin, bindings, + abridge_sql_log) @classmethod def convert_text_type(cls, agate_table, col_idx): @@ -88,15 +81,15 @@ def convert_time_type(cls, agate_table, col_idx): def is_cancelable(cls): return True - def expand_column_types(self, goal, current, model_name=None): + def expand_column_types(self, goal, current): reference_columns = { c.name: c for c in - self.get_columns_in_relation(goal, model_name=model_name) + self.get_columns_in_relation(goal) } target_columns = { c.name: c for c - in self.get_columns_in_relation(current, model_name=model_name) + in self.get_columns_in_relation(current) } for column_name, reference_column in reference_columns.items(): @@ -109,14 +102,9 @@ def expand_column_types(self, goal, current, model_name=None): logger.debug("Changing col type from %s to %s in table %s", target_column.data_type, new_type, current) - self.alter_column_type(current, column_name, new_type, - model_name=model_name) - - if model_name is None: - self.release_connection('master') + self.alter_column_type(current, column_name, new_type) - def alter_column_type(self, relation, column_name, new_column_type, - model_name=None): + def alter_column_type(self, relation, column_name, new_column_type): """ 1. Create a new column (w/ temp name and correct type) 2. 
Copy data over to it @@ -130,11 +118,10 @@ def alter_column_type(self, relation, column_name, new_column_type, } self.execute_macro( ALTER_COLUMN_TYPE_MACRO_NAME, - kwargs=kwargs, - connection_name=model_name + kwargs=kwargs ) - def drop_relation(self, relation, model_name=None): + def drop_relation(self, relation): if dbt.flags.USE_CACHE: self.cache.drop(relation) if relation.type is None: @@ -144,65 +131,54 @@ def drop_relation(self, relation, model_name=None): self.execute_macro( DROP_RELATION_MACRO_NAME, - kwargs={'relation': relation}, - connection_name=model_name + kwargs={'relation': relation} ) - def truncate_relation(self, relation, model_name=None): + def truncate_relation(self, relation): self.execute_macro( TRUNCATE_RELATION_MACRO_NAME, - kwargs={'relation': relation}, - connection_name=model_name + kwargs={'relation': relation} ) - def rename_relation(self, from_relation, to_relation, model_name=None): + def rename_relation(self, from_relation, to_relation): if dbt.flags.USE_CACHE: self.cache.rename(from_relation, to_relation) kwargs = {'from_relation': from_relation, 'to_relation': to_relation} self.execute_macro( RENAME_RELATION_MACRO_NAME, - kwargs=kwargs, - connection_name=model_name + kwargs=kwargs ) - def get_columns_in_relation(self, relation, model_name=None): + def get_columns_in_relation(self, relation): return self.execute_macro( GET_COLUMNS_IN_RELATION_MACRO_NAME, - kwargs={'relation': relation}, - connection_name=model_name + kwargs={'relation': relation} ) - def create_schema(self, database, schema, model_name=None): + def create_schema(self, database, schema): logger.debug('Creating schema "%s"."%s".', database, schema) - if model_name is None: - model_name = 'master' kwargs = { 'database_name': self.quote_as_configured(database, 'database'), 'schema_name': self.quote_as_configured(schema, 'schema'), } - self.execute_macro(CREATE_SCHEMA_MACRO_NAME, - kwargs=kwargs, - connection_name=model_name) - self.commit_if_has_connection(model_name) + self.execute_macro(CREATE_SCHEMA_MACRO_NAME, kwargs=kwargs) + self.commit_if_has_connection() - def drop_schema(self, database, schema, model_name=None): + def drop_schema(self, database, schema): logger.debug('Dropping schema "%s"."%s".', database, schema) kwargs = { 'database_name': self.quote_as_configured(database, 'database'), 'schema_name': self.quote_as_configured(schema, 'schema'), } self.execute_macro(DROP_SCHEMA_MACRO_NAME, - kwargs=kwargs, - connection_name=model_name) + kwargs=kwargs) - def list_relations_without_caching(self, database, schema, - model_name=None): + def list_relations_without_caching(self, information_schema, schema): + kwargs = {'information_schema': information_schema, 'schema': schema} results = self.execute_macro( LIST_RELATIONS_MACRO_NAME, - kwargs={'database': database, 'schema': schema}, - connection_name=model_name, - release=True + kwargs=kwargs ) relations = [] @@ -223,22 +199,22 @@ def list_relations_without_caching(self, database, schema, def quote(cls, identifier): return '"{}"'.format(identifier) - def list_schemas(self, database, model_name=None): + def list_schemas(self, database): results = self.execute_macro( LIST_SCHEMAS_MACRO_NAME, - kwargs={'database': database}, - connection_name=model_name, - # release when the model_name is none, as that implies we were - # called by node_runners.py. 
- release=(model_name is None) + kwargs={'database': database} ) return [row[0] for row in results] - def check_schema_exists(self, database, schema, model_name=None): + def check_schema_exists(self, database, schema): + information_schema = self.Relation.create( + database=database, schema=schema + ).information_schema() + + kwargs = {'information_schema': information_schema, 'schema': schema} results = self.execute_macro( CHECK_SCHEMA_EXISTS_MACRO_NAME, - kwargs={'database': database, 'schema': schema}, - connection_name=model_name + kwargs=kwargs ) return results[0][0] > 0 diff --git a/core/dbt/api/object.py b/core/dbt/api/object.py index b12e37a3652..771d13e9919 100644 --- a/core/dbt/api/object.py +++ b/core/dbt/api/object.py @@ -1,6 +1,6 @@ import copy from collections import Mapping -from jsonschema import Draft4Validator +from jsonschema import Draft7Validator from dbt.exceptions import JSONValidationException from dbt.utils import deep_merge @@ -79,7 +79,7 @@ def validate(self): of this instance. If any attributes are missing or invalid, raise a ValidationException. """ - validator = Draft4Validator(self.SCHEMA) + validator = Draft7Validator(self.SCHEMA) errors = set() # make errors a set to avoid duplicates diff --git a/core/dbt/clients/_jinja_blocks.py b/core/dbt/clients/_jinja_blocks.py new file mode 100644 index 00000000000..6207ab4956c --- /dev/null +++ b/core/dbt/clients/_jinja_blocks.py @@ -0,0 +1,481 @@ +import re + +import dbt.exceptions + + +def regex(pat): + return re.compile(pat, re.DOTALL | re.MULTILINE) + + +class BlockData(object): + """raw plaintext data from the top level of the file.""" + def __init__(self, contents): + self.block_type_name = '__dbt__data' + self.contents = contents + self.full_block = contents + + +class BlockTag(object): + def __init__(self, block_type_name, block_name, contents=None, + full_block=None, **kw): + self.block_type_name = block_type_name + self.block_name = block_name + self.contents = contents + self.full_block = full_block + + def __str__(self): + return 'BlockTag({!r}, {!r})'.format(self.block_type_name, + self.block_name) + + def __repr__(self): + return str(self) + + @property + def end_block_type_name(self): + return 'end{}'.format(self.block_type_name) + + def end_pat(self): + # we don't want to use string formatting here because jinja uses most + # of the string formatting operators in its syntax... + pattern = ''.join(( + r'(?P((?:\s*\{\%\-|\{\%)\s*', + self.end_block_type_name, + r'\s*(?:\-\%\}\s*|\%\})))', + )) + return regex(pattern) + + +_NAME_PATTERN = r'[A-Za-z_][A-Za-z_0-9]*' + +COMMENT_START_PATTERN = regex(r'(?:(?P(\s*\{\#)))') +COMMENT_END_PATTERN = regex(r'(.*?)(\s*\#\})') +RAW_START_PATTERN = regex( + r'(?:\s*\{\%\-|\{\%)\s*(?P(raw))\s*(?:\-\%\}\s*|\%\})' +) + +BLOCK_START_PATTERN = regex(''.join(( + r'(?:\s*\{\%\-|\{\%)\s*', + r'(?P({}))'.format(_NAME_PATTERN), + # some blocks have a 'block name'. + r'(?:\s+(?P({})))?'.format(_NAME_PATTERN), +))) + +TAG_CLOSE_PATTERN = regex(r'(?:(?P(\-\%\}\s*|\%\})))') +# if you do {% materialization foo, adapter="myadapter' %} and end up with +# mismatched quotes this will still match, but jinja will fail somewhere +# since the adapter= argument has to be an adapter name, and none have quotes +# or anything else in them. So this should be fine. +MATERIALIZATION_ARGS_PATTERN = regex( + r'\s*,\s*' + r'''(?P(adapter=(?:['"]{}['"])|default))''' + .format(_NAME_PATTERN) +) +# macros an stuff like macros get open parents, followed by a very complicated +# argument spec! 
In fact, it's easiest to parse it in tiny little chunks +# because we have to handle awful stuff like string parsing ;_; +MACRO_ARGS_START_PATTERN = regex(r'\s*(?P\()\s*') +MACRO_ARGS_END_PATTERN = regex(r'\s*(?P(\)))\s*') + +# macros can be like {% macro foo(bar) %} or {% macro foo(bar, baz) %} or +# {% macro foo(bar, baz="quux") %} or ... +# I think jinja disallows default values after required (like Python), but we +# can ignore that and let jinja deal +MACRO_ARG_PATTERN = regex(''.join(( + r'\s*(?P({}))\s*', + r'((?P=)|(?P,)?)\s*'.format(_NAME_PATTERN), +))) + +# stolen from jinja's lexer. Note that we've consumed all prefix whitespace by +# the time we want to use this. +STRING_PATTERN = regex( + r"(?P('([^'\\]*(?:\\.[^'\\]*)*)'|" + r'"([^"\\]*(?:\\.[^"\\]*)*)"))' +) + +# any number of non-quote characters, followed by: +# - quote: a quote mark indicating start of a string (you'll want to backtrack +# the regex end on quotes and then match with the string pattern) +# - a comma (so there will be another full argument) +# - a closing parenthesis (you can now expect a closing tag) +NON_STRING_MACRO_ARGS_PATTERN = regex( + # anything, followed by a quote, open/close paren, or comma + r'''(.*?)''' + r'''((?P(['"]))|(?P(\())|(?P(\)))|(?P(\,)))''' +) + + +NON_STRING_DO_BLOCK_MEMBER_PATTERN = regex( + # anything, followed by a quote, paren, or a tag end + r'''(.*?)''' + r'''((?P(['"]))|(?P(\())|(?P(\))))''' +) + + +class BlockIterator(object): + def __init__(self, data): + self.data = data + self.blocks = [] + self._block_contents = None + self._parenthesis_stack = [] + self.pos = 0 + + def advance(self, new_position): + blk = self.data[self.pos:new_position] + + if self._block_contents is not None: + self._block_contents += blk + + self.pos = new_position + + def rewind(self, amount=1): + if self._block_contents is not None: + self._block_contents = self._block_contents[:-amount] + + self.pos -= amount + + def _search(self, pattern): + return pattern.search(self.data, self.pos) + + def _match(self, pattern): + return pattern.match(self.data, self.pos) + + def expect_comment_end(self): + """Expect a comment end and return the match object. + """ + match = self._expect_match('#}', COMMENT_END_PATTERN) + self.advance(match.end()) + + def expect_raw_end(self): + end_pat = BlockTag('raw', None).end_pat() + match = self._search(end_pat) + if match is None: + dbt.exceptions.raise_compiler_error( + 'unexpected EOF, expected {% endraw %}' + ) + self.advance(match.end()) + + def _first_match(self, *patterns, **kwargs): + matches = [] + for pattern in patterns: + # default to 'search', but sometimes we want to 'match'. + if kwargs.get('method', 'search') == 'search': + match = self._search(pattern) + else: + match = self._match(pattern) + if match: + matches.append(match) + if not matches: + return None + # if there are multiple matches, pick the least greedy match + # TODO: do I need to account for m.start(), or is this ok? + return min(matches, key=lambda m: m.end()) + + def _expect_match(self, expected_name, *patterns, **kwargs): + match = self._first_match(*patterns, **kwargs) + if match is None: + msg = 'unexpected EOF, expected {}, got "{}"'.format( + expected_name, self.data[self.pos:] + ) + dbt.exceptions.raise_compiler_error(msg) + return match + + def handle_block(self, match, block_start=None): + """Handle a block. 
The current state of the parser should be after the + open block is completed: + {% blk foo %}my data {% endblk %} + ^ right here + """ + # we have to handle comments inside blocks because you could do this: + # {% blk foo %}asdf {# {% endblk %} #} {%endblk%} + # they still end up in the data/raw_data of the block itself, but we + # have to know to ignore stuff until the end comment marker! + found = BlockTag(**match.groupdict()) + # the full block started at the given match start, which may include + # prefixed whitespace! we'll strip it later + if block_start is None: + block_start = match.start() + + self._block_contents = '' + + # you can have as many comments in your block as you'd like! + while True: + match = self._expect_match( + '"{}"'.format(found.end_block_type_name), + found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN, + regex('''(?P(['"]))''') + ) + groups = match.groupdict() + if groups.get('endblock') is not None: + break + + self.advance(match.end()) + + if groups.get('comment_start') is not None: + self.expect_comment_end() + elif groups.get('raw_start') is not None: + self.expect_raw_end() + elif groups.get('quote') is not None: + self.rewind() + match = self._expect_match('any string', STRING_PATTERN) + self.advance(match.end()) + else: + raise dbt.exceptions.InternalException( + 'unhandled regex in handle_block, no match: {}' + .format(groups) + ) + + # we want to advance to just the end tag at first, to extract the + # contents + self.advance(match.start()) + found.contents = self._block_contents + self._block_contents = None + # now advance to the end + self.advance(match.end()) + found.full_block = self.data[block_start:self.pos] + return found + + def handle_materialization(self, match): + self._expect_match('materialization args', + MATERIALIZATION_ARGS_PATTERN) + endtag = self._expect_match('%}', TAG_CLOSE_PATTERN) + self.advance(endtag.end()) + # handle the block we started with! + self.blocks.append(self.handle_block(match)) + + def handle_do(self, match, expect_block): + if expect_block: + # we might be wrong to expect a block ({% do (...) %}, for example) + # so see if there's more data before the tag closes. if there + # isn't, we expect a block. + close_match = self._expect_match('%}', TAG_CLOSE_PATTERN) + unprocessed = self.data[match.end():close_match.start()].strip() + expect_block = not unprocessed + + if expect_block: + # if we're here, expect_block is True and we must have set + # close_match + self.advance(close_match.end()) + block = self.handle_block(match) + else: + # we have a do-statement like {% do thing() %}, so no {% enddo %} + # also, we don't want to advance to the end of the match, as it + # might be inside a string or something! 
So go back and figure out + self._process_rval_components() + block = BlockTag('do', None, + full_block=self.data[match.start():self.pos]) + self.blocks.append(block) + + def handle_set(self, match): + equal_or_close = self._expect_match('%} or =', + TAG_CLOSE_PATTERN, regex(r'=')) + self.advance(equal_or_close.end()) + if equal_or_close.groupdict().get('tag_close') is None: + # it's an equals sign, must be like {% set x = 1 %} + self._process_rval_components() + # watch out, order matters here on python 2 + block = BlockTag(full_block=self.data[match.start():self.pos], + **match.groupdict()) + else: + # it's a tag close, must be like {% set x %}...{% endset %} + block = self.handle_block(match) + self.blocks.append(block) + + def find_block(self): + open_block = ( + r'(?:\s*\{\%\-|\{\%)\s*' + r'(?P([A-Za-z_][A-Za-z_0-9]*))' + # some blocks have a 'block name'. + r'(?:\s+(?P([A-Za-z_][A-Za-z_0-9]*)))?' + ) + + match = self._first_match(regex(open_block), COMMENT_START_PATTERN) + if match is None: + return False + + raw_toplevel = self.data[self.pos:match.start()] + if len(raw_toplevel) > 0: + self.blocks.append(BlockData(raw_toplevel)) + + matchgroups = match.groupdict() + + # comments are easy + if matchgroups.get('comment_start') is not None: + start = match.start() + self.advance(match.end()) + self.expect_comment_end() + self.blocks.append(BlockData(self.data[start:self.pos])) + return True + + block_type_name = matchgroups.get('block_type_name') + + if block_type_name == 'raw': + start = match.start() + self.expect_raw_end() + self.blocks.append(BlockData(self.data[start:self.pos])) + return True + + if block_type_name == 'materialization': + self.advance(match.end()) + self.handle_materialization(match) + return True + + if block_type_name == 'do': + # if there is a "block_name" in the match groups, we don't expect a + # block as the "block name" is actually part of the do-statement. + # we need to do this to handle the (weird and probably wrong!) case + # of a do-statement that is only a single identifier - techincally + # allowed in jinja. (for example, {% do thing %}) + expect_block = matchgroups.get('block_name') is None + self.handle_do(match, expect_block=expect_block) + return True + + if block_type_name == 'set': + self.advance(match.end()) + self.handle_set(match) + return True + + # we're somewhere like this {% block_type_name block_type + # we've either got arguments, a close of tag (%}), or bad input. + # we've handled materializations already (they're weird!) + # thankfully, comments aren't allowed *inside* a block def... + block_end_match = self._expect_match('%} or (...)', + TAG_CLOSE_PATTERN, + MACRO_ARGS_START_PATTERN) + self.advance(block_end_match.end()) + if block_end_match.groupdict().get('macro_start') is not None: + # we've hit our first parenthesis! + self._parenthesis_stack = [True] + self._process_macro_args() + self.advance(self._expect_match('%}', TAG_CLOSE_PATTERN).end()) + + # tag close time! + self.blocks.append(self.handle_block(match)) + return True + + def _process_rval_components(self): + """This is suspiciously similar to _process_macro_default_arg, probably + want to figure out how to merge the two. 
+ + Process the rval of an assignment statement or a do-block + """ + while True: + match = self._expect_match( + 'do block component', + # you could have a string, though that would be weird + STRING_PATTERN, + # a quote or an open/close parenthesis + NON_STRING_DO_BLOCK_MEMBER_PATTERN, + # a tag close + TAG_CLOSE_PATTERN + ) + matchgroups = match.groupdict() + self.advance(match.end()) + if matchgroups.get('string') is not None: + continue + elif matchgroups.get('quote') is not None: + self.rewind() + # now look for a string + match = self._expect_match('any string', STRING_PATTERN) + self.advance(match.end()) + elif matchgroups.get('open'): + self._parenthesis_stack.append(True) + elif matchgroups.get('close'): + self._parenthesis_stack.pop() + elif matchgroups.get('tag_close'): + if self._parenthesis_stack: + msg = ('Found "%}", expected ")"') + dbt.exceptions.raise_compiler_error(msg) + return + # else whitespace + + def _process_macro_default_arg(self): + """Handle the bit after an '=' in a macro default argument. This is + probably the trickiest thing. The goal here is to accept all strings + jinja would accept and always handle block start/end correctly: It's + fine to have false positives, jinja can fail later. + + Return True if there are more arguments expected. + """ + while self._parenthesis_stack: + match = self._expect_match( + 'macro argument', + # you could have a string + STRING_PATTERN, + # a quote, a comma, or a open/close parenthesis + NON_STRING_MACRO_ARGS_PATTERN, + # we want to "match", not "search" + method='match' + ) + matchgroups = match.groupdict() + self.advance(match.end()) + if matchgroups.get('string') is not None: + # we got a string value. There could be more data. + continue + elif matchgroups.get('quote') is not None: + # we got a bunch of data and then a string opening value. + # put the quote back on the menu + self.rewind() + # now look for a string + match = self._expect_match('any string', STRING_PATTERN) + self.advance(match.end()) + elif matchgroups.get('comma') is not None: + # small hack: if we hit a comma and there is one parenthesis + # left, return to look for a new name. otherwise we're still + # looking for the parameter close. + if len(self._parenthesis_stack) == 1: + return + elif matchgroups.get('open'): + self._parenthesis_stack.append(True) + elif matchgroups.get('close'): + self._parenthesis_stack.pop() + else: + raise dbt.exceptions.InternalException( + 'unhandled regex in _process_macro_default_arg(), no match' + ': {}'.format(matchgroups) + ) + + def _process_macro_args(self): + """Macro args are pretty tricky! Arg names themselves are simple, but + you can set arbitrary default values, including doing stuff like: + {% macro my_macro(arg="x" + ("}% {# {% endmacro %}" * 2)) %} + + Which makes you a jerk, but is valid jinja. + """ + # we are currently after the first parenthesis (+ any whitespace) after + # the macro args started. You can either have the close paren, or a + # name. + while self._parenthesis_stack: + match = self._expect_match('macro arguments', + MACRO_ARGS_END_PATTERN, + MACRO_ARG_PATTERN) + self.advance(match.end()) + matchgroups = match.groupdict() + if matchgroups.get('macro_end') is not None: + self._parenthesis_stack.pop() + # we got an argument. let's see what it has + elif matchgroups.get('value') is not None: + # we have to process a single macro argument. This mutates + # the parenthesis stack! If it finds a comma, it will continue + # the loop. 
+ self._process_macro_default_arg() + elif matchgroups.get('more_args') is not None: + continue + else: + raise dbt.exceptions.InternalException( + 'unhandled regex in _process_macro_args(), no match: {}' + .format(matchgroups) + ) + # if there are more arguments or a macro arg end we'll catch them + # on the next loop around + + def lex_for_blocks(self): + while self.data[self.pos:]: + found = self.find_block() + if not found: + break + + raw_toplevel = self.data[self.pos:] + if len(raw_toplevel) > 0: + self.blocks.append(BlockData(raw_toplevel)) + + return self.blocks diff --git a/core/dbt/clients/jinja.py b/core/dbt/clients/jinja.py index f83076a1709..156017a38cb 100644 --- a/core/dbt/clients/jinja.py +++ b/core/dbt/clients/jinja.py @@ -11,9 +11,9 @@ import dbt.compat import dbt.exceptions +import dbt.utils -from dbt.node_types import NodeType -from dbt.utils import AttrDict +from dbt.clients._jinja_blocks import BlockIterator from dbt.logger import GLOBAL_LOGGER as logger # noqa @@ -56,7 +56,7 @@ def _compile(self, source, filename): linecache.cache[filename] = ( len(source), None, - [line+'\n' for line in source.splitlines()], + [line + '\n' for line in source.splitlines()], filename ) @@ -281,3 +281,7 @@ def get_rendered(string, ctx, node=None, def undefined_error(msg): raise jinja2.exceptions.UndefinedError(msg) + + +def extract_toplevel_blocks(data): + return BlockIterator(data).lex_for_blocks() diff --git a/core/dbt/clients/registry.py b/core/dbt/clients/registry.py index 0873cc509ad..4ba817d0fcf 100644 --- a/core/dbt/clients/registry.py +++ b/core/dbt/clients/registry.py @@ -3,6 +3,7 @@ import requests from dbt.exceptions import RegistryException from dbt.utils import memoized +from dbt.logger import GLOBAL_LOGGER as logger import os if os.getenv('DBT_PACKAGE_HUB_URL'): @@ -32,7 +33,10 @@ def wrapper(*args, **kwargs): @_wrap_exceptions def _get(path, registry_base_url=None): url = _get_url(path, registry_base_url) + logger.debug('Making package registry request: GET {}'.format(url)) resp = requests.get(url) + logger.debug('Response from registry: GET {} {}'.format(url, + resp.status_code)) resp.raise_for_status() return resp.json() diff --git a/core/dbt/clients/system.py b/core/dbt/clients/system.py index e3733189f8f..5a3b8353b95 100644 --- a/core/dbt/clients/system.py +++ b/core/dbt/clients/system.py @@ -35,6 +35,7 @@ def find_matching(root_path, 'searched_path': 'models' } ] """ matching = [] + root_path = os.path.normpath(root_path) for relative_path_to_search in relative_paths_to_search: absolute_path_to_search = os.path.join( @@ -240,12 +241,17 @@ def _handle_windows_error(exc, cwd, cmd): message = ("Could not find command, ensure it is in the user's PATH " "and that the user has permissions to run it") cls = dbt.exceptions.ExecutableError + elif exc.errno == errno.ENOEXEC: + message = ('Command was not executable, ensure it is valid') + cls = dbt.exceptions.ExecutableError elif exc.errno == errno.ENOTDIR: message = ('Unable to cd: path does not exist, user does not have' ' permissions, or not a directory') cls = dbt.exceptions.WorkingDirectoryError else: - message = 'Unknown error: {}'.format(str(exc)) + message = 'Unknown error: {} (errno={}: "{}")'.format( + str(exc), exc.errno, errno.errorcode.get(exc.errno, '') + ) raise cls(cwd, cmd, message) @@ -306,7 +312,7 @@ def run_cmd(cwd, cmd, env=None): def download(url, path): response = requests.get(url) with open(path, 'wb') as handle: - for block in response.iter_content(1024*64): + for block in response.iter_content(1024 * 
64): handle.write(block) @@ -376,7 +382,7 @@ def move(src, dst): except OSError: # probably different drives if os.path.isdir(src): - if _absnorm(dst+'\\').startswith(_absnorm(src+'\\')): + if _absnorm(dst + '\\').startswith(_absnorm(src + '\\')): # dst is inside src raise EnvironmentError( "Cannot move a directory '{}' into itself '{}'" diff --git a/core/dbt/compat.py b/core/dbt/compat.py index a3fe87d273f..50f9c217914 100644 --- a/core/dbt/compat.py +++ b/core/dbt/compat.py @@ -1,6 +1,7 @@ +# flake8: noqa + import abc import codecs -import json import warnings import decimal @@ -33,11 +34,13 @@ if WHICH_PYTHON == 2: from SimpleHTTPServer import SimpleHTTPRequestHandler from SocketServer import TCPServer - from Queue import PriorityQueue + from Queue import PriorityQueue, Empty as QueueEmpty + from thread import get_ident else: from http.server import SimpleHTTPRequestHandler from socketserver import TCPServer - from queue import PriorityQueue + from queue import PriorityQueue, Empty as QueueEmpty + from threading import get_ident def to_unicode(s): diff --git a/core/dbt/compilation.py b/core/dbt/compilation.py index 9f101c39222..86d2fc23fae 100644 --- a/core/dbt/compilation.py +++ b/core/dbt/compilation.py @@ -1,14 +1,11 @@ import itertools import os -import json -from collections import OrderedDict, defaultdict -import sqlparse +from collections import defaultdict import dbt.utils import dbt.include import dbt.tracking -from dbt import deprecations from dbt.utils import get_materialization, NodeType, is_type from dbt.linker import Linker @@ -19,9 +16,8 @@ import dbt.flags import dbt.loader import dbt.config -from dbt.contracts.graph.compiled import CompiledNode, CompiledGraph +from dbt.contracts.graph.compiled import CompiledNode -from dbt.clients.system import write_json from dbt.logger import GLOBAL_LOGGER as logger graph_file_name = 'graph.gpickle' @@ -36,6 +32,7 @@ def print_compile_stats(stats): NodeType.Macro: 'macros', NodeType.Operation: 'operations', NodeType.Seed: 'seed files', + NodeType.Source: 'sources', } results = {k: 0 for k in names.keys()} @@ -44,7 +41,7 @@ def print_compile_stats(stats): stat_line = ", ".join( ["{} {}".format(ct, names.get(t)) for t, ct in results.items()]) - logger.info("Found {}".format(stat_line)) + logger.notice("Found {}".format(stat_line)) def _add_prepended_cte(prepended_ctes, new_cte): @@ -183,7 +180,7 @@ def link_graph(self, linker, manifest): if cycle: raise RuntimeError("Found a cycle: {}".format(cycle)) - def compile(self, manifest): + def compile(self, manifest, write=True): linker = Linker() self.link_graph(linker, manifest) @@ -195,25 +192,35 @@ def compile(self, manifest): manifest.macros.items()): stats[node.resource_type] += 1 - self.write_graph_file(linker, manifest) + if write: + self.write_graph_file(linker, manifest) print_compile_stats(stats) return linker -def compile_manifest(config, manifest): +def compile_manifest(config, manifest, write=True): compiler = Compiler(config) compiler.initialize() - return compiler.compile(manifest) + return compiler.compile(manifest, write=write) -def compile_node(adapter, config, node, manifest, extra_context): +def _is_writable(node): + if not node.injected_sql: + return False + + if dbt.utils.is_type(node, NodeType.Archive): + return False + + return True + + +def compile_node(adapter, config, node, manifest, extra_context, write=True): compiler = Compiler(config) node = compiler.compile_node(node, manifest, extra_context) node = _inject_runtime_config(adapter, node, extra_context) - 
if(node.injected_sql is not None and - not (dbt.utils.is_type(node, NodeType.Archive))): + if write and _is_writable(node): logger.debug('Writing injected SQL for node "{}"'.format( node.unique_id)) diff --git a/core/dbt/config/__init__.py b/core/dbt/config/__init__.py index b5280511ef7..d20916525ee 100644 --- a/core/dbt/config/__init__.py +++ b/core/dbt/config/__init__.py @@ -1,22 +1,5 @@ - -from .renderer import ConfigRenderer -from .profile import Profile, UserConfig -from .project import Project -from .profile import read_profile -from .profile import PROFILES_DIR -from .runtime import RuntimeConfig - - -def read_profiles(profiles_dir=None): - """This is only used in main, for some error handling""" - if profiles_dir is None: - profiles_dir = PROFILES_DIR - - raw_profiles = read_profile(profiles_dir) - - if raw_profiles is None: - profiles = {} - else: - profiles = {k: v for (k, v) in raw_profiles.items() if k != 'config'} - - return profiles +# all these are just exports, they need "noqa" so flake8 will not complain. +from .renderer import ConfigRenderer # noqa +from .profile import Profile, UserConfig, PROFILES_DIR # noqa +from .project import Project # noqa +from .runtime import RuntimeConfig # noqa diff --git a/core/dbt/config/profile.py b/core/dbt/config/profile.py index bb5c91cc246..835718ea25a 100644 --- a/core/dbt/config/profile.py +++ b/core/dbt/config/profile.py @@ -335,14 +335,12 @@ def from_raw_profiles(cls, raw_profiles, profile_name, cli_vars, ) @classmethod - def from_args(cls, args, project_profile_name=None, cli_vars=None): + def from_args(cls, args, project_profile_name=None): """Given the raw profiles as read from disk and the name of the desired profile if specified, return the profile component of the runtime config. :param args argparse.Namespace: The arguments as parsed from the cli. - :param cli_vars dict: The command-line variables passed as arguments, - as a dict. :param project_profile_name Optional[str]: The profile name, if specified in a project. :raises DbtProjectError: If there is no profile name specified in the @@ -352,9 +350,7 @@ def from_args(cls, args, project_profile_name=None, cli_vars=None): target could not be found. :returns Profile: The new Profile object. 
""" - if cli_vars is None: - cli_vars = parse_cli_vars(getattr(args, 'vars', '{}')) - + cli_vars = parse_cli_vars(getattr(args, 'vars', '{}')) threads_override = getattr(args, 'threads', None) target_override = getattr(args, 'target', None) raw_profiles = read_profile(args.profiles_dir) diff --git a/core/dbt/config/project.py b/core/dbt/config/project.py index 13d01599143..17729196cd3 100644 --- a/core/dbt/config/project.py +++ b/core/dbt/config/project.py @@ -1,4 +1,3 @@ - from copy import deepcopy import hashlib import os @@ -14,7 +13,6 @@ from dbt.exceptions import SemverException from dbt.exceptions import ValidationException from dbt.exceptions import warn_or_error -from dbt.logger import GLOBAL_LOGGER as logger from dbt.semver import VersionSpecifier from dbt.semver import versions_compatible from dbt.version import get_installed_version @@ -145,9 +143,10 @@ def _parse_versions(versions): class Project(object): def __init__(self, project_name, version, project_root, profile_name, source_paths, macro_paths, data_paths, test_paths, - analysis_paths, docs_paths, target_path, clean_targets, - log_path, modules_path, quoting, models, on_run_start, - on_run_end, archive, seeds, dbt_version, packages): + analysis_paths, docs_paths, target_path, archive_paths, + clean_targets, log_path, modules_path, quoting, models, + on_run_start, on_run_end, archive, seeds, dbt_version, + packages): self.project_name = project_name self.version = version self.project_root = project_root @@ -159,6 +158,7 @@ def __init__(self, project_name, version, project_root, profile_name, self.analysis_paths = analysis_paths self.docs_paths = docs_paths self.target_path = target_path + self.archive_paths = archive_paths self.clean_targets = clean_targets self.log_path = log_path self.modules_path = modules_path @@ -241,6 +241,7 @@ def from_project_config(cls, project_dict, packages_dict=None): analysis_paths = project_dict.get('analysis-paths', []) docs_paths = project_dict.get('docs-paths', source_paths[:]) target_path = project_dict.get('target-path', 'target') + archive_paths = project_dict.get('archive-paths', ['archives']) # should this also include the modules path by default? 
clean_targets = project_dict.get('clean-targets', [target_path]) log_path = project_dict.get('log-path', 'logs') @@ -274,6 +275,7 @@ def from_project_config(cls, project_dict, packages_dict=None): analysis_paths=analysis_paths, docs_paths=docs_paths, target_path=target_path, + archive_paths=archive_paths, clean_targets=clean_targets, log_path=log_path, modules_path=modules_path, @@ -321,6 +323,7 @@ def to_project_config(self, with_packages=False): 'analysis-paths': self.analysis_paths, 'docs-paths': self.docs_paths, 'target-path': self.target_path, + 'archive-paths': self.archive_paths, 'clean-targets': self.clean_targets, 'log-path': self.log_path, 'quoting': self.quoting, @@ -377,6 +380,10 @@ def from_project_root(cls, project_root, cli_vars): def from_current_directory(cls, cli_vars): return cls.from_project_root(os.getcwd(), cli_vars) + @classmethod + def from_args(cls, args): + return cls.from_current_directory(getattr(args, 'vars', '{}')) + def hashed_name(self): return hashlib.md5(self.project_name.encode('utf-8')).hexdigest() diff --git a/core/dbt/config/runtime.py b/core/dbt/config/runtime.py index ee654474a5b..23a0e4c81d5 100644 --- a/core/dbt/config/runtime.py +++ b/core/dbt/config/runtime.py @@ -18,10 +18,10 @@ class RuntimeConfig(Project, Profile): """ def __init__(self, project_name, version, project_root, source_paths, macro_paths, data_paths, test_paths, analysis_paths, - docs_paths, target_path, clean_targets, log_path, - modules_path, quoting, models, on_run_start, on_run_end, - archive, seeds, dbt_version, profile_name, target_name, - config, threads, credentials, packages, args): + docs_paths, target_path, archive_paths, clean_targets, + log_path, modules_path, quoting, models, on_run_start, + on_run_end, archive, seeds, dbt_version, profile_name, + target_name, config, threads, credentials, packages, args): # 'vars' self.args = args self.cli_vars = parse_cli_vars(getattr(args, 'vars', '{}')) @@ -39,6 +39,7 @@ def __init__(self, project_name, version, project_root, source_paths, analysis_paths=analysis_paths, docs_paths=docs_paths, target_path=target_path, + archive_paths=archive_paths, clean_targets=clean_targets, log_path=log_path, modules_path=modules_path, @@ -87,6 +88,7 @@ def from_parts(cls, project, profile, args): analysis_paths=project.analysis_paths, docs_paths=project.docs_paths, target_path=project.target_path, + archive_paths=project.archive_paths, clean_targets=project.clean_targets, log_path=project.log_path, modules_path=project.modules_path, @@ -171,16 +173,13 @@ def from_args(cls, args): :raises DbtProfileError: If the profile is invalid or missing. :raises ValidationException: If the cli variables are invalid. 
""" - cli_vars = parse_cli_vars(getattr(args, 'vars', '{}')) - # build the project and read in packages.yml - project = Project.from_current_directory(cli_vars) + project = Project.from_args(args) # build the profile profile = Profile.from_args( args=args, - project_profile_name=project.profile_name, - cli_vars=cli_vars + project_profile_name=project.profile_name ) return cls.from_parts( diff --git a/core/dbt/context/common.py b/core/dbt/context/common.py index f528af89ad3..6361d675ad4 100644 --- a/core/dbt/context/common.py +++ b/core/dbt/context/common.py @@ -1,5 +1,3 @@ -import copy -import functools import json import os @@ -13,12 +11,10 @@ import dbt.clients.jinja import dbt.clients.agate_helper import dbt.flags -import dbt.schema import dbt.tracking +import dbt.writer import dbt.utils -import dbt.hooks - from dbt.logger import GLOBAL_LOGGER as logger # noqa @@ -36,6 +32,11 @@ def __init__(self, adapter): def __getattr__(self, key): return getattr(self.relation_type, key) + def create_from_source(self, *args, **kwargs): + # bypass our create when creating from source so as not to mess up + # the source quoting + return self.relation_type.create_from_source(*args, **kwargs) + def create(self, *args, **kwargs): kwargs['quote_policy'] = dbt.utils.merge( self.quoting_config, @@ -46,27 +47,15 @@ def create(self, *args, **kwargs): class DatabaseWrapper(object): """ - Wrapper for runtime database interaction. Mostly a compatibility layer now. + Wrapper for runtime database interaction. Applies the runtime quote policy + via a relation proxy. """ - def __init__(self, connection_name, adapter): - self.connection_name = connection_name + def __init__(self, adapter): self.adapter = adapter self.Relation = RelationProxy(adapter) - def wrap(self, name): - func = getattr(self.adapter, name) - - @functools.wraps(func) - def wrapped(*args, **kwargs): - kwargs['model_name'] = self.connection_name - return func(*args, **kwargs) - - return wrapped - def __getattr__(self, name): - if name in self.adapter._available_model_: - return self.wrap(name) - elif name in self.adapter._available_raw_: + if name in self.adapter._available_: return getattr(self.adapter, name) else: raise AttributeError( @@ -83,7 +72,7 @@ def type(self): return self.adapter.type() def commit(self): - return self.adapter.commit_if_has_connection(self.connection_name) + return self.adapter.commit_if_has_connection() def _add_macro_map(context, package_name, macro_map): @@ -308,14 +297,14 @@ def fn(string): def fromjson(string, default=None): try: return json.loads(string) - except ValueError as e: + except ValueError: return default def tojson(value, default=None): try: return json.dumps(value) - except ValueError as e: + except ValueError: return default @@ -323,7 +312,7 @@ def try_or_compiler_error(model): def impl(message_if_exception, func, *args, **kwargs): try: return func(*args, **kwargs) - except Exception as e: + except Exception: dbt.exceptions.raise_compiler_error(message_if_exception, model) return impl @@ -359,7 +348,7 @@ def get_datetime_module_context(): def generate_base(model, model_dict, config, manifest, source_config, - provider, connection_name): + provider, adapter=None): """Generate the common aspects of the config dict.""" if provider is None: raise dbt.exceptions.InternalException( @@ -372,6 +361,7 @@ def generate_base(model, model_dict, config, manifest, source_config, target['type'] = config.credentials.type target.pop('pass', None) target['name'] = target_name + adapter = get_adapter(config) context = 
{'env': target} @@ -379,7 +369,7 @@ def generate_base(model, model_dict, config, manifest, source_config, pre_hooks = None post_hooks = None - db_wrapper = DatabaseWrapper(connection_name, adapter) + db_wrapper = DatabaseWrapper(adapter) context = dbt.utils.merge(context, { "adapter": db_wrapper, @@ -391,7 +381,7 @@ def generate_base(model, model_dict, config, manifest, source_config, "config": provider.Config(model_dict, source_config), "database": config.credentials.database, "env_var": env_var, - "exceptions": dbt.exceptions.CONTEXT_EXPORTS, + "exceptions": dbt.exceptions.wrapped_exports(model), "execute": provider.execute, "flags": dbt.flags, # TODO: Do we have to leave this in? @@ -438,7 +428,7 @@ def modify_generated_context(context, model, model_dict, config, manifest): return context -def generate_execute_macro(model, config, manifest, provider, connection_name): +def generate_execute_macro(model, config, manifest, provider): """Internally, macros can be executed like nodes, with some restrictions: - they don't have have all values available that nodes do: @@ -447,8 +437,8 @@ def generate_execute_macro(model, config, manifest, provider, connection_name): - they can't be configured with config() directives """ model_dict = model.serialize() - context = generate_base(model, model_dict, config, manifest, - None, provider, connection_name) + context = generate_base(model, model_dict, config, manifest, None, + provider) return modify_generated_context(context, model, model_dict, config, manifest) @@ -457,7 +447,7 @@ def generate_execute_macro(model, config, manifest, provider, connection_name): def generate_model(model, config, manifest, source_config, provider): model_dict = model.to_dict() context = generate_base(model, model_dict, config, manifest, - source_config, provider, model.get('name')) + source_config, provider) # operations (hooks) don't get a 'this' if model.resource_type != NodeType.Operation: this = get_this_relation(context['adapter'], config, model_dict) @@ -482,5 +472,4 @@ def generate(model, config, manifest, source_config=None, provider=None): or dbt.context.runtime.generate """ - return generate_model(model, config, manifest, source_config, - provider) + return generate_model(model, config, manifest, source_config, provider) diff --git a/core/dbt/context/parser.py b/core/dbt/context/parser.py index 2a9d2a87881..3d2a8da5d78 100644 --- a/core/dbt/context/parser.py +++ b/core/dbt/context/parser.py @@ -1,6 +1,7 @@ import dbt.exceptions import dbt.context.common +from dbt.adapters.factory import get_adapter execute = False @@ -48,7 +49,7 @@ def do_docs(*args): def source(db_wrapper, model, config, manifest): def do_source(source_name, table_name): model.sources.append([source_name, table_name]) - return '' + return db_wrapper.adapter.Relation.create_from_node(config, model) return do_source @@ -97,12 +98,17 @@ def get(self, name, validator=None, default=None): def generate(model, runtime_config, manifest, source_config): - return dbt.context.common.generate( - model, runtime_config, manifest, source_config, dbt.context.parser) + # during parsing, we don't have a connection, but we might need one, so we + # have to acquire it. 
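
As a rough sketch of the acquire/release pattern these comments describe (illustrative only; the adapter method names below are assumptions, not dbt's actual API), a named connection can be modeled as a contextlib context manager that opens a connection on entry and always releases it once parsing of the node finishes:

    import contextlib

    @contextlib.contextmanager
    def connection_named(adapter, name):
        # Acquire (or reuse) a connection associated with the given name ...
        conn = adapter.acquire_connection(name)
        try:
            yield conn
        finally:
            # ... and hand it back even if parsing raises.
            adapter.release_connection(name)
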
+ # In the future, it would be nice to lazily open the connection, as in some + # projects it would be possible to parse without connecting to the db + with get_adapter(runtime_config).connection_named(model.get('name')): + return dbt.context.common.generate( + model, runtime_config, manifest, source_config, dbt.context.parser + ) -def generate_macro(model, runtime_config, manifest, connection_name): +def generate_macro(model, runtime_config, manifest): return dbt.context.common.generate_execute_macro( - model, runtime_config, manifest, dbt.context.parser, - connection_name + model, runtime_config, manifest, dbt.context.parser ) diff --git a/core/dbt/context/runtime.py b/core/dbt/context/runtime.py index 40dcb77e73f..2fc7b32cddb 100644 --- a/core/dbt/context/runtime.py +++ b/core/dbt/context/runtime.py @@ -123,8 +123,7 @@ def generate(model, runtime_config, manifest): model, runtime_config, manifest, None, dbt.context.runtime) -def generate_macro(model, runtime_config, manifest, connection_name): +def generate_macro(model, runtime_config, manifest): return dbt.context.common.generate_execute_macro( - model, runtime_config, manifest, dbt.context.runtime, - connection_name + model, runtime_config, manifest, dbt.context.runtime ) diff --git a/core/dbt/contracts/connection.py b/core/dbt/contracts/connection.py index 84572c23ab6..0a79186e6c8 100644 --- a/core/dbt/contracts/connection.py +++ b/core/dbt/contracts/connection.py @@ -1,7 +1,5 @@ -import dbt.exceptions from dbt.api.object import APIObject from dbt.contracts.common import named_property -from dbt.logger import GLOBAL_LOGGER as logger # noqa CONNECTION_CONTRACT = { diff --git a/core/dbt/contracts/graph/compiled.py b/core/dbt/contracts/graph/compiled.py index 7030bdf9e5f..f4e44040872 100644 --- a/core/dbt/contracts/graph/compiled.py +++ b/core/dbt/contracts/graph/compiled.py @@ -1,7 +1,4 @@ -from copy import copy, deepcopy - from dbt.api import APIObject -from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import deep_merge from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \ PARSED_MACRO_CONTRACT, ParsedNode diff --git a/core/dbt/contracts/graph/manifest.py b/core/dbt/contracts/graph/manifest.py index 99866cd8ddb..7002b5af512 100644 --- a/core/dbt/contracts/graph/manifest.py +++ b/core/dbt/contracts/graph/manifest.py @@ -1,10 +1,9 @@ from dbt.api import APIObject -from dbt.contracts.graph.unparsed import UNPARSED_NODE_CONTRACT from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \ PARSED_MACRO_CONTRACT, PARSED_DOCUMENTATION_CONTRACT, \ PARSED_SOURCE_DEFINITION_CONTRACT from dbt.contracts.graph.compiled import COMPILED_NODE_CONTRACT, CompiledNode -from dbt.exceptions import ValidationException +from dbt.exceptions import raise_duplicate_resource_name from dbt.node_types import NodeType from dbt.logger import GLOBAL_LOGGER as logger from dbt import tracking @@ -401,11 +400,22 @@ def __getattr__(self, name): type(self).__name__, name) ) - def get_used_schemas(self): + def get_used_schemas(self, resource_types=None): return frozenset({ (node.database, node.schema) for node in self.nodes.values() + if not resource_types or node.resource_type in resource_types }) def get_used_databases(self): return frozenset(node.database for node in self.nodes.values()) + + def deepcopy(self, config=None): + return Manifest( + nodes={k: v.incorporate() for k, v in self.nodes.items()}, + macros={k: v.incorporate() for k, v in self.macros.items()}, + docs={k: v.incorporate() for k, v in self.docs.items()}, + 
generated_at=self.generated_at, + disabled=[n.incorporate() for n in self.disabled], + config=config + ) diff --git a/core/dbt/contracts/graph/parsed.py b/core/dbt/contracts/graph/parsed.py index 966ba0f90f4..d0e77c20ab5 100644 --- a/core/dbt/contracts/graph/parsed.py +++ b/core/dbt/contracts/graph/parsed.py @@ -1,8 +1,6 @@ from dbt.api import APIObject from dbt.utils import deep_merge from dbt.node_types import NodeType -from dbt.exceptions import raise_duplicate_resource_name, \ - raise_patch_targets_not_found import dbt.clients.jinja @@ -443,6 +441,79 @@ def config(self, value): self._contents['config'] = value +ARCHIVE_CONFIG_CONTRACT = { + 'properties': { + 'target_database': { + 'type': 'string', + }, + 'target_schema': { + 'type': 'string', + }, + 'unique_key': { + 'type': 'string', + }, + 'anyOf': [ + { + 'properties': { + 'strategy': { + 'enum': ['timestamp'], + }, + 'updated_at': { + 'type': 'string', + 'description': ( + 'The column name with the timestamp to compare' + ), + }, + }, + 'required': ['updated_at'], + }, + { + 'properties': { + 'strategy': { + 'enum': ['check'], + }, + 'check_cols': { + 'oneOf': [ + { + 'type': 'array', + 'items': {'type': 'string'}, + 'description': 'The columns to check', + 'minLength': 1, + }, + { + 'enum': ['all'], + 'description': 'Check all columns', + }, + ], + }, + }, + 'required': ['check_cols'], + } + ] + }, + 'required': [ + 'target_database', 'target_schema', 'unique_key', 'strategy', + ], +} + + +PARSED_ARCHIVE_NODE_CONTRACT = deep_merge( + PARSED_NODE_CONTRACT, + { + 'properties': { + 'config': ARCHIVE_CONFIG_CONTRACT, + 'resource_type': { + 'enum': [NodeType.Archive], + }, + }, + } +) + + +class ParsedArchiveNode(ParsedNode): + SCHEMA = PARSED_ARCHIVE_NODE_CONTRACT + + # The parsed node update is only the 'patch', not the test. The test became a # regular parsed node. Note that description and columns must be present, but # may be empty. @@ -558,6 +629,7 @@ def generator(self): # available in this class. should we just generate this here? return dbt.clients.jinja.macro_generator(self._contents) + # This is just the file + its ID PARSED_DOCUMENTATION_CONTRACT = deep_merge( UNPARSED_DOCUMENTATION_FILE_CONTRACT, @@ -634,9 +706,26 @@ class Hook(APIObject): } +QUOTING_CONTRACT = { + 'properties': { + 'quoting': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'database': {'type': 'boolean'}, + 'schema': {'type': 'boolean'}, + 'identifier': {'type': 'boolean'}, + }, + }, + }, + 'required': ['quoting'], +} + + PARSED_SOURCE_DEFINITION_CONTRACT = deep_merge( UNPARSED_BASE_CONTRACT, FRESHNESS_CONTRACT, + QUOTING_CONTRACT, HAS_DESCRIPTION_CONTRACT, HAS_UNIQUE_ID_CONTRACT, HAS_DOCREFS_CONTRACT, @@ -676,7 +765,7 @@ class Hook(APIObject): # the manifest search stuff really requires this, sadly 'resource_type': { 'enum': [NodeType.Source], - } + }, }, # note that while required, loaded_at_field and freshness may be null 'required': [ diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index 30de42ef695..c6e6bbbd75b 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -75,6 +75,7 @@ NodeType.Seed, # we need this if parse_node is going to handle archives. 
NodeType.Archive, + NodeType.RPCCall, ] }, }, @@ -218,6 +219,29 @@ class UnparsedNodeUpdate(APIObject): } +_QUOTING_CONTRACT = { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'database': {'type': 'boolean'}, + 'schema': {'type': 'boolean'}, + 'identifier': {'type': 'boolean'}, + }, +} + + +QUOTING_CONTRACT = { + 'properties': { + 'quoting': { + 'anyOf': [ + {'type': 'null'}, + _QUOTING_CONTRACT, + ], + }, + }, +} + + FRESHNESS_CONTRACT = { 'properties': { 'loaded_at_field': { @@ -238,6 +262,7 @@ class UnparsedNodeUpdate(APIObject): UNPARSED_NODE_DESCRIPTION_CONTRACT, UNPARSED_COLUMN_DESCRIPTION_CONTRACT, FRESHNESS_CONTRACT, + QUOTING_CONTRACT, { 'description': ( 'A source table definition, as provided in the "tables" ' @@ -256,6 +281,7 @@ class UnparsedNodeUpdate(APIObject): UNPARSED_SOURCE_DEFINITION_CONTRACT = deep_merge( FRESHNESS_CONTRACT, + QUOTING_CONTRACT, { 'type': 'object', 'additionalProperties': False, @@ -335,7 +361,7 @@ def tables(self): 'type': 'string', 'description': ( 'Relative path to the originating file from the project root.' - ), + ), }, 'file_contents': { 'type': 'string', diff --git a/core/dbt/contracts/project.py b/core/dbt/contracts/project.py index 58e884abcea..9e79101f52a 100644 --- a/core/dbt/contracts/project.py +++ b/core/dbt/contracts/project.py @@ -91,6 +91,10 @@ 'target-path': { 'type': 'string', }, + 'archive-paths': { + 'type': 'array', + 'items': {'type': 'string'}, + }, 'clean-targets': { 'type': 'array', 'items': {'type': 'string'}, diff --git a/core/dbt/contracts/results.py b/core/dbt/contracts/results.py index 0a991c89417..e43cc8eb56e 100644 --- a/core/dbt/contracts/results.py +++ b/core/dbt/contracts/results.py @@ -3,10 +3,7 @@ from dbt.contracts.common import named_property from dbt.contracts.graph.manifest import COMPILE_RESULT_NODE_CONTRACT from dbt.contracts.graph.unparsed import TIME_CONTRACT -from dbt.contracts.graph.parsed import PARSED_NODE_CONTRACT, \ - PARSED_SOURCE_DEFINITION_CONTRACT -from dbt.contracts.graph.compiled import COMPILED_NODE_CONTRACT -from dbt.contracts.graph.manifest import PARSED_MANIFEST_CONTRACT +from dbt.contracts.graph.parsed import PARSED_SOURCE_DEFINITION_CONTRACT TIMING_INFO_CONTRACT = { @@ -197,8 +194,8 @@ def skipped(self): 'type': 'array', 'items': { 'anyOf': [ - RUN_MODEL_RESULT_CONTRACT, - PARTIAL_RESULT_CONTRACT, + RUN_MODEL_RESULT_CONTRACT, + PARTIAL_RESULT_CONTRACT, ] }, 'description': 'An array of results, one per model', @@ -261,8 +258,8 @@ class SourceFreshnessResult(NodeSerializable): def __init__(self, node, max_loaded_at, snapshotted_at, age, status, thread_id, error=None, timing=None, execution_time=0): - max_loaded_at = max_loaded_at.isoformat() + 'Z' - snapshotted_at = snapshotted_at.isoformat() + 'Z' + max_loaded_at = max_loaded_at.isoformat() + snapshotted_at = snapshotted_at.isoformat() if timing is None: timing = [] super(SourceFreshnessResult, self).__init__( @@ -458,3 +455,78 @@ class FreshnessRunOutput(APIObject): def __init__(self, meta, sources): super(FreshnessRunOutput, self).__init__(meta=meta, sources=sources) + + +REMOTE_COMPILE_RESULT_CONTRACT = { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'raw_sql': { + 'type': 'string', + }, + 'compiled_sql': { + 'type': 'string', + }, + 'timing': { + 'type': 'array', + 'items': TIMING_INFO_CONTRACT, + }, + }, + 'required': ['raw_sql', 'compiled_sql', 'timing'] +} + + +class RemoteCompileResult(APIObject): + SCHEMA = REMOTE_COMPILE_RESULT_CONTRACT + + def __init__(self, raw_sql, 
compiled_sql, node, timing=None, **kwargs): + if timing is None: + timing = [] + # this should not show up in the serialized output. + self.node = node + super(RemoteCompileResult, self).__init__( + raw_sql=raw_sql, + compiled_sql=compiled_sql, + timing=timing, + **kwargs + ) + + @property + def error(self): + return None + + +REMOTE_RUN_RESULT_CONTRACT = deep_merge(REMOTE_COMPILE_RESULT_CONTRACT, { + 'properties': { + 'table': { + 'type': 'object', + 'properties': { + 'column_names': { + 'type': 'array', + 'items': {'type': 'string'}, + }, + 'rows': { + 'type': 'array', + # any item type is ok + }, + }, + 'required': ['rows', 'column_names'], + }, + }, + 'required': ['table'], +}) + + +class RemoteRunResult(RemoteCompileResult): + SCHEMA = REMOTE_RUN_RESULT_CONTRACT + + def __init__(self, raw_sql, compiled_sql, node, timing=None, table=None): + if table is None: + table = [] + super(RemoteRunResult, self).__init__( + raw_sql=raw_sql, + compiled_sql=compiled_sql, + timing=timing, + table=table, + node=node + ) diff --git a/core/dbt/deprecations.py b/core/dbt/deprecations.py index 25e27d3ab8a..baf92043cf5 100644 --- a/core/dbt/deprecations.py +++ b/core/dbt/deprecations.py @@ -1,4 +1,3 @@ -from dbt.logger import GLOBAL_LOGGER as logger import dbt.links import dbt.flags @@ -30,16 +29,6 @@ class DBTRepositoriesDeprecation(DBTDeprecation): """ -class SqlWhereDeprecation(DBTDeprecation): - name = "sql_where" - description = """\ -The `sql_where` option for incremental models is deprecated and will be - removed in a future release. Check the docs for more information - - {} - """.format(dbt.links.IncrementalDocs) - - class SeedDropExistingDeprecation(DBTDeprecation): name = 'drop-existing' description = """The --drop-existing argument to `dbt seed` has been @@ -83,7 +72,6 @@ def warn(name, *args, **kwargs): deprecations_list = [ DBTRepositoriesDeprecation(), SeedDropExistingDeprecation(), - SqlWhereDeprecation(), ] deprecations = {d.name: d for d in deprecations_list} diff --git a/core/dbt/exceptions.py b/core/dbt/exceptions.py index 10e80c6422a..2319e748cc2 100644 --- a/core/dbt/exceptions.py +++ b/core/dbt/exceptions.py @@ -1,11 +1,22 @@ -from dbt.compat import basestring, builtins +import sys +import six +import functools + +from dbt.compat import builtins from dbt.logger import GLOBAL_LOGGER as logger import dbt.flags -import re class Exception(builtins.Exception): - pass + CODE = -32000 + MESSAGE = "Server Error" + + def data(self): + # if overriding, make sure the result is json-serializable. + return { + 'type': self.__class__.__name__, + 'message': str(self), + } class MacroReturn(builtins.BaseException): @@ -22,6 +33,9 @@ class InternalException(Exception): class RuntimeException(RuntimeError, Exception): + CODE = 10001 + MESSAGE = "Runtime error" + def __init__(self, msg, node=None): self.stack = [] self.node = node @@ -81,8 +95,59 @@ def __str__(self, prefix="! 
"): return lines[0] + "\n" + "\n".join( [" " + line for line in lines[1:]]) + def data(self): + result = Exception.data(self) + if self.node is None: + return result + + result.update({ + 'raw_sql': self.node.get('raw_sql'), + 'compiled_sql': self.node.get('injected_sql'), + }) + return result + + +class RPCFailureResult(RuntimeException): + CODE = 10002 + MESSAGE = "RPC execution error" + + +class RPCTimeoutException(RuntimeException): + CODE = 10008 + MESSAGE = 'RPC timeout error' + + def __init__(self, timeout): + super(RPCTimeoutException, self).__init__(self.MESSAGE) + self.timeout = timeout + + def data(self): + result = super(RPCTimeoutException, self).data() + result.update({ + 'timeout': self.timeout, + 'message': 'RPC timed out after {}s'.format(self.timeout), + }) + return result + + +class RPCKilledException(RuntimeException): + CODE = 10009 + MESSAGE = 'RPC process killed' + + def __init__(self, signum): + self.signum = signum + self.message = 'RPC process killed by signal {}'.format(self.signum) + super(RPCKilledException, self).__init__(self.message) + + def data(self): + return { + 'signum': self.signum, + 'message': self.message, + } + class DatabaseException(RuntimeException): + CODE = 10003 + MESSAGE = "Database Error" def process_stack(self): lines = [] @@ -99,6 +164,9 @@ def type(self): class CompilationException(RuntimeException): + CODE = 10004 + MESSAGE = "Compilation Error" + @property def type(self): return 'Compilation' @@ -109,7 +177,8 @@ class RecursionException(RuntimeException): class ValidationException(RuntimeException): - pass + CODE = 10005 + MESSAGE = "Validation Error" class JSONValidationException(ValidationException): @@ -117,8 +186,9 @@ def __init__(self, typename, errors): self.typename = typename self.errors = errors self.errors_message = ', '.join(errors) - msg = ('Invalid arguments passed to "{}" instance: {}'.format( - self.typename, self.errors_message)) + msg = 'Invalid arguments passed to "{}" instance: {}'.format( + self.typename, self.errors_message + ) super(JSONValidationException, self).__init__(msg) def __reduce__(self): @@ -130,15 +200,16 @@ class AliasException(ValidationException): pass -class ParsingException(Exception): - pass - - class DependencyException(Exception): - pass + # this can happen due to raise_dependency_error and its callers + CODE = 10006 + MESSAGE = "Dependency Error" class DbtConfigError(RuntimeException): + CODE = 10007 + MESSAGE = "DBT Configuration Error" + def __init__(self, message, project=None, result_type='invalid_project'): self.project = project super(DbtConfigError, self).__init__(message) @@ -522,8 +593,8 @@ def raise_ambiguous_catalog_match(unique_id, match_1, match_2): def get_match_string(match): return "{}.{}".format( - match.get('metadata', {}).get('schema'), - match.get('metadata', {}).get('name')) + match.get('metadata', {}).get('schema'), + match.get('metadata', {}).get('name')) raise_compiler_error( 'dbt found two relations in your warehouse with similar database ' @@ -612,3 +683,26 @@ def warn_or_error(msg, node=None, log_fmt=None): relation_wrong_type, ] } + + +def wrapper(model): + def wrap(func): + @functools.wraps(func) + def inner(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception: + exc_type, exc, exc_tb = sys.exc_info() + if hasattr(exc, 'node') and exc.node is None: + exc.node = model + six.reraise(exc_type, exc, exc_tb) + + return inner + return wrap + + +def wrapped_exports(model): + wrap = wrapper(model) + return { + name: wrap(export) for name, export in 
CONTEXT_EXPORTS.items() + } diff --git a/core/dbt/flags.py b/core/dbt/flags.py index 8bf43049a49..0d905598447 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -3,13 +3,16 @@ FULL_REFRESH = False USE_CACHE = True WARN_ERROR = False +TEST_NEW_PARSER = False def reset(): - global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR + global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR, \ + TEST_NEW_PARSER STRICT_MODE = False NON_DESTRUCTIVE = False FULL_REFRESH = False USE_CACHE = True WARN_ERROR = False + TEST_NEW_PARSER = False diff --git a/core/dbt/graph/selector.py b/core/dbt/graph/selector.py index 2b5fc24a838..6282d47d4aa 100644 --- a/core/dbt/graph/selector.py +++ b/core/dbt/graph/selector.py @@ -3,7 +3,6 @@ from dbt.utils import is_enabled, get_materialization, coalesce from dbt.node_types import NodeType -from dbt.contracts.graph.parsed import ParsedNode import dbt.exceptions SELECTOR_PARENTS = '+' diff --git a/core/dbt/hooks.py b/core/dbt/hooks.py index c673b59a8c5..2434f762ca3 100644 --- a/core/dbt/hooks.py +++ b/core/dbt/hooks.py @@ -12,7 +12,7 @@ class ModelHookType: def _parse_hook_to_dict(hook_string): try: hook_dict = json.loads(hook_string) - except ValueError as e: + except ValueError: hook_dict = {"sql": hook_string} if 'transaction' not in hook_dict: diff --git a/core/dbt/include/global_project/macros/adapters/common.sql b/core/dbt/include/global_project/macros/adapters/common.sql index 254cc5ba46b..a24e707f850 100644 --- a/core/dbt/include/global_project/macros/adapters/common.sql +++ b/core/dbt/include/global_project/macros/adapters/common.sql @@ -94,11 +94,11 @@ {% endmacro %} -{% macro get_catalog() -%} - {{ return(adapter_macro('get_catalog')) }} +{% macro get_catalog(information_schemas) -%} + {{ return(adapter_macro('get_catalog', information_schemas)) }} {%- endmacro %} -{% macro default__get_catalog() -%} +{% macro default__get_catalog(information_schemas) -%} {% set typename = adapter.type() %} {% set msg -%} @@ -210,27 +210,27 @@ {% endmacro %} -{% macro check_schema_exists(database, schema) -%} - {{ return(adapter_macro('check_schema_exists', database, schema)) }} +{% macro check_schema_exists(information_schema, schema) -%} + {{ return(adapter_macro('check_schema_exists', information_schema, schema)) }} {% endmacro %} -{% macro default__check_schema_exists(database, schema) -%} +{% macro default__check_schema_exists(information_schema, schema) -%} {% call statement('check_schema_exists', fetch_result=True, auto_begin=False) -%} select count(*) - from {{ information_schema_name(database) }}.schemata - where catalog_name='{{ database }}' + from {{ information_schema }}.schemata + where catalog_name='{{ information_schema.database }}' and schema_name='{{ schema }}' {%- endcall %} {{ return(load_result('check_schema_exists').table) }} {% endmacro %} -{% macro list_relations_without_caching(database, schema) %} - {{ return(adapter_macro('list_relations_without_caching', database, schema)) }} +{% macro list_relations_without_caching(information_schema, schema) %} + {{ return(adapter_macro('list_relations_without_caching', information_schema, schema)) }} {% endmacro %} -{% macro default__list_relations_without_caching(database, schema) %} +{% macro default__list_relations_without_caching(information_schema, schema) %} {{ dbt.exceptions.raise_not_implemented( 'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }} {% endmacro %} diff --git 
a/core/dbt/include/global_project/macros/etc/get_custom_alias.sql b/core/dbt/include/global_project/macros/etc/get_custom_alias.sql new file mode 100644 index 00000000000..7a382a58310 --- /dev/null +++ b/core/dbt/include/global_project/macros/etc/get_custom_alias.sql @@ -0,0 +1,26 @@ + +{# + Renders a alias name given a custom alias name. If the custom + alias name is none, then the resulting alias is just the filename of the + model. If a alias override is specified, then that is used. + + This macro can be overriden in projects to define different semantics + for rendering a alias name. + + Arguments: + custom_alias_name: The custom alias name specified for a model, or none + +#} +{% macro generate_alias_name(node, custom_alias_name=none) -%} + + {%- if custom_alias_name is none -%} + + {{ node.name }} + + {%- else -%} + + {{ custom_alias_name | trim }} + + {%- endif -%} + +{%- endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/archive/archive.sql b/core/dbt/include/global_project/macros/materializations/archive/archive.sql index 604b6492e03..ead07b657d7 100644 --- a/core/dbt/include/global_project/macros/materializations/archive/archive.sql +++ b/core/dbt/include/global_project/macros/materializations/archive/archive.sql @@ -2,12 +2,12 @@ Create SCD Hash SQL fields cross-db #} -{% macro archive_scd_hash() %} - {{ adapter_macro('archive_scd_hash') }} +{% macro archive_hash_arguments(args) %} + {{ adapter_macro('archive_hash_arguments', args) }} {% endmacro %} -{% macro default__archive_scd_hash() %} - md5("dbt_pk" || '|' || "dbt_updated_at") +{% macro default__archive_hash_arguments(args) %} + md5({% for arg in args %}coalesce(cast({{ arg }} as varchar ), '') {% if not loop.last %} || '|' || {% endif %}{% endfor %}) {% endmacro %} {% macro create_temporary_table(sql, relation) %} @@ -48,44 +48,74 @@ {% macro default__archive_update(target_relation, tmp_relation) %} update {{ target_relation }} - set {{ adapter.quote('valid_to') }} = tmp.{{ adapter.quote('valid_to') }} + set dbt_valid_to = tmp.dbt_valid_to from {{ tmp_relation }} as tmp - where tmp.{{ adapter.quote('scd_id') }} = {{ target_relation }}.{{ adapter.quote('scd_id') }} - and {{ adapter.quote('change_type') }} = 'update'; + where tmp.dbt_scd_id = {{ target_relation }}.dbt_scd_id + and change_type = 'update'; {% endmacro %} +{% macro archive_get_time() -%} + {{ adapter_macro('archive_get_time') }} +{%- endmacro %} + +{% macro default__archive_get_time() -%} + {{ current_timestamp() }} +{%- endmacro %} + +{% macro snowflake__archive_get_time() -%} + to_timestamp_ntz({{ current_timestamp() }}) +{%- endmacro %} + + +{% macro archive_select_generic(source_sql, target_relation, transforms, scd_hash) -%} + with source as ( + {{ source_sql }} + ), + {{ transforms }} + merged as ( + + select *, 'update' as change_type from updates + union all + select *, 'insert' as change_type from insertions + + ) + + select *, + {{ scd_hash }} as dbt_scd_id + from merged + +{%- endmacro %} + {# Cross-db compatible archival implementation #} -{% macro archive_select(source_relation, target_relation, source_columns, unique_key, updated_at) %} - +{% macro archive_select_timestamp(source_sql, target_relation, source_columns, unique_key, updated_at) -%} {% set timestamp_column = api.Column.create('_', 'timestamp') %} - - with current_data as ( + {% set transforms -%} + current_data as ( select {% for col in source_columns %} - {{ adapter.quote(col.name) }} {% if not loop.last %},{% endif %} + {{ col.name }} {% if not 
loop.last %},{% endif %} {% endfor %}, - {{ updated_at }} as {{ adapter.quote('dbt_updated_at') }}, - {{ unique_key }} as {{ adapter.quote('dbt_pk') }}, - {{ updated_at }} as {{ adapter.quote('valid_from') }}, - {{ timestamp_column.literal('null') }} as {{ adapter.quote('tmp_valid_to') }} - from {{ source_relation }} - + {{ updated_at }} as dbt_updated_at, + {{ unique_key }} as dbt_pk, + {{ updated_at }} as dbt_valid_from, + {{ timestamp_column.literal('null') }} as tmp_valid_to + from source ), archived_data as ( select {% for col in source_columns %} - {{ adapter.quote(col.name) }}, + {{ col.name }}, {% endfor %} - {{ updated_at }} as {{ adapter.quote('dbt_updated_at') }}, - {{ unique_key }} as {{ adapter.quote('dbt_pk') }}, - {{ adapter.quote('valid_from') }}, - {{ adapter.quote('valid_to') }} as {{ adapter.quote('tmp_valid_to') }} + {{ updated_at }} as dbt_updated_at, + {{ unique_key }} as dbt_pk, + dbt_valid_from, + dbt_valid_to as tmp_valid_to from {{ target_relation }} ), @@ -94,14 +124,16 @@ select current_data.*, - {{ timestamp_column.literal('null') }} as {{ adapter.quote('valid_to') }} + {{ timestamp_column.literal('null') }} as dbt_valid_to from current_data left outer join archived_data - on archived_data.{{ adapter.quote('dbt_pk') }} = current_data.{{ adapter.quote('dbt_pk') }} - where archived_data.{{ adapter.quote('dbt_pk') }} is null or ( - archived_data.{{ adapter.quote('dbt_pk') }} is not null and - current_data.{{ adapter.quote('dbt_updated_at') }} > archived_data.{{ adapter.quote('dbt_updated_at') }} and - archived_data.{{ adapter.quote('tmp_valid_to') }} is null + on archived_data.dbt_pk = current_data.dbt_pk + where + archived_data.dbt_pk is null + or ( + archived_data.dbt_pk is not null + and archived_data.dbt_updated_at < current_data.dbt_updated_at + and archived_data.tmp_valid_to is null ) ), @@ -109,56 +141,132 @@ select archived_data.*, - current_data.{{ adapter.quote('dbt_updated_at') }} as {{ adapter.quote('valid_to') }} + current_data.dbt_updated_at as dbt_valid_to from current_data left outer join archived_data - on archived_data.{{ adapter.quote('dbt_pk') }} = current_data.{{ adapter.quote('dbt_pk') }} - where archived_data.{{ adapter.quote('dbt_pk') }} is not null - and archived_data.{{ adapter.quote('dbt_updated_at') }} < current_data.{{ adapter.quote('dbt_updated_at') }} - and archived_data.{{ adapter.quote('tmp_valid_to') }} is null + on archived_data.dbt_pk = current_data.dbt_pk + where archived_data.dbt_pk is not null + and archived_data.dbt_updated_at < current_data.dbt_updated_at + and archived_data.tmp_valid_to is null ), + {%- endset %} + {%- set scd_hash = archive_hash_arguments(['dbt_pk', 'dbt_updated_at']) -%} + {{ archive_select_generic(source_sql, target_relation, transforms, scd_hash) }} +{%- endmacro %} + + +{% macro archive_select_check_cols(source_sql, target_relation, source_columns, unique_key, check_cols) -%} + {%- set timestamp_column = api.Column.create('_', 'timestamp') -%} + + {# if we recognize the primary key, it's the newest record, and anything we care about has changed, it's an update candidate #} + {%- set update_candidate -%} + archived_data.dbt_pk is not null + and ( + {%- for col in check_cols %} + current_data.{{ col }} <> archived_data.{{ col }} + {%- if not loop.last %} or {% endif %} + {% endfor -%} + ) + and archived_data.tmp_valid_to is null + {%- endset %} - merged as ( + {% set transforms -%} + current_data as ( - select *, 'update' as {{ adapter.quote('change_type') }} from updates - union all - select *, 
'insert' as {{ adapter.quote('change_type') }} from insertions + select + {% for col in source_columns %} + {{ col.name }} {% if not loop.last %},{% endif %} + {% endfor %}, + {{ archive_get_time() }} as dbt_updated_at, + {{ unique_key }} as dbt_pk, + {{ archive_get_time() }} as dbt_valid_from, + {{ timestamp_column.literal('null') }} as tmp_valid_to + from source + ), + + archived_data as ( + select + {% for col in source_columns %} + {{ col.name }}, + {% endfor %} + dbt_updated_at, + {{ unique_key }} as dbt_pk, + dbt_valid_from, + dbt_valid_to as tmp_valid_to + from {{ target_relation }} + + ), + + insertions as ( + + select + current_data.*, + {{ timestamp_column.literal('null') }} as dbt_valid_to + from current_data + left outer join archived_data + on archived_data.dbt_pk = current_data.dbt_pk + where + archived_data.dbt_pk is null + or ( {{ update_candidate }} ) + ), + + updates as ( + + select + archived_data.*, + {{ archive_get_time() }} as dbt_valid_to + from current_data + left outer join archived_data + on archived_data.dbt_pk = current_data.dbt_pk + where {{ update_candidate }} + ), + {%- endset %} + + {%- set hash_components = ['dbt_pk'] %} + {%- do hash_components.extend(check_cols) -%} + {%- set scd_hash = archive_hash_arguments(hash_components) -%} + {{ archive_select_generic(source_sql, target_relation, transforms, scd_hash) }} +{%- endmacro %} + +{# this is gross #} +{% macro create_empty_table_as(sql) %} + {% set tmp_relation = api.Relation.create(identifier=model['name']+'_dbt_archival_view_tmp', type='view') %} + {% set limited_sql -%} + with cte as ( + {{ sql }} ) + select * from cte limit 0 + {%- endset %} + {%- set tmp_relation = create_temporary_table(limited_sql, tmp_relation) -%} - select *, - {{ archive_scd_hash() }} as {{ adapter.quote('scd_id') }} - from merged + {{ return(tmp_relation) }} {% endmacro %} + {% materialization archive, default %} {%- set config = model['config'] -%} {%- set target_database = config.get('target_database') -%} {%- set target_schema = config.get('target_schema') -%} - {%- set target_table = config.get('target_table') -%} - - {%- set source_database = config.get('source_database') -%} - {%- set source_schema = config.get('source_schema') -%} - {%- set source_table = config.get('source_table') -%} + {%- set target_table = model.get('alias', model.get('name')) -%} + {%- set strategy = config.get('strategy') -%} - {{ create_schema(target_database, target_schema) }} + {% set information_schema = api.Relation.create( + database=target_database, + schema=target_schema, + identifier=target_table).information_schema() %} - {%- set source_relation = adapter.get_relation( - database=source_database, - schema=source_schema, - identifier=source_table) -%} + {% if not check_schema_exists(information_schema, target_schema) %} + {{ create_schema(target_database, target_schema) }} + {% endif %} {%- set target_relation = adapter.get_relation( database=target_database, schema=target_schema, identifier=target_table) -%} - {%- if source_relation is none -%} - {{ exceptions.missing_relation('.'.join([source_database, source_schema, source_table])) }} - {%- endif -%} - {%- if target_relation is none -%} {%- set target_relation = api.Relation.create( database=target_database, @@ -168,13 +276,15 @@ {{ exceptions.relation_wrong_type(target_relation, 'table') }} {%- endif -%} - {%- set source_columns = adapter.get_columns_in_relation(source_relation) -%} + {% set source_info_model = create_empty_table_as(model['injected_sql']) %} + + {%- set source_columns = 
adapter.get_columns_in_relation(source_info_model) -%} + {%- set unique_key = config.get('unique_key') -%} - {%- set updated_at = config.get('updated_at') -%} {%- set dest_columns = source_columns + [ - api.Column.create('valid_from', 'timestamp'), - api.Column.create('valid_to', 'timestamp'), - api.Column.create('scd_id', 'string'), + api.Column.create('dbt_valid_from', 'timestamp'), + api.Column.create('dbt_valid_to', 'timestamp'), + api.Column.create('dbt_scd_id', 'string'), api.Column.create('dbt_updated_at', 'timestamp'), ] -%} @@ -182,18 +292,31 @@ {{ create_archive_table(target_relation, dest_columns) }} {% endcall %} - {% set missing_columns = adapter.get_missing_columns(source_relation, target_relation) %} + {% set missing_columns = adapter.get_missing_columns(source_info_model, target_relation) %} {{ create_columns(target_relation, missing_columns) }} + {{ adapter.valid_archive_target(target_relation) }} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = identifier + '__dbt_archival_tmp' -%} + {%- set tmp_identifier = model['name'] + '__dbt_archival_tmp' -%} {% set tmp_table_sql -%} with dbt_archive_sbq as ( - {{ archive_select(source_relation, target_relation, source_columns, unique_key, updated_at) }} + + {% if strategy == 'timestamp' %} + {%- set updated_at = config.get('updated_at') -%} + {{ archive_select_timestamp(model['injected_sql'], target_relation, source_columns, unique_key, updated_at) }} + {% elif strategy == 'check' %} + {%- set check_cols = config.get('check_cols') -%} + {% if check_cols == 'all' %} + {% set check_cols = source_columns | map(attribute='name') | list %} + {% endif %} + {{ archive_select_check_cols(model['injected_sql'], target_relation, source_columns, unique_key, check_cols)}} + {% else %} + {{ exceptions.raise_compiler_error('Got invalid strategy "{}"'.format(strategy)) }} + {% endif %} ) select * from dbt_archive_sbq @@ -215,7 +338,7 @@ {{ column_list(dest_columns) }} ) select {{ column_list(dest_columns) }} from {{ tmp_relation }} - where {{ adapter.quote('change_type') }} = 'insert'; + where change_type = 'insert'; {% endcall %} {{ adapter.commit() }} diff --git a/core/dbt/include/global_project/macros/materializations/helpers.sql b/core/dbt/include/global_project/macros/materializations/helpers.sql index c2eadfdab39..da78eb93506 100644 --- a/core/dbt/include/global_project/macros/materializations/helpers.sql +++ b/core/dbt/include/global_project/macros/materializations/helpers.sql @@ -14,14 +14,14 @@ {% macro column_list(columns) %} {%- for col in columns %} - {{ adapter.quote(col.name) }} {% if not loop.last %},{% endif %} + {{ col.name }} {% if not loop.last %},{% endif %} {% endfor -%} {% endmacro %} {% macro column_list_for_create_table(columns) %} {%- for col in columns %} - {{ adapter.quote(col.name) }} {{ col.data_type }} {%- if not loop.last %},{% endif %} + {{ col.name }} {{ col.data_type }} {%- if not loop.last %},{% endif %} {% endfor -%} {% endmacro %} diff --git a/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql b/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql index 438b3a066c3..f53df57cd4e 100644 --- a/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql +++ b/core/dbt/include/global_project/macros/materializations/incremental/incremental.sql @@ -12,11 +12,10 @@ {%- endmacro %} {% materialization incremental, default -%} - {%- set sql_where = config.get('sql_where') -%} {%- set unique_key = 
config.get('unique_key') -%} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = identifier + '__dbt_incremental_tmp' -%} + {%- set tmp_identifier = model['name'] + '__dbt_incremental_tmp' -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database, type='table') -%} @@ -57,19 +56,7 @@ {%- else -%} {%- call statement() -%} - {% set tmp_table_sql -%} - {# We are using a subselect instead of a CTE here to allow PostgreSQL to use indexes. -#} - select * from ( - {{ sql }} - ) as dbt_incr_sbq - - {% if sql_where %} - where ({{ sql_where }}) - or ({{ sql_where }}) is null - {% endif %} - {%- endset %} - - {{ dbt.create_table_as(True, tmp_relation, tmp_table_sql) }} + {{ dbt.create_table_as(True, tmp_relation, sql) }} {%- endcall -%} diff --git a/core/dbt/include/global_project/macros/materializations/table/table.sql b/core/dbt/include/global_project/macros/materializations/table/table.sql index 2e76017fbb3..d12062c50a9 100644 --- a/core/dbt/include/global_project/macros/materializations/table/table.sql +++ b/core/dbt/include/global_project/macros/materializations/table/table.sql @@ -1,7 +1,7 @@ {% materialization table, default %} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = identifier + '__dbt_tmp' -%} - {%- set backup_identifier = identifier + '__dbt_backup' -%} + {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%} + {%- set backup_identifier = model['name'] + '__dbt_backup' -%} {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} diff --git a/core/dbt/include/global_project/macros/materializations/view/view.sql b/core/dbt/include/global_project/macros/materializations/view/view.sql index f5c68963444..2fa2a672678 100644 --- a/core/dbt/include/global_project/macros/materializations/view/view.sql +++ b/core/dbt/include/global_project/macros/materializations/view/view.sql @@ -1,8 +1,8 @@ {%- materialization view, default -%} {%- set identifier = model['alias'] -%} - {%- set tmp_identifier = identifier + '__dbt_tmp' -%} - {%- set backup_identifier = identifier + '__dbt_backup' -%} + {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%} + {%- set backup_identifier = model['name'] + '__dbt_backup' -%} {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} diff --git a/core/dbt/linker.py b/core/dbt/linker.py index ad1a3bc9e6f..1a17c791af1 100644 --- a/core/dbt/linker.py +++ b/core/dbt/linker.py @@ -1,8 +1,6 @@ import networkx as nx -from collections import defaultdict import threading -import dbt.utils from dbt.compat import PriorityQueue from dbt.node_types import NodeType @@ -174,6 +172,28 @@ def join(self): self.inner.join() +def _subset_graph(graph, include_nodes): + """Create and return a new graph that is a shallow copy of graph but with + only the nodes in include_nodes. Transitive edges across removed nodes are + preserved as explicit new edges. 
+ """ + new_graph = nx.algorithms.transitive_closure(graph) + + include_nodes = set(include_nodes) + + for node in graph.nodes(): + if node not in include_nodes: + new_graph.remove_node(node) + + for node in include_nodes: + if node not in new_graph: + raise RuntimeError( + "Couldn't find model '{}' -- does it exist or is " + "it disabled?".format(node) + ) + return new_graph + + class Linker(object): def __init__(self, data=None): if data is None: @@ -209,23 +229,7 @@ def as_graph_queue(self, manifest, limit_to=None): else: graph_nodes = limit_to - new_graph = nx.DiGraph(self.graph) - - to_remove = [] - graph_nodes_lookup = set(graph_nodes) - for node in new_graph.nodes(): - if node not in graph_nodes_lookup: - to_remove.append(node) - - for node in to_remove: - new_graph.remove_node(node) - - for node in graph_nodes: - if node not in new_graph: - raise RuntimeError( - "Couldn't find model '{}' -- does it exist or is " - "it disabled?".format(node) - ) + new_graph = _subset_graph(self.graph, graph_nodes) return GraphQueue(new_graph, manifest) def get_dependent_nodes(self, node): diff --git a/core/dbt/loader.py b/core/dbt/loader.py index daa74d510c6..7e98d0b29ac 100644 --- a/core/dbt/loader.py +++ b/core/dbt/loader.py @@ -1,7 +1,6 @@ import os import itertools -from dbt import deprecations from dbt.include.global_project import PACKAGES import dbt.exceptions import dbt.flags @@ -12,7 +11,7 @@ from dbt.parser import MacroParser, ModelParser, SeedParser, AnalysisParser, \ DocumentationParser, DataTestParser, HookParser, ArchiveParser, \ - SchemaParser, ParserUtils + SchemaParser, ParserUtils, ArchiveBlockParser from dbt.contracts.project import ProjectList @@ -35,15 +34,15 @@ def _load_sql_nodes(self, parser_type, resource_type, relative_dirs_attr, self.macro_manifest) for project_name, project in self.all_projects.items(): - nodes, disabled = parser.load_and_parse( + parse_results = parser.load_and_parse( package_name=project_name, root_dir=project.project_root, relative_dirs=getattr(project, relative_dirs_attr), resource_type=resource_type, **kwargs ) - self.nodes.update(nodes) - self.disabled.extend(disabled) + self.nodes.update(parse_results.parsed) + self.disabled.extend(parse_results.disabled) def _load_macros(self, internal_manifest=None): # skip any projects in the internal manifest @@ -76,6 +75,8 @@ def _load_seeds(self): def _load_nodes(self): self._load_sql_nodes(ModelParser, NodeType.Model, 'source_paths') + self._load_sql_nodes(ArchiveBlockParser, NodeType.Archive, + 'archive_paths') self._load_sql_nodes(AnalysisParser, NodeType.Analysis, 'analysis_paths') self._load_sql_nodes(DataTestParser, NodeType.Test, 'test_paths', @@ -194,12 +195,14 @@ def _check_resource_uniqueness(manifest): existing_node = names_resources.get(name) if existing_node is not None: dbt.exceptions.raise_duplicate_resource_name( - existing_node, node) + existing_node, node + ) existing_alias = alias_resources.get(alias) if existing_alias is not None: dbt.exceptions.raise_ambiguous_alias( - existing_alias, node) + existing_alias, node + ) names_resources[name] = node alias_resources[alias] = node @@ -211,17 +214,9 @@ def _warn_for_unused_resource_config_paths(manifest, config): config.warn_for_unused_resource_config_paths(resource_fqns, disabled_fqns) -def _warn_for_deprecated_configs(manifest): - for unique_id, node in manifest.nodes.items(): - is_model = node.resource_type == NodeType.Model - if is_model and 'sql_where' in node.config: - deprecations.warn('sql_where') - - def _check_manifest(manifest, 
config): _check_resource_uniqueness(manifest) _warn_for_unused_resource_config_paths(manifest, config) - _warn_for_deprecated_configs(manifest) def internal_project_names(): diff --git a/core/dbt/logger.py b/core/dbt/logger.py index 6b2ab24b391..f658769172f 100644 --- a/core/dbt/logger.py +++ b/core/dbt/logger.py @@ -4,11 +4,9 @@ import logging.handlers import os import sys -import warnings import colorama - # Colorama needs some help on windows because we're using logger.info # intead of print(). If the Windows env doesn't have a TERM var set, # then we should override the logging stream to use the colorama @@ -17,6 +15,27 @@ colorama_stdout = sys.stdout colorama_wrap = True +colorama.init(wrap=colorama_wrap) + +DEBUG = logging.DEBUG +NOTICE = 15 +INFO = logging.INFO +WARNING = logging.WARNING +ERROR = logging.ERROR +CRITICAL = logging.CRITICAL + +logging.addLevelName(NOTICE, 'NOTICE') + + +class Logger(logging.Logger): + def notice(self, msg, *args, **kwargs): + if self.isEnabledFor(NOTICE): + self._log(NOTICE, msg, args, **kwargs) + + +logging.setLoggerClass(Logger) + + if sys.platform == 'win32' and not os.environ.get('TERM'): colorama_wrap = False colorama_stdout = colorama.AnsiToWin32(sys.stdout).stream @@ -29,23 +48,28 @@ # create a global console logger for dbt stdout_handler = logging.StreamHandler(colorama_stdout) stdout_handler.setFormatter(logging.Formatter('%(message)s')) -stdout_handler.setLevel(logging.INFO) +stdout_handler.setLevel(NOTICE) logger = logging.getLogger('dbt') logger.addHandler(stdout_handler) -logger.setLevel(logging.DEBUG) -logging.getLogger().setLevel(logging.CRITICAL) +logger.setLevel(DEBUG) +logging.getLogger().setLevel(CRITICAL) # Quiet these down in the logs -logging.getLogger('botocore').setLevel(logging.INFO) -logging.getLogger('requests').setLevel(logging.INFO) -logging.getLogger('urllib3').setLevel(logging.INFO) -logging.getLogger('google').setLevel(logging.INFO) -logging.getLogger('snowflake.connector').setLevel(logging.INFO) -logging.getLogger('parsedatetime').setLevel(logging.INFO) +logging.getLogger('botocore').setLevel(INFO) +logging.getLogger('requests').setLevel(INFO) +logging.getLogger('urllib3').setLevel(INFO) +logging.getLogger('google').setLevel(INFO) +logging.getLogger('snowflake.connector').setLevel(INFO) +logging.getLogger('parsedatetime').setLevel(INFO) +# we never want to seek werkzeug logs +logging.getLogger('werkzeug').setLevel(CRITICAL) # provide this for the cache. 
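
As an aside, a minimal stand-alone sketch of the custom NOTICE level and Logger subclass that the logger.py hunk above introduces, using only the standard library (names here are illustrative, not part of the patch):

    import logging

    NOTICE = 15                              # sits between DEBUG (10) and INFO (20)
    logging.addLevelName(NOTICE, 'NOTICE')

    class NoticeLogger(logging.Logger):
        def notice(self, msg, *args, **kwargs):
            if self.isEnabledFor(NOTICE):
                self._log(NOTICE, msg, args, **kwargs)

    logging.setLoggerClass(NoticeLogger)     # must run before getLogger() for new names

    log = logging.getLogger('example')
    handler = logging.StreamHandler()
    handler.setLevel(NOTICE)                 # the console handler shows NOTICE and up
    log.addHandler(handler)
    log.setLevel(logging.DEBUG)

    log.debug('suppressed by the console handler')
    log.notice('shown to the user')
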
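
The _subset_graph helper added to core/dbt/linker.py above preserves ordering across removed nodes by taking the transitive closure before dropping anything. A small, self-contained illustration of that behaviour (node names are made up):

    import networkx as nx

    # a -> b -> c -> d; suppose only a, b, and d are selected for this run
    graph = nx.DiGraph([('a', 'b'), ('b', 'c'), ('c', 'd')])
    selected = {'a', 'b', 'd'}

    closed = nx.transitive_closure(graph)    # adds ('a', 'c'), ('a', 'd'), ('b', 'd')
    for node in list(closed.nodes()):
        if node not in selected:
            closed.remove_node(node)

    print(sorted(closed.edges()))
    # [('a', 'b'), ('a', 'd'), ('b', 'd')] -- b is still ordered before d,
    # even though the node between them was removed
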
CACHE_LOGGER = logging.getLogger('dbt.cache') +# provide this for RPC connection logging +RPC_LOGGER = logging.getLogger('dbt.rpc') + # Redirect warnings through our logging setup # They will be logged to a file below @@ -70,6 +94,10 @@ def filter(self, record): return True +def default_formatter(): + return logging.Formatter('%(asctime)-18s (%(threadName)s): %(message)s') + + def initialize_logger(debug_mode=False, path=None): global initialized, logger, stdout_handler @@ -77,9 +105,8 @@ def initialize_logger(debug_mode=False, path=None): return if debug_mode: - stdout_handler.setFormatter( - logging.Formatter('%(asctime)-18s (%(threadName)s): %(message)s')) - stdout_handler.setLevel(logging.DEBUG) + stdout_handler.setFormatter(default_formatter()) + stdout_handler.setLevel(DEBUG) if path is not None: make_log_dir_if_missing(path) @@ -96,16 +123,15 @@ def initialize_logger(debug_mode=False, path=None): color_filter = ColorFilter() logdir_handler.addFilter(color_filter) - logdir_handler.setFormatter( - logging.Formatter('%(asctime)-18s (%(threadName)s): %(message)s')) - logdir_handler.setLevel(logging.DEBUG) + logdir_handler.setFormatter(default_formatter()) + logdir_handler.setLevel(DEBUG) logger.addHandler(logdir_handler) # Log Python warnings to file warning_logger = logging.getLogger('py.warnings') warning_logger.addHandler(logdir_handler) - warning_logger.setLevel(logging.DEBUG) + warning_logger.setLevel(DEBUG) initialized = True @@ -121,3 +147,56 @@ def log_cache_events(flag): GLOBAL_LOGGER = logger + + +class QueueFormatter(logging.Formatter): + def formatMessage(self, record): + superself = super(QueueFormatter, self) + if hasattr(superself, 'formatMessage'): + # python 3.x + return superself.formatMessage(record) + + # python 2.x, handling weird unicode things + try: + return self._fmt % record.__dict__ + except UnicodeDecodeError: + try: + record.name = record.name.decode('utf-8') + return self._fmt % record.__dict__ + except UnicodeDecodeError as e: + raise e + + def format(self, record): + record.message = record.getMessage() + record.asctime = self.formatTime(record, self.datefmt) + formatted = self.formatMessage(record) + + output = { + 'message': formatted, + 'timestamp': record.asctime, + 'levelname': record.levelname, + 'level': record.levelno, + } + if record.exc_info: + if not record.exc_text: + record.exc_text = self.formatException(record.exc_info) + output['exc_info'] = record.exc_text + return output + + +class QueueLogHandler(logging.Handler): + def __init__(self, queue): + super(QueueLogHandler, self).__init__() + self.queue = queue + + def emit(self, record): + msg = self.format(record) + self.queue.put_nowait(['log', msg]) + + +def add_queue_handler(queue): + """Add a queue log handler to the global logger.""" + handler = QueueLogHandler(queue) + handler.setFormatter(QueueFormatter()) + handler.setLevel(DEBUG) + GLOBAL_LOGGER.addHandler(handler) diff --git a/core/dbt/main.py b/core/dbt/main.py index 8a8a001c1a5..9d96d9c91d9 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -5,6 +5,7 @@ import os.path import sys import traceback +from contextlib import contextmanager import dbt.version import dbt.flags as flags @@ -20,6 +21,8 @@ import dbt.task.generate as generate_task import dbt.task.serve as serve_task import dbt.task.freshness as freshness_task +import dbt.task.run_operation as run_operation_task +from dbt.task.rpc_server import RPCServerTask from dbt.adapters.factory import reset_adapters import dbt.tracking @@ -29,9 +32,8 @@ import dbt.profiler from 
dbt.utils import ExitCodes -from dbt.config import Project, UserConfig, RuntimeConfig, PROFILES_DIR, \ - read_profiles -from dbt.exceptions import DbtProjectError, DbtProfileError, RuntimeException +from dbt.config import UserConfig, PROFILES_DIR +from dbt.exceptions import RuntimeException PROFILES_HELP_MESSAGE = """ @@ -81,7 +83,7 @@ def main(args=None): else: exit_code = ExitCodes.ModelError - except KeyboardInterrupt as e: + except KeyboardInterrupt: logger.info("ctrl-c") exit_code = ExitCodes.UnhandledError @@ -148,138 +150,60 @@ def handle_and_check(args): reset_adapters() - try: - task, res = run_from_args(parsed) - finally: - dbt.tracking.flush() - + task, res = run_from_args(parsed) success = task.interpret_results(res) return res, success -def get_nearest_project_dir(): - root_path = os.path.abspath(os.sep) - cwd = os.getcwd() - - while cwd != root_path: - project_file = os.path.join(cwd, "dbt_project.yml") - if os.path.exists(project_file): - return cwd - cwd = os.path.dirname(cwd) - - return None - - -def run_from_args(parsed): - task = None - cfg = None - - if parsed.which in ('init', 'debug'): - # bypass looking for a project file if we're running `dbt init` or - # `dbt debug` - task = parsed.cls(args=parsed) - else: - nearest_project_dir = get_nearest_project_dir() - if nearest_project_dir is None: - raise RuntimeException( - "fatal: Not a dbt project (or any of the parent directories). " - "Missing dbt_project.yml file" - ) - - os.chdir(nearest_project_dir) - - res = invoke_dbt(parsed) - if res is None: - raise RuntimeException("Could not run dbt") - else: - task, cfg = res - - log_path = None - - if cfg is not None: - log_path = cfg.log_path - - initialize_logger(parsed.debug, log_path) - logger.debug("Tracking: {}".format(dbt.tracking.active_user.state())) - - dbt.tracking.track_invocation_start(config=cfg, args=parsed) - - results = run_from_task(task, cfg, parsed) - - return task, results - - -def run_from_task(task, cfg, parsed_args): - result = None +@contextmanager +def track_run(task): + dbt.tracking.track_invocation_start(config=task.config, args=task.args) try: - result = task.run() + yield dbt.tracking.track_invocation_end( - config=cfg, args=parsed_args, result_type="ok" + config=task.config, args=task.args, result_type="ok" ) except (dbt.exceptions.NotImplementedException, dbt.exceptions.FailedToConnectException) as e: - logger.info('ERROR: {}'.format(e)) + logger.error('ERROR: {}'.format(e)) dbt.tracking.track_invocation_end( - config=cfg, args=parsed_args, result_type="error" + config=task.config, args=task.args, result_type="error" ) - except Exception as e: + except Exception: dbt.tracking.track_invocation_end( - config=cfg, args=parsed_args, result_type="error" + config=task.config, args=task.args, result_type="error" ) raise - - return result + finally: + dbt.tracking.flush() -def invoke_dbt(parsed): - task = None - cfg = None - +def run_from_args(parsed): log_cache_events(getattr(parsed, 'log_cache_events', False)) + update_flags(parsed) + logger.info("Running with dbt{}".format(dbt.version.installed)) - try: - if parsed.which in {'deps', 'clean'}: - # deps doesn't need a profile, so don't require one. - cfg = Project.from_current_directory(getattr(parsed, 'vars', '{}')) - elif parsed.which != 'debug': - # for debug, we will attempt to load the various configurations as - # part of the task, so just leave cfg=None. 
- cfg = RuntimeConfig.from_args(parsed) - except DbtProjectError as e: - logger.info("Encountered an error while reading the project:") - logger.info(dbt.compat.to_string(e)) - - dbt.tracking.track_invalid_invocation( - config=cfg, - args=parsed, - result_type=e.result_type) - - return None - except DbtProfileError as e: - logger.info("Encountered an error while reading profiles:") - logger.info(" ERROR {}".format(str(e))) - - all_profiles = read_profiles(parsed.profiles_dir).keys() - - if len(all_profiles) > 0: - logger.info("Defined profiles:") - for profile in all_profiles: - logger.info(" - {}".format(profile)) - else: - logger.info("There are no profiles defined in your " - "profiles.yml file") + # this will convert DbtConfigErrors into RuntimeExceptions + task = parsed.cls.from_args(args=parsed) + logger.debug("running dbt with arguments %s", parsed) - logger.info(PROFILES_HELP_MESSAGE) + log_path = None + if task.config is not None: + log_path = getattr(task.config, 'log_path', None) + initialize_logger(parsed.debug, log_path) + logger.debug("Tracking: {}".format(dbt.tracking.active_user.state())) - dbt.tracking.track_invalid_invocation( - config=cfg, - args=parsed, - result_type=e.result_type) + results = None - return None + with track_run(task): + results = task.run() + return task, results + + +def update_flags(parsed): flags.NON_DESTRUCTIVE = getattr(parsed, 'non_destructive', False) flags.USE_CACHE = getattr(parsed, 'use_cache', True) @@ -297,11 +221,7 @@ def invoke_dbt(parsed): elif arg_full_refresh: flags.FULL_REFRESH = True - logger.debug("running dbt with arguments %s", parsed) - - task = parsed.cls(args=parsed, config=cfg) - - return task, cfg + flags.TEST_NEW_PARSER = getattr(parsed, 'test_new_parser', False) def _build_base_subparser(): @@ -377,9 +297,9 @@ def _build_source_subparser(subparsers, base_subparser): def _build_init_subparser(subparsers, base_subparser): sub = subparsers.add_parser( - 'init', - parents=[base_subparser], - help="Initialize a new DBT project.") + 'init', + parents=[base_subparser], + help="Initialize a new DBT project.") sub.add_argument('project_name', type=str, help='Name of the new project') sub.set_defaults(cls=init_task.InitTask, which='init') return sub @@ -478,7 +398,7 @@ def _build_docs_generate_subparser(subparsers, base_subparser): return generate_sub -def _add_common_arguments(*subparsers): +def _add_selection_arguments(*subparsers): for sub in subparsers: sub.add_argument( '-m', @@ -497,15 +417,10 @@ def _add_common_arguments(*subparsers): Specify the models to exclude. """ ) - sub.add_argument( - '--threads', - type=int, - required=False, - help=""" - Specify number of threads to use while executing models. Overrides - settings in profiles.yml. - """ - ) + + +def _add_table_mutability_arguments(*subparsers): + for sub in subparsers: sub.add_argument( '--non-destructive', action='store_true', @@ -521,6 +436,19 @@ def _add_common_arguments(*subparsers): If specified, DBT will drop incremental models and fully-recalculate the incremental table from the model definition. """) + + +def _add_common_arguments(*subparsers): + for sub in subparsers: + sub.add_argument( + '--threads', + type=int, + required=False, + help=""" + Specify number of threads to use while executing models. Overrides + settings in profiles.yml. 
+ """ + ) sub.add_argument( '--no-version-check', dest='version_check', @@ -583,32 +511,6 @@ def _build_test_subparser(subparsers, base_subparser): action='store_true', help='Run constraint validations from schema.yml files' ) - sub.add_argument( - '--threads', - type=int, - required=False, - help=""" - Specify number of threads to use while executing tests. Overrides - settings in profiles.yml - """ - ) - sub.add_argument( - '-m', - '--models', - required=False, - nargs='+', - help=""" - Specify the models to test. - """ - ) - sub.add_argument( - '--exclude', - required=False, - nargs='+', - help=""" - Specify the models to exclude from testing. - """ - ) sub.set_defaults(cls=test_task.TestTask, which='test') return sub @@ -639,11 +541,43 @@ def _build_source_snapshot_freshness_subparser(subparsers, base_subparser): target/sources.json """ ) + sub.add_argument( + '--threads', + type=int, + required=False, + help=""" + Specify number of threads to use. Overrides settings in profiles.yml + """ + ) sub.set_defaults(cls=freshness_task.FreshnessTask, which='snapshot-freshness') return sub +def _build_rpc_subparser(subparsers, base_subparser): + sub = subparsers.add_parser( + 'rpc', + parents=[base_subparser], + help='Start a json-rpc server', + ) + sub.add_argument( + '--host', + default='0.0.0.0', + help='Specify the host to listen on for the rpc server.' + ) + sub.add_argument( + '--port', + default=8580, + type=int, + help='Specify the port number for the rpc server.' + ) + sub.set_defaults(cls=RPCServerTask, which='rpc') + # the rpc task does a 'compile', so we need these attributes to exist, but + # we don't want users to be allowed to set them. + sub.set_defaults(models=None, exclude=None) + return sub + + def parse_args(args): p = DBTArgumentParser( prog='dbt: data build tool', @@ -686,6 +620,14 @@ def parse_args(args): help='''Run schema validations at runtime. This will surface bugs in dbt, but may incur a performance penalty.''') + p.add_argument( + '--warn-error', + action='store_true', + help='''If dbt would normally warn, instead raise an exception. + Examples include --models that selects nothing, deprecations, + configurations with no associated models, invalid test configurations, + and missing sources/refs in tests''') + # if set, run dbt in single-threaded mode: thread count is ignored, and # calls go through `map` instead of the thread pool. This is useful for # getting performance information about aspects of dbt that normally run in @@ -697,6 +639,15 @@ def parse_args(args): help=argparse.SUPPRESS, ) + # if set, extract all models and blocks with the jinja block extractor, and + # verify that we don't fail anywhere the actual jinja parser passes. The + # reverse (passing files that ends up failing jinja) is fine. 
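
The rpc subcommand added above starts a JSON-RPC 2.0 server, listening on 0.0.0.0:8580 by default. A rough sketch of a client call against the built-in 'ps' task listing; the /jsonrpc endpoint path and the use of the requests library are assumptions here, not taken from this patch:

    import requests

    payload = {
        'jsonrpc': '2.0',
        'method': 'ps',                                   # built-in task listing
        'params': {'active': True, 'completed': False},
        'id': 1,
    }
    response = requests.post('http://localhost:8580/jsonrpc', json=payload)
    for row in response.json()['result']['rows']:
        print(row['task_id'], row['method'], row['state'])
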
+ p.add_argument( + '--test-new-parser', + action='store_true', + help=argparse.SUPPRESS + ) + subs = p.add_subparsers(title="Available sub-commands") base_subparser = _build_base_subparser() @@ -711,18 +662,53 @@ def parse_args(args): _build_clean_subparser(subs, base_subparser) _build_debug_subparser(subs, base_subparser) _build_deps_subparser(subs, base_subparser) - _build_archive_subparser(subs, base_subparser) + archive_sub = _build_archive_subparser(subs, base_subparser) + rpc_sub = _build_rpc_subparser(subs, base_subparser) run_sub = _build_run_subparser(subs, base_subparser) compile_sub = _build_compile_subparser(subs, base_subparser) generate_sub = _build_docs_generate_subparser(docs_subs, base_subparser) - _add_common_arguments(run_sub, compile_sub, generate_sub) + test_sub = _build_test_subparser(subs, base_subparser) + # --threads, --no-version-check + _add_common_arguments(run_sub, compile_sub, generate_sub, test_sub, + rpc_sub) + # --models, --exclude + _add_selection_arguments(run_sub, compile_sub, generate_sub, test_sub, + archive_sub) + # --full-refresh, --non-destructive + _add_table_mutability_arguments(run_sub, compile_sub) _build_seed_subparser(subs, base_subparser) _build_docs_serve_subparser(docs_subs, base_subparser) - _build_test_subparser(subs, base_subparser) _build_source_snapshot_freshness_subparser(source_subs, base_subparser) + sub = subs.add_parser( + 'run-operation', + parents=[base_subparser], + help=""" + (beta) Run the named macro with any supplied arguments. This + subcommand is unstable and subject to change in a future release + of dbt. Please use it with caution""" + ) + sub.add_argument( + '--macro', + required=True, + help=""" + Specify the macro to invoke. dbt will call this macro with the + supplied arguments and then exit""" + ) + sub.add_argument( + '--args', + type=str, + default='{}', + help=""" + Supply arguments to the macro. This dictionary will be mapped + to the keyword arguments defined in the selected macro. This + argument should be a YAML string, eg. '{my_variable: my_value}'""" + ) + sub.set_defaults(cls=run_operation_task.RunOperationTask, + which='run-operation') + if len(args) == 0: p.print_help() sys.exit(1) diff --git a/core/dbt/node_runners.py b/core/dbt/node_runners.py index 669b93bae22..bba860c9356 100644 --- a/core/dbt/node_runners.py +++ b/core/dbt/node_runners.py @@ -1,25 +1,18 @@ from dbt.logger import GLOBAL_LOGGER as logger from dbt.exceptions import NotImplementedException, CompilationException, \ RuntimeException, InternalException, missing_materialization -from dbt.utils import get_nodes_by_tags -from dbt.node_types import NodeType, RunHookType -from dbt.adapters.factory import get_adapter +from dbt.node_types import NodeType from dbt.contracts.results import RunModelResult, collect_timing_info, \ - SourceFreshnessResult, PartialResult + SourceFreshnessResult, PartialResult, RemoteCompileResult, RemoteRunResult from dbt.compilation import compile_node -import dbt.clients.jinja import dbt.context.runtime import dbt.exceptions import dbt.utils import dbt.tracking import dbt.ui.printer -import dbt.flags -import dbt.schema -import dbt.writer +from dbt import rpc -import six -import sys import threading import time import traceback @@ -48,6 +41,15 @@ def track_model_run(index, num_nodes, run_model_result): }) +class ExecutionContext(object): + """During execution and error handling, dbt makes use of mutable state: + timing information and the newest (compiled vs executed) form of the node. 
+ """ + def __init__(self, node): + self.timing = [] + self.node = node + + class BaseRunner(object): def __init__(self, config, adapter, node, node_index, num_nodes): self.config = config @@ -119,67 +121,78 @@ def from_run_result(self, result, start_time, timing_info): timing_info=timing_info ) - def safe_run(self, manifest): - catchable_errors = (CompilationException, RuntimeException) - - # result = self.DefaultResult(self.node) - started = time.time() - timing = [] - error = None - node = self.node + def compile_and_execute(self, manifest, ctx): result = None + self.adapter.acquire_connection(self.node.get('name')) + with collect_timing_info('compile') as timing_info: + # if we fail here, we still have a compiled node to return + # this has the benefit of showing a build path for the errant + # model + ctx.node = self.compile(manifest) + ctx.timing.append(timing_info) + + # for ephemeral nodes, we only want to compile, not run + if not ctx.node.is_ephemeral_model: + with collect_timing_info('execute') as timing_info: + result = self.run(ctx.node, manifest) + ctx.node = result.node + + ctx.timing.append(timing_info) - try: - with collect_timing_info('compile') as timing_info: - # if we fail here, we still have a compiled node to return - # this has the benefit of showing a build path for the errant - # model - node = self.compile(manifest) - - timing.append(timing_info) + return result - # for ephemeral nodes, we only want to compile, not run - if not node.is_ephemeral_model: - with collect_timing_info('execute') as timing_info: - result = self.run(node, manifest) - node = result.node + def _handle_catchable_exception(self, e, ctx): + if e.node is None: + e.node = ctx.node - timing.append(timing_info) + return dbt.compat.to_string(e) - # result.extend(item.serialize() for item in timing) + def _handle_internal_exception(self, e, ctx): + build_path = self.node.build_path + prefix = 'Internal error executing {}'.format(build_path) - except catchable_errors as e: - if e.node is None: - e.node = node + error = "{prefix}\n{error}\n\n{note}".format( + prefix=dbt.ui.printer.red(prefix), + error=str(e).strip(), + note=INTERNAL_ERROR_STRING + ) + logger.debug(error) + return dbt.compat.to_string(e) + + def _handle_generic_exception(self, e, ctx): + node_description = self.node.get('build_path') + if node_description is None: + node_description = self.node.unique_id + prefix = "Unhandled error while executing {}".format(node_description) + error = "{prefix}\n{error}".format( + prefix=dbt.ui.printer.red(prefix), + error=str(e).strip() + ) - error = dbt.compat.to_string(e) + logger.error(error) + logger.debug('', exc_info=True) + return dbt.compat.to_string(e) - except InternalException as e: - build_path = self.node.build_path - prefix = 'Internal error executing {}'.format(build_path) + def handle_exception(self, e, ctx): + catchable_errors = (CompilationException, RuntimeException) + if isinstance(e, catchable_errors): + error = self._handle_catchable_exception(e, ctx) + elif isinstance(e, InternalException): + error = self._handle_internal_exception(e, ctx) + else: + error = self._handle_generic_exception(e, ctx) + return error - error = "{prefix}\n{error}\n\n{note}".format( - prefix=dbt.ui.printer.red(prefix), - error=str(e).strip(), - note=INTERNAL_ERROR_STRING) - logger.debug(error) - error = dbt.compat.to_string(e) + def safe_run(self, manifest): + started = time.time() + ctx = ExecutionContext(self.node) + error = None + result = None + try: + result = self.compile_and_execute(manifest, 
ctx) except Exception as e: - node_description = self.node.get('build_path') - if node_description is None: - node_description = self.node.unique_id - prefix = "Unhandled error while executing {description}".format( - description=node_description) - - error = "{prefix}\n{error}".format( - prefix=dbt.ui.printer.red(prefix), - error=str(e).strip()) - - logger.error(error) - logger.debug('', exc_info=True) - error = dbt.compat.to_string(e) - + error = self.handle_exception(e, ctx) finally: exc_str = self._safe_release_connection() @@ -190,24 +203,23 @@ def safe_run(self, manifest): if error is not None: # we could include compile time for runtime errors here - result = self.error_result(node, error, started, []) + result = self.error_result(ctx.node, error, started, []) elif result is not None: - result = self.from_run_result(result, started, timing) + result = self.from_run_result(result, started, ctx.timing) else: - result = self.ephemeral_result(node, started, timing) + result = self.ephemeral_result(ctx.node, started, ctx.timing) return result def _safe_release_connection(self): """Try to release a connection. If an exception is hit, log and return the error string. """ - node_name = self.node.name try: - self.adapter.release_connection(node_name) + self.adapter.release_connection() except Exception as exc: logger.debug( 'Error releasing connection for node {}: {!s}\n{}' - .format(node_name, exc, traceback.format_exc()) + .format(self.node.name, exc, traceback.format_exc()) ) return dbt.compat.to_string(exc) @@ -286,11 +298,17 @@ def compile(self, manifest): class ModelRunner(CompileRunner): + def get_node_representation(self): + if self.config.credentials.database == self.node.database: + template = "{0.schema}.{0.alias}" + else: + template = "{0.database}.{0.schema}.{0.alias}" + + return template.format(self.node) + def describe_node(self): - materialization = dbt.utils.get_materialization(self.node) - return "{0} model {1.database}.{1.schema}.{1.alias}".format( - materialization, self.node - ) + return "{} model {}".format(self.node.get_materialization(), + self.get_node_representation()) def print_start_line(self): description = self.describe_node() @@ -298,9 +316,9 @@ def print_start_line(self): self.num_nodes) def print_result_line(self, result): - schema_name = self.node.schema + description = self.describe_node() dbt.ui.printer.print_model_result_line(result, - schema_name, + description, self.node_index, self.num_nodes) @@ -365,7 +383,8 @@ def _calculate_status(self, target_freshness, freshness): continue target = target_freshness[fullkey] - kwargs = {target['period']+'s': target['count']} + kwname = target['period'] + 's' + kwargs = {kwname: target['count']} if freshness > timedelta(**kwargs).total_seconds(): return key return 'pass' @@ -394,11 +413,14 @@ def from_run_result(self, result, start_time, timing_info): def execute(self, compiled_node, manifest): relation = self.adapter.Relation.create_from_source(compiled_node) # given a Source, calculate its fresnhess. 
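
The freshness check above builds a timedelta by pluralizing the configured period into a keyword argument and compares it against the source's age in seconds. A minimal illustration with a made-up warn_after-style target:

    from datetime import timedelta

    target = {'count': 12, 'period': 'hour'}                 # hypothetical target entry
    allowed = timedelta(**{target['period'] + 's': target['count']})

    age_in_seconds = 14 * 3600                               # source loaded 14 hours ago
    print(allowed.total_seconds())                           # 43200.0
    print(age_in_seconds > allowed.total_seconds())          # True -> threshold exceeded
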
- freshness = self.adapter.calculate_freshness( - relation, - compiled_node.loaded_at_field, - manifest=manifest - ) + with self.adapter.connection_named(compiled_node.unique_id): + self.adapter.clear_transaction() + freshness = self.adapter.calculate_freshness( + relation, + compiled_node.loaded_at_field, + manifest=manifest + ) + status = self._calculate_status( compiled_node.freshness, freshness['age'] @@ -439,7 +461,6 @@ def print_start_line(self): def execute_test(self, test): res, table = self.adapter.execute( test.wrapped_sql, - model_name=test.name, auto_begin=True, fetch=True) @@ -466,8 +487,10 @@ def after_execute(self, result): class ArchiveRunner(ModelRunner): def describe_node(self): cfg = self.node.get('config', {}) - return "archive {source_database}.{source_schema}.{source_table} --> "\ - "{target_database}.{target_schema}.{target_table}".format(**cfg) + return ( + "archive {name} --> {target_database}.{target_schema}.{name}" + .format(name=self.node.name, **cfg) + ) def print_result_line(self, result): dbt.ui.printer.print_archive_result_line(result, self.node_index, @@ -476,7 +499,7 @@ def print_result_line(self, result): class SeedRunner(ModelRunner): def describe_node(self): - return "seed file {0.database}.{0.schema}.{0.alias}".format(self.node) + return "seed file {}".format(self.get_node_representation()) def before_execute(self): description = self.describe_node() @@ -492,3 +515,80 @@ def print_result_line(self, result): schema_name, self.node_index, self.num_nodes) + + +class RPCCompileRunner(CompileRunner): + def __init__(self, config, adapter, node, node_index, num_nodes): + super(RPCCompileRunner, self).__init__(config, adapter, node, + node_index, num_nodes) + + def handle_exception(self, e, ctx): + if isinstance(e, dbt.exceptions.Exception): + if isinstance(e, dbt.exceptions.RuntimeException): + e.node = ctx.node + return rpc.dbt_error(e) + elif isinstance(e, rpc.RPCException): + return e + else: + return rpc.server_error(e) + + def before_execute(self): + pass + + def after_execute(self, result): + pass + + def compile(self, manifest): + return compile_node(self.adapter, self.config, self.node, manifest, {}, + write=False) + + def execute(self, compiled_node, manifest): + return RemoteCompileResult( + raw_sql=compiled_node.raw_sql, + compiled_sql=compiled_node.injected_sql, + node=compiled_node + ) + + def error_result(self, node, error, start_time, timing_info): + raise error + + def ephemeral_result(self, node, start_time, timing_info): + raise NotImplementedException( + 'cannot execute ephemeral nodes remotely!' 
+ ) + + def from_run_result(self, result, start_time, timing_info): + timing = [t.serialize() for t in timing_info] + return RemoteCompileResult( + raw_sql=result.raw_sql, + compiled_sql=result.compiled_sql, + node=result.node, + timing=timing + ) + + +class RPCExecuteRunner(RPCCompileRunner): + def from_run_result(self, result, start_time, timing_info): + timing = [t.serialize() for t in timing_info] + return RemoteRunResult( + raw_sql=result.raw_sql, + compiled_sql=result.compiled_sql, + node=result.node, + table=result.table, + timing=timing + ) + + def execute(self, compiled_node, manifest): + status, table = self.adapter.execute(compiled_node.injected_sql, + fetch=True) + table = { + 'column_names': list(table.column_names), + 'rows': [list(row) for row in table] + } + + return RemoteRunResult( + raw_sql=compiled_node.raw_sql, + compiled_sql=compiled_node.injected_sql, + node=compiled_node, + table=table + ) diff --git a/core/dbt/node_types.py b/core/dbt/node_types.py index 4f097ab1070..d0a94404ae0 100644 --- a/core/dbt/node_types.py +++ b/core/dbt/node_types.py @@ -10,6 +10,7 @@ class NodeType(object): Seed = 'seed' Documentation = 'documentation' Source = 'source' + RPCCall = 'rpc' @classmethod def executable(cls): @@ -21,6 +22,7 @@ def executable(cls): cls.Operation, cls.Seed, cls.Documentation, + cls.RPCCall, ] @classmethod @@ -28,6 +30,7 @@ def refable(cls): return [ cls.Model, cls.Seed, + cls.Archive, ] diff --git a/core/dbt/parser/__init__.py b/core/dbt/parser/__init__.py index 78ca99c3eda..5363aa29400 100644 --- a/core/dbt/parser/__init__.py +++ b/core/dbt/parser/__init__.py @@ -1,6 +1,7 @@ from .analysis import AnalysisParser from .archives import ArchiveParser +from .archives import ArchiveBlockParser from .data_test import DataTestParser from .docs import DocumentationParser from .hooks import HookParser @@ -14,6 +15,7 @@ __all__ = [ 'AnalysisParser', 'ArchiveParser', + 'ArchiveBlockParser', 'DataTestParser', 'DocumentationParser', 'HookParser', diff --git a/core/dbt/parser/analysis.py b/core/dbt/parser/analysis.py index 5d218544983..c466ead1cfe 100644 --- a/core/dbt/parser/analysis.py +++ b/core/dbt/parser/analysis.py @@ -7,3 +7,8 @@ class AnalysisParser(BaseSqlParser): @classmethod def get_compiled_path(cls, name, relative_path): return os.path.join('analysis', relative_path) + + +class RPCCallParser(AnalysisParser): + def get_compiled_path(cls, name, relative_path): + return os.path.join('rpc', relative_path) diff --git a/core/dbt/parser/archives.py b/core/dbt/parser/archives.py index ad4de342ebd..981570a48da 100644 --- a/core/dbt/parser/archives.py +++ b/core/dbt/parser/archives.py @@ -1,7 +1,12 @@ - from dbt.contracts.graph.unparsed import UnparsedNode +from dbt.contracts.graph.parsed import ParsedArchiveNode from dbt.node_types import NodeType from dbt.parser.base import MacrosKnownParser +from dbt.parser.base_sql import BaseSqlParser, SQLParseResult +from dbt.adapters.factory import get_adapter +import dbt.clients.jinja +import dbt.exceptions +import dbt.utils import os @@ -20,7 +25,7 @@ def parse_archives_from_project(cls, config): for table in tables: cfg = table.copy() - cfg['source_database'] = archive_config.get( + source_database = archive_config.get( 'source_database', config.credentials.database ) @@ -29,11 +34,24 @@ def parse_archives_from_project(cls, config): config.credentials.database ) - cfg['source_schema'] = archive_config.get('source_schema') + source_schema = archive_config['source_schema'] cfg['target_schema'] = archive_config.get('target_schema') 
+ # project-defined archives always use the 'timestamp' strategy. + cfg['strategy'] = 'timestamp' fake_path = [cfg['target_database'], cfg['target_schema'], cfg['target_table']] + + relation = get_adapter(config).Relation.create( + database=source_database, + schema=source_schema, + identifier=table['source_table'], + type='table' + ) + + raw_sql = '{{ config(materialized="archive") }}' + \ + 'select * from {!s}'.format(relation) + archives.append({ 'name': table.get('target_table'), 'root_path': config.project_root, @@ -42,7 +60,7 @@ def parse_archives_from_project(cls, config): 'original_file_path': 'dbt_project.yml', 'package_name': config.project_name, 'config': cfg, - 'raw_sql': '{{config(materialized="archive")}} -- noop' + 'raw_sql': raw_sql }) return archives @@ -76,3 +94,74 @@ def load_and_parse(self): archive_config=archive_config) return to_return + + +class ArchiveBlockParser(BaseSqlParser): + def parse_archives_from_file(self, file_node, tags=None): + # the file node has a 'raw_sql' field that contains the jinja data with + # (we hope!) `archive` blocks + try: + blocks = dbt.clients.jinja.extract_toplevel_blocks( + file_node['raw_sql'] + ) + except dbt.exceptions.CompilationException as exc: + if exc.node is None: + exc.node = file_node + raise + for block in blocks: + if block.block_type_name != NodeType.Archive: + # non-archive blocks are just ignored + continue + name = block.block_name + raw_sql = block.contents + updates = { + 'raw_sql': raw_sql, + 'name': name, + } + yield dbt.utils.deep_merge(file_node, updates) + + @classmethod + def get_compiled_path(cls, name, relative_path): + return relative_path + + @classmethod + def get_fqn(cls, node, package_project_config, extra=[]): + parts = dbt.utils.split_path(node.path) + fqn = [package_project_config.project_name] + fqn.extend(parts[:-1]) + fqn.extend(extra) + fqn.append(node.name) + + return fqn + + @staticmethod + def validate_archives(node): + if node.resource_type == NodeType.Archive: + try: + return ParsedArchiveNode(**node.to_shallow_dict()) + except dbt.exceptions.JSONValidationException as exc: + raise dbt.exceptions.CompilationException(str(exc), node) + else: + return node + + def parse_sql_nodes(self, nodes, tags=None): + if tags is None: + tags = [] + + results = SQLParseResult() + + # in archives, we have stuff in blocks. + for file_node in nodes: + archive_nodes = list( + self.parse_archives_from_file(file_node, tags=tags) + ) + found = super(ArchiveBlockParser, self).parse_sql_nodes( + nodes=archive_nodes, tags=tags + ) + # make sure our blocks are going to work when we try to archive + # them! 
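
For context on the input parse_archives_from_file above consumes: a single file may contain one or more archive blocks, and each block becomes its own node named after the block. The block below is illustrative only; its config keys are not defined by this patch.

    raw_sql = '''
    {% archive customers_archive %}
        {{ config(
            target_schema='snapshots',
            unique_key='id',
            strategy='timestamp',
            updated_at='updated_at'
        ) }}
        select * from raw.jaffle_shop.customers
    {% endarchive %}
    '''

    # extract_toplevel_blocks(raw_sql) would include a block whose
    # block_type_name is 'archive' and whose block_name is 'customers_archive';
    # the parser merges its contents back into the file node as raw_sql.
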
+ found.parsed = {k: self.validate_archives(v) for + k, v in found.parsed.items()} + + results.update(found) + return results diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index d21b48cb1ea..3c709f3e459 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -13,7 +13,6 @@ from dbt.logger import GLOBAL_LOGGER as logger from dbt.contracts.graph.parsed import ParsedNode from dbt.parser.source_config import SourceConfig -from dbt.node_types import NodeType class BaseParser(object): @@ -40,8 +39,8 @@ def get_path(cls, resource_type, package_name, resource_name): return "{}.{}.{}".format(resource_type, package_name, resource_name) @classmethod - def get_fqn(cls, path, package_project_config, extra=[]): - parts = dbt.utils.split_path(path) + def get_fqn(cls, node, package_project_config, extra=[]): + parts = dbt.utils.split_path(node.path) name, _ = os.path.splitext(parts[-1]) fqn = ([package_project_config.project_name] + parts[:-1] + @@ -59,6 +58,7 @@ def __init__(self, root_project_config, all_projects, macro_manifest): ) self.macro_manifest = macro_manifest self._get_schema_func = None + self._get_alias_func = None def get_schema_func(self): """The get_schema function is set by a few different things: @@ -88,13 +88,51 @@ def get_schema(_): else: root_context = dbt.context.parser.generate_macro( get_schema_macro, self.root_project_config, - self.macro_manifest, 'generate_schema_name' + self.macro_manifest ) get_schema = get_schema_macro.generator(root_context) self._get_schema_func = get_schema return self._get_schema_func + def get_alias_func(self): + """The get_alias function is set by a few different things: + - if there is a 'generate_alias_name' macro in the root project, + it will be used. + - if that does not exist but there is a 'generate_alias_name' + macro in the 'dbt' internal project, that will be used + - if neither of those exist (unit tests?), a function that returns + the 'default alias' as set in the model's filename or alias + configuration. 
+ """ + if self._get_alias_func is not None: + return self._get_alias_func + + get_alias_macro = self.macro_manifest.find_macro_by_name( + 'generate_alias_name', + self.root_project_config.project_name + ) + if get_alias_macro is None: + get_alias_macro = self.macro_manifest.find_macro_by_name( + 'generate_alias_name', + GLOBAL_PROJECT_NAME + ) + if get_alias_macro is None: + def get_alias(node, custom_alias_name=None): + if custom_alias_name is None: + return node.name + else: + return custom_alias_name + else: + root_context = dbt.context.parser.generate_macro( + get_alias_macro, self.root_project_config, + self.macro_manifest + ) + get_alias = get_alias_macro.generator(root_context) + + self._get_alias_func = get_alias + return self._get_alias_func + def _build_intermediate_node_dict(self, config, node_dict, node_path, package_project_config, tags, fqn, agate_table, archive_config, @@ -159,10 +197,6 @@ def _render_with_context(self, parsed_node, config): parsed_node.raw_sql, context, parsed_node.to_shallow_dict(), capture_macros=True) - # Clean up any open conns opened by adapter functions that hit the db - db_wrapper = context['adapter'] - db_wrapper.adapter.release_connection(parsed_node.name) - def _update_parsed_node_info(self, parsed_node, config): """Given the SourceConfig used for parsing and the parsed node, generate and set the true values to use, overriding the temporary parse @@ -173,7 +207,11 @@ def _update_parsed_node_info(self, parsed_node, config): schema_override = config.config.get('schema') get_schema = self.get_schema_func() parsed_node.schema = get_schema(schema_override).strip() - parsed_node.alias = config.config.get('alias', parsed_node.get('name')) + + alias_override = config.config.get('alias') + get_alias = self.get_alias_func() + parsed_node.alias = get_alias(parsed_node, alias_override).strip() + parsed_node.database = config.config.get( 'database', self.default_database ).strip() @@ -207,7 +245,7 @@ def parse_node(self, node, node_path, package_project_config, tags=None, fqn_extra = coalesce(fqn_extra, []) if fqn is None: - fqn = self.get_fqn(node.path, package_project_config, fqn_extra) + fqn = self.get_fqn(node, package_project_config, fqn_extra) config = SourceConfig( self.root_project_config, @@ -227,3 +265,16 @@ def parse_node(self, node, node_path, package_project_config, tags=None, parsed_node.validate() return parsed_node + + def check_block_parsing(self, name, path, contents): + """Check if we were able to extract toplevel blocks from the given + contents. Return True if extraction was successful (no exceptions), + False if it fails. 
+ """ + if not dbt.flags.TEST_NEW_PARSER: + return True + try: + dbt.clients.jinja.extract_toplevel_blocks(contents) + except Exception: + return False + return True diff --git a/core/dbt/parser/base_sql.py b/core/dbt/parser/base_sql.py index d6d7322a423..2a576ffaaf9 100644 --- a/core/dbt/parser/base_sql.py +++ b/core/dbt/parser/base_sql.py @@ -9,6 +9,7 @@ from dbt.contracts.graph.unparsed import UnparsedNode from dbt.parser.base import MacrosKnownParser +from dbt.node_types import NodeType class BaseSqlParser(MacrosKnownParser): @@ -62,36 +63,77 @@ def load_and_parse(self, package_name, root_dir, relative_dirs, return self.parse_sql_nodes(result, tags) - def parse_sql_nodes(self, nodes, tags=None): - + def parse_sql_node(self, node_dict, tags=None): if tags is None: tags = [] - to_return = {} - disabled = [] + node = UnparsedNode(**node_dict) + package_name = node.package_name - for n in nodes: - node = UnparsedNode(**n) - package_name = node.package_name + unique_id = self.get_path(node.resource_type, + package_name, + node.name) + + project = self.all_projects.get(package_name) + + parse_ok = True + if node.resource_type == NodeType.Model: + parse_ok = self.check_block_parsing( + node.name, node.original_file_path, node.raw_sql + ) - node_path = self.get_path(node.resource_type, - package_name, - node.name) + node_parsed = self.parse_node(node, unique_id, project, tags=tags) + if not parse_ok: + # if we had a parse error in parse_node, we would not get here. So + # this means we rejected a good file :( + raise dbt.exceptions.InternalException( + 'the block parser rejected a good node: {} was marked invalid ' + 'but is actually valid!'.format(node.original_file_path) + ) + return unique_id, node_parsed + + def parse_sql_nodes(self, nodes, tags=None): + if tags is None: + tags = [] - project = self.all_projects.get(package_name) - node_parsed = self.parse_node(node, node_path, project, tags=tags) + results = SQLParseResult() + + for n in nodes: + node_path, node_parsed = self.parse_sql_node(n, tags) # Ignore disabled nodes - if not node_parsed['config']['enabled']: - disabled.append(node_parsed) + if not node_parsed.config['enabled']: + results.disable(node_parsed) continue - # Check for duplicate model names - existing_node = to_return.get(node_path) - if existing_node is not None: - dbt.exceptions.raise_duplicate_resource_name( - existing_node, node_parsed) + results.keep(node_path, node_parsed) + + return results + + +class SQLParseResult(object): + def __init__(self): + self.parsed = {} + self.disabled = [] + + def result(self, unique_id, node): + if node.config['enabled']: + self.keep(unique_id, node) + else: + self.disable(node) + + def disable(self, node): + self.disabled.append(node) + + def keep(self, unique_id, node): + if unique_id in self.parsed: + dbt.exceptions.raise_duplicate_resource_name( + self.parsed[unique_id], node + ) - to_return[node_path] = node_parsed + self.parsed[unique_id] = node - return to_return, disabled + def update(self, other): + self.disabled.extend(other.disabled) + for unique_id, node in other.parsed.items(): + self.keep(unique_id, node) diff --git a/core/dbt/parser/docs.py b/core/dbt/parser/docs.py index 9f9e403894c..840ecdb1a9f 100644 --- a/core/dbt/parser/docs.py +++ b/core/dbt/parser/docs.py @@ -62,8 +62,6 @@ def parse(self, docfile): # because docs are in their own graph namespace, node type doesn't # need to be part of the unique ID. 
unique_id = '{}.{}'.format(docfile.package_name, name) - fqn = self.get_fqn(docfile.path, - self.all_projects[docfile.package_name]) merged = dbt.utils.deep_merge( docfile.serialize(), @@ -78,10 +76,10 @@ def parse(self, docfile): def load_and_parse(self, package_name, root_dir, relative_dirs): to_return = {} for docfile in self.load_file(package_name, root_dir, relative_dirs): - for parsed in self.parse(docfile): - if parsed.unique_id in to_return: - dbt.exceptions.raise_duplicate_resource_name( - to_return[parsed.unique_id], parsed - ) - to_return[parsed.unique_id] = parsed + for parsed in self.parse(docfile): + if parsed.unique_id in to_return: + dbt.exceptions.raise_duplicate_resource_name( + to_return[parsed.unique_id], parsed + ) + to_return[parsed.unique_id] = parsed return to_return diff --git a/core/dbt/parser/hooks.py b/core/dbt/parser/hooks.py index 19d6a80b2ee..e12ece3ad69 100644 --- a/core/dbt/parser/hooks.py +++ b/core/dbt/parser/hooks.py @@ -56,8 +56,8 @@ def load_and_parse_run_hook_type(self, hook_type): }) tags = [hook_type] - hooks, _ = self.parse_sql_nodes(result, tags=tags) - return hooks + results = self.parse_sql_nodes(result, tags=tags) + return results.parsed def load_and_parse(self): if dbt.flags.STRICT_MODE: diff --git a/core/dbt/parser/macros.py b/core/dbt/parser/macros.py index c03713dae03..3ad2978dee7 100644 --- a/core/dbt/parser/macros.py +++ b/core/dbt/parser/macros.py @@ -28,8 +28,6 @@ def parse_macro_file(self, macro_file_path, macro_file_contents, root_path, if tags is None: tags = [] - context = {} - # change these to actual kwargs base_node = UnparsedMacro( path=macro_file_path, diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 82e92973536..fc8bc943528 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -14,7 +14,7 @@ from dbt.clients.jinja import get_rendered from dbt.node_types import NodeType -from dbt.compat import basestring, to_string, to_native_string +from dbt.compat import basestring, to_string from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import get_pseudo_test_path from dbt.contracts.graph.unparsed import UnparsedNode, UnparsedNodeUpdate, \ @@ -194,7 +194,7 @@ def _filter_validate(filepath, location, values, validate): except dbt.exceptions.JSONValidationException as exc: # we don't want to fail the full run, but we do want to fail # parsing this file - warn_invalid(filepath, location, value, '- '+exc.msg) + warn_invalid(filepath, location, value, '- ' + exc.msg) continue @@ -243,6 +243,10 @@ def _generate_test_name(self, target, test_type, test_args): """Returns a hashed_name, full_name pair.""" raise NotImplementedError + @staticmethod + def _describe_test_target(test_target): + raise NotImplementedError + def build_test_node(self, test_target, package_name, test, root_dir, path, column_name=None): """Build a test node against the given target (a model or a source). 
@@ -257,8 +261,9 @@ def build_test_node(self, test_target, package_name, test, root_dir, path, source_package = self.all_projects.get(package_name) if source_package is None: - desc = '"{}" test on model "{}"'.format(test_type, - model_name) + desc = '"{}" test on {}'.format( + test_type, self._describe_test_target(test_target) + ) dbt.exceptions.raise_dep_not_found(None, desc, test_namespace) test_path = os.path.basename(path) @@ -273,6 +278,7 @@ def build_test_node(self, test_target, package_name, test, root_dir, path, full_path = get_pseudo_test_path(full_name, test_path, 'schema_test') raw_sql = self._build_raw_sql(test_namespace, test_target, test_type, test_args) + unparsed = UnparsedNode( name=full_name, resource_type=NodeType.Test, @@ -285,18 +291,29 @@ def build_test_node(self, test_target, package_name, test, root_dir, path, # supply our own fqn which overrides the hashed version from the path # TODO: is this necessary even a little bit for tests? - fqn_override = self.get_fqn(full_path, source_package) + fqn_override = self.get_fqn(unparsed.incorporate(path=full_path), + source_package) node_path = self.get_path(NodeType.Test, unparsed.package_name, unparsed.name) - return self.parse_node(unparsed, - node_path, - source_package, - tags=['schema'], - fqn_extra=None, - fqn=fqn_override, - column_name=column_name) + result = self.parse_node(unparsed, + node_path, + source_package, + tags=['schema'], + fqn_extra=None, + fqn=fqn_override, + column_name=column_name) + + parse_ok = self.check_block_parsing(full_name, test_path, raw_sql) + if not parse_ok: + # if we had a parse error in parse_node, we would not get here. So + # this means we rejected a good file :( + raise dbt.exceptions.InternalException( + 'the block parser rejected a good node: {} was marked invalid ' + 'but is actually valid!'.format(test_path) + ) + return result class SchemaModelParser(SchemaBaseTestParser): @@ -306,6 +323,10 @@ def _build_raw_sql(self, test_namespace, target, test_type, test_args): def _generate_test_name(self, target, test_type, test_args): return get_nice_schema_test_name(test_type, target['name'], test_args) + @staticmethod + def _describe_test_target(test_target): + return 'model "{}"'.format(test_target) + def parse_models_entry(self, model_dict, path, package_name, root_dir): model_name = model_dict['name'] refs = ParserRef() @@ -373,11 +394,15 @@ def _build_raw_sql(self, test_namespace, target, test_type, test_args): def _generate_test_name(self, target, test_type, test_args): return get_nice_schema_test_name( - 'source_'+test_type, + 'source_' + test_type, '{}_{}'.format(target['source']['name'], target['table']['name']), test_args ) + @staticmethod + def _describe_test_target(test_target): + return 'source "{0[source]}.{0[table]}"'.format(test_target) + def get_path(self, *parts): return '.'.join(str(s) for s in parts) @@ -392,12 +417,17 @@ def generate_source_node(self, source, table, path, package_name, root_dir, get_rendered(description, context) get_rendered(source_description, context) - # we'll fill columns in later. 
freshness = dbt.utils.deep_merge(source.get('freshness', {}), table.get('freshness', {})) loaded_at_field = table.get('loaded_at_field', source.get('loaded_at_field')) + + # use 'or {}' to allow quoting: null + source_quoting = source.get('quoting') or {} + table_quoting = table.get('quoting') or {} + quoting = dbt.utils.deep_merge(source_quoting, table_quoting) + default_database = self.root_project_config.credentials.database return ParsedSourceDefinition( package_name=package_name, @@ -417,6 +447,7 @@ def generate_source_node(self, source, table, path, package_name, root_dir, docrefs=refs.docrefs, loaded_at_field=loaded_at_field, freshness=freshness, + quoting=quoting, resource_type=NodeType.Source ) diff --git a/core/dbt/parser/source_config.py b/core/dbt/parser/source_config.py index fd5482a3c4c..65f3e6b93c3 100644 --- a/core/dbt/parser/source_config.py +++ b/core/dbt/parser/source_config.py @@ -13,7 +13,6 @@ class SourceConfig(object): 'schema', 'enabled', 'materialized', - 'sql_where', 'unique_key', 'database', } @@ -66,6 +65,8 @@ def config(self): if self.node_type == NodeType.Seed: defaults['materialized'] = 'seed' + elif self.node_type == NodeType.Archive: + defaults['materialized'] = 'archive' active_config = self.load_config_from_active_project() @@ -150,6 +151,8 @@ def get_project_config(self, runtime_config): if self.node_type == NodeType.Seed: model_configs = runtime_config.seeds + elif self.node_type == NodeType.Archive: + model_configs = {} else: model_configs = runtime_config.models diff --git a/core/dbt/parser/util.py b/core/dbt/parser/util.py index c4c2245746d..c5b4a46ae67 100644 --- a/core/dbt/parser/util.py +++ b/core/dbt/parser/util.py @@ -27,8 +27,6 @@ def do_docs(*args): dbt.exceptions.doc_target_not_found(node, doc_name, doc_package_name) - target_doc_id = target_doc.unique_id - return target_doc.block_contents return do_docs @@ -124,96 +122,125 @@ def _get_node_column(cls, node, column_name): return column + @classmethod + def process_docs_for_node(cls, manifest, current_project, node): + for docref in node.get('docrefs', []): + column_name = docref.get('column_name') + if column_name is None: + description = node.get('description', '') + else: + column = cls._get_node_column(node, column_name) + description = column.get('description', '') + context = { + 'doc': docs(node, manifest, current_project, column_name), + } + + # At this point, target_doc is a ParsedDocumentation, and we + # know that our documentation string has a 'docs("...")' + # pointing at it. We want to render it. + description = dbt.clients.jinja.get_rendered(description, + context) + # now put it back. + if column_name is None: + node.set('description', description) + else: + column['description'] = description + @classmethod def process_docs(cls, manifest, current_project): - for _, node in manifest.nodes.items(): - target_doc = None - target_doc_name = None - target_doc_package = None - for docref in node.get('docrefs', []): - column_name = docref.get('column_name') - if column_name is None: - description = node.get('description', '') - else: - column = cls._get_node_column(node, column_name) - description = column.get('description', '') - target_doc_name = docref['documentation_name'] - target_doc_package = docref['documentation_package'] - context = { - 'doc': docs(node, manifest, current_project, column_name), - } - - # At this point, target_doc is a ParsedDocumentation, and we - # know that our documentation string has a 'docs("...")' - # pointing at it. We want to render it. 
- description = dbt.clients.jinja.get_rendered(description, - context) - # now put it back. - if column_name is None: - node.set('description', description) - else: - column['description'] = description + for node in manifest.nodes.values(): + cls.process_docs_for_node(manifest, current_project, node) return manifest @classmethod - def process_refs(cls, manifest, current_project): - for _, node in manifest.nodes.items(): - target_model = None - target_model_name = None - target_model_package = None - - for ref in node.refs: - if len(ref) == 1: - target_model_name = ref[0] - elif len(ref) == 2: - target_model_package, target_model_name = ref - - target_model = cls.resolve_ref( - manifest, - target_model_name, - target_model_package, - current_project, - node.get('package_name')) - - if target_model is None or target_model is cls.DISABLED: - # This may raise. Even if it doesn't, we don't want to add - # this node to the graph b/c there is no destination node - node.config['enabled'] = False - dbt.utils.invalid_ref_fail_unless_test( - node, target_model_name, target_model_package, - disabled=(target_model is cls.DISABLED) - ) - - continue - - target_model_id = target_model.get('unique_id') - - node.depends_on['nodes'].append(target_model_id) - manifest.nodes[node['unique_id']] = node + def process_refs_for_node(cls, manifest, current_project, node): + """Given a manifest and a node in that manifest, process its refs""" + target_model = None + target_model_name = None + target_model_package = None + + for ref in node.refs: + if len(ref) == 1: + target_model_name = ref[0] + elif len(ref) == 2: + target_model_package, target_model_name = ref + + target_model = cls.resolve_ref( + manifest, + target_model_name, + target_model_package, + current_project, + node.get('package_name')) + + if target_model is None or target_model is cls.DISABLED: + # This may raise. 
Even if it doesn't, we don't want to add + # this node to the graph b/c there is no destination node + node.config['enabled'] = False + dbt.utils.invalid_ref_fail_unless_test( + node, target_model_name, target_model_package, + disabled=(target_model is cls.DISABLED) + ) + + continue + + target_model_id = target_model.get('unique_id') + + node.depends_on['nodes'].append(target_model_id) + manifest.nodes[node['unique_id']] = node + @classmethod + def process_refs(cls, manifest, current_project): + for node in manifest.nodes.values(): + cls.process_refs_for_node(manifest, current_project, node) return manifest @classmethod - def process_sources(cls, manifest, current_project): - for _, node in manifest.nodes.items(): - target_source = None - for source_name, table_name in node.sources: - target_source = cls.resolve_source( - manifest, + def process_sources_for_node(cls, manifest, current_project, node): + target_source = None + for source_name, table_name in node.sources: + target_source = cls.resolve_source( + manifest, + source_name, + table_name, + current_project, + node.get('package_name')) + + if target_source is None: + # this folows the same pattern as refs + node.config['enabled'] = False + dbt.utils.invalid_source_fail_unless_test( + node, source_name, - table_name, - current_project, - node.get('package_name')) - - if target_source is None: - # this folows the same pattern as refs - node.config['enabled'] = False - dbt.utils.invalid_source_fail_unless_test( - node, - source_name, - table_name) - continue - target_source_id = target_source.unique_id - node.depends_on['nodes'].append(target_source_id) - manifest.nodes[node['unique_id']] = node + table_name) + continue + target_source_id = target_source.unique_id + node.depends_on['nodes'].append(target_source_id) + manifest.nodes[node['unique_id']] = node + + @classmethod + def process_sources(cls, manifest, current_project): + for node in manifest.nodes.values(): + cls.process_sources_for_node(manifest, current_project, node) + return manifest + + @classmethod + def add_new_refs(cls, manifest, current_project, node, macros): + """Given a new node that is not in the manifest, copy the manifest and + insert the new node into it as if it were part of regular ref + processing + """ + manifest = manifest.deepcopy(config=current_project) + # it's ok for macros to silently override a local project macro name + manifest.macros.update(macros) + + if node.unique_id in manifest.nodes: + # this should be _impossible_ due to the fact that rpc calls get + # a unique ID that starts with 'rpc'! 
+ raise dbt.exceptions.raise_duplicate_resource_name( + manifest.nodes[node.unique_id], node + ) + manifest.nodes[node.unique_id] = node + cls.process_sources_for_node(manifest, current_project, node) + cls.process_refs_for_node(manifest, current_project, node) + cls.process_docs_for_node(manifest, current_project, node) return manifest diff --git a/core/dbt/rpc.py b/core/dbt/rpc.py new file mode 100644 index 00000000000..9335fc3051e --- /dev/null +++ b/core/dbt/rpc.py @@ -0,0 +1,395 @@ +from jsonrpc.exceptions import \ + JSONRPCDispatchException, \ + JSONRPCInvalidParams, \ + JSONRPCParseError, \ + JSONRPCInvalidRequestException, \ + JSONRPCInvalidRequest +from jsonrpc import JSONRPCResponseManager +from jsonrpc.jsonrpc import JSONRPCRequest +from jsonrpc.jsonrpc2 import JSONRPC20Response + +import json +import uuid +import multiprocessing +import os +import signal +import time +from collections import namedtuple + +from dbt.logger import RPC_LOGGER as logger +from dbt.logger import add_queue_handler +from dbt.compat import QueueEmpty +import dbt.exceptions + + +class RPCException(JSONRPCDispatchException): + def __init__(self, code=None, message=None, data=None, logs=None): + if code is None: + code = -32000 + if message is None: + message = 'Server error' + if data is None: + data = {} + + super(RPCException, self).__init__(code=code, message=message, + data=data) + self.logs = logs + + def __str__(self): + return ( + 'RPCException({0.code}, {0.message}, {0.data}, {1.logs})' + .format(self.error, self) + ) + + @property + def logs(self): + return self.error.data.get('logs') + + @logs.setter + def logs(self, value): + if value is None: + return + self.error.data['logs'] = value + + @classmethod + def from_error(cls, err): + return cls(err.code, err.message, err.data, err.data.get('logs')) + + +def invalid_params(data): + return RPCException( + code=JSONRPCInvalidParams.CODE, + message=JSONRPCInvalidParams.MESSAGE, + data=data + ) + + +def server_error(err, logs=None): + exc = dbt.exceptions.Exception(str(err)) + return dbt_error(exc, logs) + + +def timeout_error(timeout_value, logs=None): + exc = dbt.exceptions.RPCTimeoutException(timeout_value) + return dbt_error(exc, logs) + + +def dbt_error(exc, logs=None): + exc = RPCException(code=exc.CODE, message=exc.MESSAGE, data=exc.data(), + logs=logs) + return exc + + +class QueueMessageType(object): + Error = 'error' + Result = 'result' + Log = 'log' + + @classmethod + def terminating(cls): + return [ + cls.Error, + cls.Result + ] + + +def sigterm_handler(signum, frame): + raise dbt.exceptions.RPCKilledException(signum) + + +class RequestDispatcher(object): + """A special dispatcher that knows about requests.""" + def __init__(self, http_request, json_rpc_request, manager): + self.http_request = http_request + self.json_rpc_request = json_rpc_request + self.manager = manager + self.task = None + + def rpc_factory(self, task): + request_handler = RequestTaskHandler(task, + self.http_request, + self.json_rpc_request) + + def rpc_func(**kwargs): + try: + self.manager.add_request(request_handler) + return request_handler.handle(kwargs) + finally: + self.manager.mark_done(request_handler) + + return rpc_func + + def __getitem__(self, key): + # the dispatcher's keys are method names and its values are functions + # that implement the RPC calls + func = self.manager.rpc_builtin(key) + if func is not None: + return func + + task = self.manager.rpc_task(key) + return self.rpc_factory(task) + + +class RequestTaskHandler(object): + def __init__(self, 
task, http_request, json_rpc_request): + self.task = task + self.http_request = http_request + self.json_rpc_request = json_rpc_request + self.queue = None + self.process = None + self.started = None + self.timeout = None + self.logs = [] + self.task_id = uuid.uuid4() + + @property + def request_source(self): + return self.http_request.remote_addr + + @property + def request_id(self): + return self.json_rpc_request._id + + @property + def method(self): + return self.task.METHOD_NAME + + def _next_timeout(self): + if self.timeout is None: + return None + end = self.started + self.timeout + timeout = end - time.time() + if timeout < 0: + raise dbt.exceptions.RPCTimeoutException(self.timeout) + return timeout + + def _wait_for_results(self): + """Wait for results off the queue. If there is a timeout set, and it is + exceeded, raise an RPCTimeoutException. + """ + while True: + get_timeout = self._next_timeout() + try: + msgtype, value = self.queue.get(timeout=get_timeout) + except QueueEmpty: + raise dbt.exceptions.RPCTimeoutException(self.timeout) + + if msgtype == QueueMessageType.Log: + self.logs.append(value) + elif msgtype in QueueMessageType.terminating(): + return msgtype, value + else: + raise dbt.exceptions.InternalException( + 'Got invalid queue message type {}'.format(msgtype) + ) + + def _join_process(self): + try: + msgtype, result = self._wait_for_results() + except dbt.exceptions.RPCTimeoutException: + self.process.terminate() + raise timeout_error(self.timeout) + except dbt.exceptions.Exception as exc: + raise dbt_error(exc) + except Exception as exc: + raise server_error(exc) + finally: + self.process.join() + + if msgtype == QueueMessageType.Error: + raise RPCException.from_error(result) + + return result + + def get_result(self): + try: + result = self._join_process() + except RPCException as exc: + exc.logs = self.logs + raise + + result['logs'] = self.logs + return result + + def task_bootstrap(self, kwargs): + signal.signal(signal.SIGTERM, sigterm_handler) + # the first thing we do in a new process: start logging + add_queue_handler(self.queue) + + error = None + result = None + try: + result = self.task.handle_request(**kwargs) + except RPCException as exc: + error = exc + except dbt.exceptions.RPCKilledException as exc: + # do NOT log anything here, you risk triggering a deadlock on the + # queue handler we inserted above + error = dbt_error(exc) + except dbt.exceptions.Exception as exc: + logger.debug('dbt runtime exception', exc_info=True) + error = dbt_error(exc) + except Exception as exc: + logger.debug('uncaught python exception', exc_info=True) + error = server_error(exc) + + # put whatever result we got onto the queue as well. 
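+        # the parent process reads exactly one terminating message
+        # (Error or Result) off the queue for this task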
+ if error is not None: + self.queue.put([QueueMessageType.Error, error.error]) + else: + self.queue.put([QueueMessageType.Result, result]) + + def handle(self, kwargs): + self.started = time.time() + self.timeout = kwargs.pop('timeout', None) + self.queue = multiprocessing.Queue() + self.process = multiprocessing.Process( + target=self.task_bootstrap, + args=(kwargs,) + ) + self.process.start() + return self.get_result() + + @property + def state(self): + if self.started is None: + return 'not started' + elif self.process is None: + return 'initializing' + elif self.process.is_alive(): + return 'running' + else: + return 'finished' + + +TaskRow = namedtuple( + 'TaskRow', + 'task_id request_id request_source method state start elapsed timeout' +) + + +class TaskManager(object): + def __init__(self): + self.tasks = {} + self.completed = {} + self._rpc_task_map = {} + self._rpc_function_map = {} + self._lock = multiprocessing.Lock() + + def add_request(self, request_handler): + self.tasks[request_handler.task_id] = request_handler + + def add_task_handler(self, task): + self._rpc_task_map[task.METHOD_NAME] = task + + def rpc_task(self, method_name): + return self._rpc_task_map[method_name] + + def process_listing(self, active=True, completed=False): + included_tasks = {} + with self._lock: + if completed: + included_tasks.update(self.completed) + if active: + included_tasks.update(self.tasks) + + table = [] + now = time.time() + for task_handler in included_tasks.values(): + start = task_handler.started + if start is not None: + elapsed = now - start + + table.append(TaskRow( + str(task_handler.task_id), task_handler.request_id, + task_handler.request_source, task_handler.method, + task_handler.state, start, elapsed, task_handler.timeout + )) + table.sort(key=lambda r: (r.state, r.start)) + result = { + 'rows': [dict(r._asdict()) for r in table], + } + return result + + def process_kill(self, task_id): + # TODO: this result design is terrible + result = { + 'found': False, + 'started': False, + 'finished': False, + 'killed': False + } + task_id = uuid.UUID(task_id) + try: + task = self.tasks[task_id] + except KeyError: + # nothing to do! + return result + + result['found'] = True + + if task.process is None: + return result + pid = task.process.pid + if pid is None: + return result + + result['started'] = True + + if task.process.is_alive(): + os.kill(pid, signal.SIGINT) + result['killed'] = True + return result + + result['finished'] = True + return result + + def rpc_builtin(self, method_name): + if method_name == 'ps': + return self.process_listing + if method_name == 'kill': + return self.process_kill + return None + + def mark_done(self, request_handler): + task_id = request_handler.task_id + with self._lock: + if task_id not in self.tasks: + # lost a task! Maybe it was killed before it started. + return + self.completed[task_id] = self.tasks.pop(task_id) + + def methods(self): + rpc_builtin_methods = ['ps', 'kill'] + return list(self._rpc_task_map) + rpc_builtin_methods + + +class ResponseManager(JSONRPCResponseManager): + """Override the default response manager to handle request metadata and + track in-flight tasks. 
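+
+    Dispatch goes through RequestDispatcher, which registers each request
+    with the task manager.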
+ """ + @classmethod + def handle(cls, http_request, task_manager): + # pretty much just copy+pasted from the original, with slight tweaks to + # preserve the request + request_str = http_request.data + if isinstance(request_str, bytes): + request_str = request_str.decode("utf-8") + + try: + data = json.loads(request_str) + except (TypeError, ValueError): + return JSONRPC20Response(error=JSONRPCParseError()._data) + + try: + request = JSONRPCRequest.from_data(data) + except JSONRPCInvalidRequestException: + return JSONRPC20Response(error=JSONRPCInvalidRequest()._data) + + dispatcher = RequestDispatcher( + http_request, + request, + task_manager + ) + + return cls.handle_request(request, dispatcher) diff --git a/core/dbt/schema.py b/core/dbt/schema.py deleted file mode 100644 index f46e89e9859..00000000000 --- a/core/dbt/schema.py +++ /dev/null @@ -1,177 +0,0 @@ -from dbt.logger import GLOBAL_LOGGER as logger # noqa -import dbt.exceptions - - -class Column(object): - TYPE_LABELS = { - 'STRING': 'TEXT', - 'TIMESTAMP': 'TIMESTAMP', - 'FLOAT': 'FLOAT', - 'INTEGER': 'INT' - } - - def __init__(self, column, dtype, char_size=None, numeric_precision=None, - numeric_scale=None): - self.column = column - self.dtype = dtype - self.char_size = char_size - self.numeric_precision = numeric_precision - self.numeric_scale = numeric_scale - - @classmethod - def translate_type(cls, dtype): - return cls.TYPE_LABELS.get(dtype.upper(), dtype) - - @classmethod - def create(cls, name, label_or_dtype): - column_type = cls.translate_type(label_or_dtype) - return cls(name, column_type) - - @property - def name(self): - return self.column - - @property - def quoted(self): - return '"{}"'.format(self.column) - - @property - def data_type(self): - if self.is_string(): - return Column.string_type(self.string_size()) - elif self.is_numeric(): - return Column.numeric_type(self.dtype, self.numeric_precision, - self.numeric_scale) - else: - return self.dtype - - def is_string(self): - return self.dtype.lower() in ['text', 'character varying', 'character', - 'varchar'] - - def is_numeric(self): - return self.dtype.lower() in ['numeric', 'number'] - - def string_size(self): - if not self.is_string(): - raise RuntimeError("Called string_size() on non-string field!") - - if self.dtype == 'text' or self.char_size is None: - # char_size should never be None. Handle it reasonably just in case - return 255 - else: - return int(self.char_size) - - def can_expand_to(self, other_column): - """returns True if this column can be expanded to the size of the - other column""" - if not self.is_string() or not other_column.is_string(): - return False - - return other_column.string_size() > self.string_size() - - def literal(self, value): - return "{}::{}".format(value, self.data_type) - - @classmethod - def string_type(cls, size): - return "character varying({})".format(size) - - @classmethod - def numeric_type(cls, dtype, precision, scale): - # This could be decimal(...), numeric(...), number(...) 
- # Just use whatever was fed in here -- don't try to get too clever - if precision is None or scale is None: - return dtype - else: - return "{}({},{})".format(dtype, precision, scale) - - def __repr__(self): - return "".format(self.name, self.data_type) - - -class BigQueryColumn(Column): - TYPE_LABELS = { - 'STRING': 'STRING', - 'TIMESTAMP': 'TIMESTAMP', - 'FLOAT': 'FLOAT64', - 'INTEGER': 'INT64', - 'RECORD': 'RECORD', - } - - def __init__(self, column, dtype, fields=None, mode='NULLABLE'): - super(BigQueryColumn, self).__init__(column, dtype) - - if fields is None: - fields = [] - - self.fields = self.wrap_subfields(fields) - self.mode = mode - - @classmethod - def wrap_subfields(cls, fields): - return [BigQueryColumn.create_from_field(field) for field in fields] - - @classmethod - def create_from_field(cls, field): - return BigQueryColumn(field.name, cls.translate_type(field.field_type), - field.fields, field.mode) - - @classmethod - def _flatten_recursive(cls, col, prefix=None): - if prefix is None: - prefix = [] - - if len(col.fields) == 0: - prefixed_name = ".".join(prefix + [col.column]) - new_col = BigQueryColumn(prefixed_name, col.dtype, col.fields, - col.mode) - return [new_col] - - new_fields = [] - for field in col.fields: - new_prefix = prefix + [col.column] - new_fields.extend(cls._flatten_recursive(field, new_prefix)) - - return new_fields - - def flatten(self): - return self._flatten_recursive(self) - - @property - def quoted(self): - return '`{}`'.format(self.column) - - def literal(self, value): - return "cast({} as {})".format(value, self.dtype) - - @property - def data_type(self): - if self.dtype.upper() == 'RECORD': - subcols = [ - "{} {}".format(col.name, col.data_type) for col in self.fields - ] - field_type = 'STRUCT<{}>'.format(", ".join(subcols)) - - else: - field_type = self.dtype - - if self.mode.upper() == 'REPEATED': - return 'ARRAY<{}>'.format(field_type) - - else: - return field_type - - def is_string(self): - return self.dtype.lower() == 'string' - - def is_numeric(self): - return False - - def can_expand_to(self, other_column): - """returns True if both columns are strings""" - return self.is_string() and other_column.is_string() - - def __repr__(self): - return "".format(self.name, self.data_type, - self.mode) diff --git a/core/dbt/semver.py b/core/dbt/semver.py index a04c1f8b46a..e00c9ea3168 100644 --- a/core/dbt/semver.py +++ b/core/dbt/semver.py @@ -30,7 +30,8 @@ alpha_no_leading_zeros=_ALPHA_NO_LEADING_ZEROS, alpha=_ALPHA) -_VERSION_REGEX = re.compile(r""" + +_VERSION_REGEX_PAT_STR = r""" ^ {matchers} {base_version_regex} @@ -39,8 +40,9 @@ """.format( matchers=_MATCHERS, base_version_regex=_BASE_VERSION_REGEX, - version_extra_regex=_VERSION_EXTRA_REGEX), - re.VERBOSE) + version_extra_regex=_VERSION_EXTRA_REGEX) + +_VERSION_REGEX = re.compile(_VERSION_REGEX_PAT_STR, re.VERBOSE) class Matchers: @@ -356,7 +358,7 @@ def reduce_versions(*args): for version_specifier in version_specifiers: to_return = to_return.reduce(version_specifier.to_range()) - except VersionsNotCompatibleException as e: + except VersionsNotCompatibleException: raise VersionsNotCompatibleException( 'Could not find a satisfactory version from options: {}' .format([str(a) for a in args])) @@ -371,7 +373,7 @@ def versions_compatible(*args): try: reduce_versions(*args) return True - except VersionsNotCompatibleException as e: + except VersionsNotCompatibleException: return False diff --git a/core/dbt/ssh_forward.py b/core/dbt/ssh_forward.py deleted file mode 100644 index 
0ff32097998..00000000000 --- a/core/dbt/ssh_forward.py +++ /dev/null @@ -1,10 +0,0 @@ -import logging - -# modules are only imported once -- make sure that we don't have > 1 -# because subsequent tunnels will block waiting to acquire the port - -server = None - - -def get_or_create_tunnel(host, port, user, remote_host, remote_port, timeout): - pass diff --git a/core/dbt/task/archive.py b/core/dbt/task/archive.py index 1a90b615874..0cfbd82aa5c 100644 --- a/core/dbt/task/archive.py +++ b/core/dbt/task/archive.py @@ -9,9 +9,10 @@ def raise_on_first_error(self): def build_query(self): return { - 'include': ['*'], - 'exclude': [], - 'resource_types': [NodeType.Archive] + "include": self.args.models, + "exclude": self.args.exclude, + "resource_types": [NodeType.Archive], + "tags": [], } def get_runner_type(self): diff --git a/core/dbt/task/base.py b/core/dbt/task/base.py new file mode 100644 index 00000000000..6a7a6b2be7e --- /dev/null +++ b/core/dbt/task/base.py @@ -0,0 +1,127 @@ +from abc import ABCMeta, abstractmethod +import os + +import six + +from dbt.config import RuntimeConfig, Project +from dbt.config.profile import read_profile, PROFILES_DIR +from dbt import tracking +from dbt.logger import GLOBAL_LOGGER as logger +import dbt.exceptions + + +class NoneConfig(object): + @classmethod + def from_args(cls, args): + return None + + +def read_profiles(profiles_dir=None): + """This is only used for some error handling""" + if profiles_dir is None: + profiles_dir = PROFILES_DIR + + raw_profiles = read_profile(profiles_dir) + + if raw_profiles is None: + profiles = {} + else: + profiles = {k: v for (k, v) in raw_profiles.items() if k != 'config'} + + return profiles + + +PROFILES_HELP_MESSAGE = """ +For more information on configuring profiles, please consult the dbt docs: + +https://docs.getdbt.com/docs/configure-your-profile +""" + + +@six.add_metaclass(ABCMeta) +class BaseTask(object): + ConfigType = NoneConfig + + def __init__(self, args, config): + self.args = args + self.config = config + + @classmethod + def from_args(cls, args): + try: + config = cls.ConfigType.from_args(args) + except dbt.exceptions.DbtProjectError as exc: + logger.error("Encountered an error while reading the project:") + logger.error(" ERROR: {}".format(str(exc))) + + tracking.track_invalid_invocation( + args=args, + result_type=exc.result_type) + raise dbt.exceptions.RuntimeException('Could not run dbt') + except dbt.exceptions.DbtProfileError as exc: + logger.error("Encountered an error while reading profiles:") + logger.error(" ERROR {}".format(str(exc))) + + all_profiles = read_profiles(args.profiles_dir).keys() + + if len(all_profiles) > 0: + logger.info("Defined profiles:") + for profile in all_profiles: + logger.info(" - {}".format(profile)) + else: + logger.info("There are no profiles defined in your " + "profiles.yml file") + + logger.info(PROFILES_HELP_MESSAGE) + + tracking.track_invalid_invocation( + args=args, + result_type=exc.result_type) + raise dbt.exceptions.RuntimeException('Could not run dbt') + return cls(args, config) + + @abstractmethod + def run(self): + raise dbt.exceptions.NotImplementedException('Not Implemented') + + def interpret_results(self, results): + return True + + +def get_nearest_project_dir(): + root_path = os.path.abspath(os.sep) + cwd = os.getcwd() + + while cwd != root_path: + project_file = os.path.join(cwd, "dbt_project.yml") + if os.path.exists(project_file): + return cwd + cwd = os.path.dirname(cwd) + + return None + + +def move_to_nearest_project_dir(): + 
nearest_project_dir = get_nearest_project_dir() + if nearest_project_dir is None: + raise dbt.exceptions.RuntimeException( + "fatal: Not a dbt project (or any of the parent directories). " + "Missing dbt_project.yml file" + ) + + os.chdir(nearest_project_dir) + + +class RequiresProjectTask(BaseTask): + @classmethod + def from_args(cls, args): + move_to_nearest_project_dir() + return super(RequiresProjectTask, cls).from_args(args) + + +class ConfiguredTask(RequiresProjectTask): + ConfigType = RuntimeConfig + + +class ProjectOnlyTask(RequiresProjectTask): + ConfigType = Project diff --git a/core/dbt/task/base_task.py b/core/dbt/task/base_task.py deleted file mode 100644 index db8cedbff45..00000000000 --- a/core/dbt/task/base_task.py +++ /dev/null @@ -1,13 +0,0 @@ -import dbt.exceptions - - -class BaseTask(object): - def __init__(self, args, config=None): - self.args = args - self.config = config - - def run(self): - raise dbt.exceptions.NotImplementedException('Not Implemented') - - def interpret_results(self, results): - return True diff --git a/core/dbt/task/clean.py b/core/dbt/task/clean.py index f7b524057b8..ab0ef081b10 100644 --- a/core/dbt/task/clean.py +++ b/core/dbt/task/clean.py @@ -2,10 +2,10 @@ import os import shutil -from dbt.task.base_task import BaseTask +from dbt.task.base import ProjectOnlyTask -class CleanTask(BaseTask): +class CleanTask(ProjectOnlyTask): def __is_project_path(self, path): proj_path = os.path.abspath('.') diff --git a/core/dbt/task/compile.py b/core/dbt/task/compile.py index ac7f49ec2c8..64cbefc9daf 100644 --- a/core/dbt/task/compile.py +++ b/core/dbt/task/compile.py @@ -1,11 +1,24 @@ -from dbt.node_runners import CompileRunner +import os +import signal +import threading + +from dbt.adapters.factory import get_adapter +from dbt.clients.jinja import extract_toplevel_blocks +from dbt.compilation import compile_manifest +from dbt.loader import load_all_projects +from dbt.node_runners import CompileRunner, RPCCompileRunner from dbt.node_types import NodeType +from dbt.parser.analysis import RPCCallParser +from dbt.parser.macros import MacroParser +from dbt.parser.util import ParserUtils import dbt.ui.printer +from dbt.logger import RPC_LOGGER as rpc_logger + +from dbt.task.runnable import GraphRunnableTask, RemoteCallable -from dbt.task.runnable import RunnableTask +class CompileTask(GraphRunnableTask): -class CompileTask(RunnableTask): def raise_on_first_error(self): return True @@ -22,3 +35,129 @@ def get_runner_type(self): def task_end_messages(self, results): dbt.ui.printer.print_timestamped_line('Done.') + + +class RemoteCompileTask(CompileTask, RemoteCallable): + METHOD_NAME = 'compile' + + def __init__(self, args, config, manifest): + super(RemoteCompileTask, self).__init__(args, config) + self._base_manifest = manifest.deepcopy(config=config) + + def get_runner_type(self): + return RPCCompileRunner + + def runtime_cleanup(self, selected_uids): + """Do some pre-run cleanup that is usually performed in Task __init__. 
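+
+        The task object is reused across RPC requests, so per-request
+        state has to be reset before each run.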
+ """ + self.run_count = 0 + self.num_nodes = len(selected_uids) + self.node_results = [] + self._skipped_children = {} + self._skipped_children = {} + self._raise_next_tick = None + + def _extract_request_data(self, data): + data = self.decode_sql(data) + macro_blocks = [] + data_chunks = [] + for block in extract_toplevel_blocks(data): + if block.block_type_name == 'macro': + macro_blocks.append(block.full_block) + else: + data_chunks.append(block.full_block) + macros = '\n'.join(macro_blocks) + sql = ''.join(data_chunks) + return sql, macros + + def _get_exec_node(self, name, sql, macros): + request_path = os.path.join(self.config.target_path, 'rpc', name) + all_projects = load_all_projects(self.config) + macro_overrides = {} + sql, macros = self._extract_request_data(sql) + + if macros: + macro_parser = MacroParser(self.config, all_projects) + macro_overrides.update(macro_parser.parse_macro_file( + macro_file_path='from remote system', + macro_file_contents=macros, + root_path=request_path, + package_name=self.config.project_name, + resource_type=NodeType.Macro + )) + + self._base_manifest.macros.update(macro_overrides) + rpc_parser = RPCCallParser( + self.config, + all_projects=all_projects, + macro_manifest=self._base_manifest + ) + + node_dict = { + 'name': name, + 'root_path': request_path, + 'resource_type': NodeType.RPCCall, + 'path': name + '.sql', + 'original_file_path': 'from remote system', + 'package_name': self.config.project_name, + 'raw_sql': sql, + } + + unique_id, node = rpc_parser.parse_sql_node(node_dict) + self.manifest = ParserUtils.add_new_refs( + manifest=self._base_manifest, + current_project=self.config, + node=node, + macros=macro_overrides + ) + + # don't write our new, weird manifest! + self.linker = compile_manifest(self.config, self.manifest, write=False) + return node + + def _raise_set_error(self): + if self._raise_next_tick is not None: + raise self._raise_next_tick + + def _in_thread(self, node, thread_done): + runner = self.get_runner(node) + try: + self.node_results.append(runner.safe_run(self.manifest)) + except Exception as exc: + self._raise_next_tick = exc + finally: + thread_done.set() + + def handle_request(self, name, sql, macros=None): + # we could get a ctrl+c at any time, including during parsing. + thread = None + try: + node = self._get_exec_node(name, sql, macros) + + selected_uids = [node.unique_id] + self.runtime_cleanup(selected_uids) + + thread_done = threading.Event() + thread = threading.Thread(target=self._in_thread, + args=(node, thread_done)) + thread.start() + thread_done.wait() + except KeyboardInterrupt: + adapter = get_adapter(self.config) + if adapter.is_cancelable(): + + for conn_name in adapter.cancel_open_connections(): + rpc_logger.debug('canceled query {}'.format(conn_name)) + if thread: + thread.join() + else: + msg = ("The {} adapter does not support query " + "cancellation. 
Some queries may still be " + "running!".format(adapter.type())) + + rpc_logger.debug(msg) + + raise dbt.exceptions.RPCKilledException(signal.SIGINT) + + self._raise_set_error() + return self.node_results[0].serialize() diff --git a/core/dbt/task/debug.py b/core/dbt/task/debug.py index 6a141cd7b3c..1399effe7c4 100644 --- a/core/dbt/task/debug.py +++ b/core/dbt/task/debug.py @@ -1,7 +1,6 @@ # coding=utf-8 import os import platform -import pprint import sys from dbt.logger import GLOBAL_LOGGER as logger @@ -16,7 +15,7 @@ from dbt.clients.yaml_helper import load_yaml_text from dbt.ui.printer import green, red -from dbt.task.base_task import BaseTask +from dbt.task.base import BaseTask PROFILE_DIR_MESSAGE = """To view your profiles.yml file, run: @@ -59,7 +58,7 @@ class DebugTask(BaseTask): - def __init__(self, args, config=None): + def __init__(self, args, config): super(DebugTask, self).__init__(args, config) self.profiles_dir = getattr(self.args, 'profiles_dir', dbt.config.PROFILES_DIR) @@ -210,8 +209,7 @@ def _load_profile(self): self.profile_name = self._choose_profile_name() self.target_name = self._choose_target_name() try: - self.profile = Profile.from_args(self.args, self.profile_name, - self.cli_vars) + self.profile = Profile.from_args(self.args, self.profile_name) except dbt.exceptions.DbtConfigError as exc: self.profile_fail_details = str(exc) return red('ERROR invalid') diff --git a/core/dbt/task/deps.py b/core/dbt/task/deps.py index 2fe91a8af66..3e282c25d7b 100644 --- a/core/dbt/task/deps.py +++ b/core/dbt/task/deps.py @@ -21,7 +21,7 @@ GIT_PACKAGE_CONTRACT, REGISTRY_PACKAGE_CONTRACT, \ REGISTRY_PACKAGE_METADATA_CONTRACT, PackageConfig -from dbt.task.base_task import BaseTask +from dbt.task.base import ProjectOnlyTask DOWNLOADS_PATH = None REMOVE_DOWNLOADS = False @@ -440,7 +440,7 @@ def _read_packages(project_yaml): return packages -class DepsTask(BaseTask): +class DepsTask(ProjectOnlyTask): def __init__(self, args, config=None): super(DepsTask, self).__init__(args=args, config=config) self._downloads_path = None diff --git a/core/dbt/task/freshness.py b/core/dbt/task/freshness.py index b3956daa917..75bbd4f5ba9 100644 --- a/core/dbt/task/freshness.py +++ b/core/dbt/task/freshness.py @@ -1,5 +1,5 @@ import os -from dbt.task.runnable import BaseRunnableTask +from dbt.task.runnable import GraphRunnableTask from dbt.node_runners import FreshnessRunner from dbt.node_types import NodeType from dbt.ui.printer import print_timestamped_line, print_run_result_error @@ -8,7 +8,7 @@ RESULT_FILE_NAME = 'sources.json' -class FreshnessTask(BaseRunnableTask): +class FreshnessTask(GraphRunnableTask): def result_path(self): if self.args.output: return os.path.realpath(self.args.output) diff --git a/core/dbt/task/generate.py b/core/dbt/task/generate.py index 58b2238a02a..db7c91504f6 100644 --- a/core/dbt/task/generate.py +++ b/core/dbt/task/generate.py @@ -1,4 +1,3 @@ -import json import os import shutil @@ -202,11 +201,12 @@ def run(self): DOCS_INDEX_FILE_PATH, os.path.join(self.config.target_path, 'index.html')) - manifest = self._get_manifest() adapter = get_adapter(self.config) + with adapter.connection_named('generate_catalog'): + manifest = self._get_manifest() - dbt.ui.printer.print_timestamped_line("Building catalog") - results = adapter.get_catalog(manifest) + dbt.ui.printer.print_timestamped_line("Building catalog") + results = adapter.get_catalog(manifest) results = [ dict(zip(results.column_names, row)) diff --git a/core/dbt/task/init.py b/core/dbt/task/init.py index 
9f8569b9481..c3184c60fa3 100644 --- a/core/dbt/task/init.py +++ b/core/dbt/task/init.py @@ -6,11 +6,11 @@ from dbt.logger import GLOBAL_LOGGER as logger -from dbt.task.base_task import BaseTask +from dbt.task.base import BaseTask STARTER_REPO = 'https://github.com/fishtown-analytics/dbt-starter-project.git' DOCS_URL = 'https://docs.getdbt.com/docs/configure-your-profile' -SAMPLE_PROFILES_YML_FILE = 'https://github.com/fishtown-analytics/dbt/blob/master/sample.profiles.yml' # noqa +SAMPLE_PROFILES_YML_FILE = 'https://docs.getdbt.com/reference#profile' # noqa ON_COMPLETE_MESSAGE = """ Your new dbt project "{project_name}" was created! If this is your first time diff --git a/core/dbt/task/rpc_server.py b/core/dbt/task/rpc_server.py new file mode 100644 index 00000000000..0dfdfa4c277 --- /dev/null +++ b/core/dbt/task/rpc_server.py @@ -0,0 +1,83 @@ +import json + +from werkzeug.wsgi import DispatcherMiddleware +from werkzeug.wrappers import Request, Response +from werkzeug.serving import run_simple +from werkzeug.exceptions import NotFound + +from dbt.logger import RPC_LOGGER as logger +from dbt.task.base import ConfiguredTask +from dbt.task.compile import CompileTask, RemoteCompileTask +from dbt.task.run import RemoteRunTask +from dbt.utils import JSONEncoder +from dbt import rpc + + +class RPCServerTask(ConfiguredTask): + def __init__(self, args, config, tasks=None): + super(RPCServerTask, self).__init__(args, config) + # compile locally + self.manifest = self._compile_manifest() + self.task_manager = rpc.TaskManager() + tasks = tasks or [RemoteCompileTask, RemoteRunTask] + for cls in tasks: + task = cls(args, config, self.manifest) + self.task_manager.add_task_handler(task) + + def _compile_manifest(self): + compile_task = CompileTask(self.args, self.config) + compile_task.run() + return compile_task.manifest + + def run(self): + host = self.args.host + port = self.args.port + addr = (host, port) + + display_host = host + if host == '0.0.0.0': + display_host = 'localhost' + + logger.info( + 'Serving RPC server at {}:{}'.format(*addr) + ) + + logger.info( + 'Supported methods: {}'.format(self.task_manager.methods()) + ) + + logger.info( + 'Send requests to http://{}:{}/jsonrpc'.format(display_host, port) + ) + + app = self.handle_request + app = DispatcherMiddleware(app, { + '/jsonrpc': self.handle_jsonrpc_request, + }) + + # we have to run in threaded mode if we want to share subprocess + # handles, which is the easiest way to implement `kill` (it makes `ps` + # easier as well). The alternative involves tracking metadata+state in + # a multiprocessing.Manager, adds polling the manager to the request + # task handler and in general gets messy fast. + run_simple(host, port, app, threaded=True) + + @Request.application + def handle_jsonrpc_request(self, request): + msg = 'Received request ({0}) from {0.remote_addr}, data={0.data}' + logger.info(msg.format(request)) + response = rpc.ResponseManager.handle(request, self.task_manager) + json_data = json.dumps(response.data, cls=JSONEncoder) + response = Response(json_data, mimetype='application/json') + # this looks and feels dumb, but our json encoder converts decimals and + # datetimes, and if we use the json_data itself the output looks silly + # because of escapes, so re-serialize it into valid JSON types for + # logging. 
+ logger.info('sending response ({}) to {}, data={}'.format( + response, request.remote_addr, json.loads(json_data)) + ) + return response + + @Request.application + def handle_request(self, request): + raise NotFound() diff --git a/core/dbt/task/run.py b/core/dbt/task/run.py index 7c9a7c6b418..3e7903a3736 100644 --- a/core/dbt/task/run.py +++ b/core/dbt/task/run.py @@ -2,7 +2,7 @@ from dbt.logger import GLOBAL_LOGGER as logger from dbt.node_types import NodeType, RunHookType -from dbt.node_runners import ModelRunner +from dbt.node_runners import ModelRunner, RPCExecuteRunner import dbt.exceptions import dbt.flags @@ -11,7 +11,7 @@ from dbt.hooks import get_hook_dict from dbt.compilation import compile_node -from dbt.task.compile import CompileTask +from dbt.task.compile import CompileTask, RemoteCompileTask from dbt.utils import get_nodes_by_tags @@ -29,19 +29,14 @@ def run_hooks(self, adapter, hook_type, extra_context): ordered_hooks = sorted(hooks, key=lambda h: h.get('index', len(hooks))) + # on-run-* hooks should run outside of a transaction. This happens + # b/c psycopg2 automatically begins a transaction when a connection + # is created. + adapter.clear_transaction() + for i, hook in enumerate(ordered_hooks): - model_name = hook.get('name') - - # This will clear out an open transaction if there is one. - # on-run-* hooks should run outside of a transaction. This happens - # b/c psycopg2 automatically begins a transaction when a connection - # is created. TODO : Move transaction logic out of here, and - # implement a for-loop over these sql statements in jinja-land. - # Also, consider configuring psycopg2 (and other adapters?) to - # ensure that a transaction is only created if dbt initiates it. - adapter.clear_transaction(model_name) - compiled = compile_node(adapter, self.config, hook, self.manifest, - extra_context) + compiled = compile_node(adapter, self.config, hook, + self.manifest, extra_context) statement = compiled.wrapped_sql hook_index = hook.get('index', len(hooks)) @@ -53,10 +48,7 @@ def run_hooks(self, adapter, hook_type, extra_context): sql = hook_dict.get('sql', '') if len(sql.strip()) > 0: - adapter.execute(sql, model_name=model_name, auto_begin=False, - fetch=False) - - adapter.release_connection(model_name) + adapter.execute(sql, auto_begin=False, fetch=False) def safe_run_hooks(self, adapter, hook_type, extra_context): try: @@ -82,9 +74,10 @@ def print_results_line(cls, results, execution_time): .format(stat_line=stat_line, execution=execution)) def before_run(self, adapter, selected_uids): - self.populate_adapter_cache(adapter) - self.safe_run_hooks(adapter, RunHookType.Start, {}) - self.create_schemas(adapter, selected_uids) + with adapter.connection_named('master'): + self.create_schemas(adapter, selected_uids) + self.populate_adapter_cache(adapter) + self.safe_run_hooks(adapter, RunHookType.Start, {}) def after_run(self, adapter, results): # in on-run-end hooks, provide the value 'schemas', which is a list of @@ -94,8 +87,9 @@ def after_run(self, adapter, results): r.node.schema for r in results if not any((r.error is not None, r.failed, r.skipped)) )) - self.safe_run_hooks(adapter, RunHookType.End, - {'schemas': schemas, 'results': results}) + with adapter.connection_named('master'): + self.safe_run_hooks(adapter, RunHookType.End, + {'schemas': schemas, 'results': results}) def after_hooks(self, adapter, results, elapsed): self.print_results_line(results, elapsed) @@ -114,3 +108,10 @@ def get_runner_type(self): def task_end_messages(self, results): if 
results: dbt.ui.printer.print_run_end_messages(results) + + +class RemoteRunTask(RemoteCompileTask, RunTask): + METHOD_NAME = 'run' + + def get_runner_type(self): + return RPCExecuteRunner diff --git a/core/dbt/task/run_operation.py b/core/dbt/task/run_operation.py new file mode 100644 index 00000000000..fe90649d1e0 --- /dev/null +++ b/core/dbt/task/run_operation.py @@ -0,0 +1,64 @@ +from dbt.logger import GLOBAL_LOGGER as logger +from dbt.task.base import ConfiguredTask +from dbt.adapters.factory import get_adapter +from dbt.loader import GraphLoader + +import dbt +import dbt.utils +import dbt.exceptions + + +class RunOperationTask(ConfiguredTask): + def _get_macro_parts(self): + macro_name = self.args.macro + if '.' in macro_name: + package_name, macro_name = macro_name.split(".", 1) + else: + package_name = None + + return package_name, macro_name + + def _get_kwargs(self): + return dbt.utils.parse_cli_vars(self.args.args) + + def _run_unsafe(self): + manifest = GraphLoader.load_all(self.config) + adapter = get_adapter(self.config) + + package_name, macro_name = self._get_macro_parts() + macro_kwargs = self._get_kwargs() + + with adapter.connection_named('macro_{}'.format(macro_name)): + adapter.clear_transaction() + res = adapter.execute_macro( + macro_name, + project=package_name, + kwargs=macro_kwargs, + manifest=manifest + ) + + return res + + def run(self): + try: + result = self._run_unsafe() + except dbt.exceptions.Exception as exc: + logger.error( + 'Encountered an error while running operation: {}' + .format(exc) + ) + logger.debug('', exc_info=True) + return False, None + except Exception as exc: + logger.error( + 'Encountered an uncaught exception while running operation: {}' + .format(exc) + ) + logger.debug('', exc_info=True) + return False, None + else: + return True, result + + def interpret_results(self, results): + success, _ = results + return success diff --git a/core/dbt/task/runnable.py b/core/dbt/task/runnable.py index 0084bdf83b6..0b107d1819f 100644 --- a/core/dbt/task/runnable.py +++ b/core/dbt/task/runnable.py @@ -1,9 +1,15 @@ +import base64 import os +import re import time +from abc import abstractmethod +from multiprocessing.dummy import Pool as ThreadPool -from dbt.task.base_task import BaseTask +from dbt import rpc +from dbt.task.base import ConfiguredTask from dbt.adapters.factory import get_adapter from dbt.logger import GLOBAL_LOGGER as logger +from dbt.compat import to_unicode from dbt.compilation import compile_manifest from dbt.contracts.graph.manifest import CompileResultNode from dbt.contracts.results import ExecutionResult @@ -15,8 +21,6 @@ import dbt.graph.selector -from multiprocessing.dummy import Pool as ThreadPool - RESULT_FILE_NAME = 'run_results.json' MANIFEST_FILE_NAME = 'manifest.json' @@ -32,11 +36,20 @@ def load_manifest(config): return manifest -class BaseRunnableTask(BaseTask): +class ManifestTask(ConfiguredTask): def __init__(self, args, config): - super(BaseRunnableTask, self).__init__(args, config) + super(ManifestTask, self).__init__(args, config) self.manifest = None self.linker = None + + def _runtime_initialize(self): + self.manifest = load_manifest(self.config) + self.linker = compile_manifest(self.config, self.manifest) + + +class GraphRunnableTask(ManifestTask): + def __init__(self, args, config): + super(GraphRunnableTask, self).__init__(args, config) self.job_queue = None self._flattened_nodes = None @@ -46,12 +59,14 @@ def __init__(self, args, config): self._skipped_children = {} self._raise_next_tick = None - def 
_runtime_initialize(self): - self.manifest = load_manifest(self.config) - self.linker = compile_manifest(self.config, self.manifest) - + def select_nodes(self): selector = dbt.graph.selector.NodeSelector(self.linker, self.manifest) selected_nodes = selector.select(self.build_query()) + return selected_nodes + + def _runtime_initialize(self): + super(GraphRunnableTask, self)._runtime_initialize() + selected_nodes = self.select_nodes() self.job_queue = self.linker.as_graph_queue(self.manifest, selected_nodes) @@ -112,7 +127,7 @@ def _submit(self, pool, args, callback): This does still go through the callback path for result collection. """ - if self.config.args.single_threaded or True: + if self.config.args.single_threaded: callback(self.call_runner(*args)) else: pool.apply_async(self.call_runner, args=args, callback=callback) @@ -229,27 +244,8 @@ def after_run(self, adapter, results): def after_hooks(self, adapter, results, elapsed): pass - def task_end_messages(self, results): - raise dbt.exceptions.NotImplementedException('Not Implemented') - - def get_result(self, results, elapsed_time, generated_at): - raise dbt.exceptions.NotImplementedException('Not Implemented') - - def run(self): - """ - Run dbt for the query, based on the graph. - """ - self._runtime_initialize() + def execute_with_hooks(self, selected_uids): adapter = get_adapter(self.config) - - if len(self._flattened_nodes) == 0: - logger.info("WARNING: Nothing to do. Try checking your model " - "configs and model specification args") - return [] - else: - logger.info("") - - selected_uids = frozenset(n.unique_id for n in self._flattened_nodes) try: self.before_hooks(adapter) started = time.time() @@ -267,10 +263,28 @@ def run(self): elapsed_time=elapsed, generated_at=dbt.utils.timestring() ) + return result + + def run(self): + """ + Run dbt for the query, based on the graph. + """ + self._runtime_initialize() + + if len(self._flattened_nodes) == 0: + logger.warning("WARNING: Nothing to do. Try checking your model " + "configs and model specification args") + return [] + else: + logger.info("") + + selected_uids = frozenset(n.unique_id for n in self._flattened_nodes) + result = self.execute_with_hooks(selected_uids) + result.write(self.result_path()) - self.task_end_messages(res) - return res + self.task_end_messages(result.results) + return result.results def interpret_results(self, results): if results is None: @@ -279,8 +293,6 @@ def interpret_results(self, results): failures = [r for r in results if r.error or r.failed] return len(failures) == 0 - -class RunnableTask(BaseRunnableTask): def get_model_schemas(self, selected_uids): schemas = set() for node in self.manifest.nodes.values(): @@ -320,3 +332,44 @@ def get_result(self, results, elapsed_time, generated_at): def task_end_messages(self, results): dbt.ui.printer.print_run_end_messages(results) + + +class RemoteCallable(object): + METHOD_NAME = None + is_async = False + + @abstractmethod + def handle_request(self, **kwargs): + raise dbt.exceptions.NotImplementedException( + 'from_kwargs not implemented' + ) + + def decode_sql(self, sql): + """Base64 decode a string. This should only be used for sql in calls. + + :param str sql: The base64 encoded form of the original utf-8 string + :return str: The decoded utf-8 string + """ + # JSON is defined as using "unicode", we'll go a step further and + # mandate utf-8 (though for the base64 part, it doesn't really matter!) 
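+        # malformed input is rejected below so callers get a clean RPC error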
+ base64_sql_bytes = to_unicode(sql).encode('utf-8') + # in python3.x you can pass `validate=True` to b64decode to get this + # behavior. + if not re.match(b'^[A-Za-z0-9+/]*={0,2}$', base64_sql_bytes): + self.raise_invalid_base64(sql) + + try: + sql_bytes = base64.b64decode(base64_sql_bytes) + except ValueError: + self.raise_invalid_base64(sql) + + return sql_bytes.decode('utf-8') + + @staticmethod + def raise_invalid_base64(sql): + raise rpc.invalid_params( + data={ + 'message': 'invalid base64-encoded sql input', + 'sql': str(sql), + } + ) diff --git a/core/dbt/task/serve.py b/core/dbt/task/serve.py index d8ce756b75c..dd3af94f17b 100644 --- a/core/dbt/task/serve.py +++ b/core/dbt/task/serve.py @@ -6,10 +6,10 @@ from dbt.compat import SimpleHTTPRequestHandler, TCPServer from dbt.logger import GLOBAL_LOGGER as logger -from dbt.task.base_task import BaseTask +from dbt.task.base import ProjectOnlyTask -class ServeTask(BaseTask): +class ServeTask(ProjectOnlyTask): def run(self): os.chdir(self.config.target_path) @@ -31,7 +31,7 @@ def run(self): try: webbrowser.open_new_tab('http://127.0.0.1:{}'.format(port)) - except webbrowser.Error as e: + except webbrowser.Error: pass try: diff --git a/core/dbt/task/test.py b/core/dbt/task/test.py index 422214bb780..f20b9897924 100644 --- a/core/dbt/task/test.py +++ b/core/dbt/task/test.py @@ -1,23 +1,24 @@ from dbt.node_runners import TestRunner from dbt.node_types import NodeType -import dbt.ui.printer from dbt.task.run import RunTask class TestTask(RunTask): """ Testing: - 1) Create tmp views w/ 0 rows to ensure all tables, schemas, and SQL - statements are valid - 2) Read schema files and validate that constraints are satisfied - a) not null - b) uniquenss - c) referential integrity - d) accepted value + Read schema files + custom data tests and validate that + constraints are satisfied. 
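+        The on-run-start/on-run-end hooks are skipped for tests.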
""" def raise_on_first_error(self): return False + def before_run(self, adapter, selected_uids): + # Don't execute on-run-* hooks for tests + self.populate_adapter_cache(adapter) + + def after_run(self, adapter, results): + pass + def build_query(self): query = { "include": self.args.models, diff --git a/core/dbt/ui/printer.py b/core/dbt/ui/printer.py index 3f81711b38b..e92816d5b97 100644 --- a/core/dbt/ui/printer.py +++ b/core/dbt/ui/printer.py @@ -146,17 +146,11 @@ def print_test_result_line(result, schema_name, index, total): result.execution_time) -def print_model_result_line(result, schema_name, index, total): - model = result.node - +def print_model_result_line(result, description, index, total): info, status = get_printable_result(result, 'created', 'creating') print_fancy_output_line( - "{info} {model_type} model {schema}.{relation}".format( - info=info, - model_type=get_materialization(model), - schema=schema_name, - relation=model.get('alias')), + "{info} {description}".format(info=info, description=description), status, index, total, @@ -169,9 +163,10 @@ def print_archive_result_line(result, index, total): info, status = get_printable_result(result, 'archived', 'archiving') cfg = model.get('config', {}) + msg = "{info} {name} --> {target_database}.{target_schema}.{name}".format( + info=info, name=model.name, **cfg) print_fancy_output_line( - "{info} {source_schema}.{source_table} --> " - "{target_schema}.{target_table}".format(info=info, **cfg), + msg, status, index, total, diff --git a/core/dbt/utils.py b/core/dbt/utils.py index edf283bda6b..b194f0aa5db 100644 --- a/core/dbt/utils.py +++ b/core/dbt/utils.py @@ -1,18 +1,14 @@ -from datetime import datetime -from decimal import Decimal - import collections import copy +import datetime import functools import hashlib import itertools import json -import numbers import os import dbt.exceptions -from dbt.include.global_project import PACKAGES from dbt.compat import basestring, DECIMALS from dbt.logger import GLOBAL_LOGGER as logger from dbt.node_types import NodeType @@ -170,7 +166,7 @@ def merge(*args): return args[0] lst = list(args) - last = lst.pop(len(lst)-1) + last = lst.pop(len(lst) - 1) return _merge(merge(*lst), last) @@ -194,7 +190,7 @@ def deep_merge(*args): return copy.deepcopy(args[0]) lst = list(args) - last = copy.deepcopy(lst.pop(len(lst)-1)) + last = copy.deepcopy(lst.pop(len(lst) - 1)) return _deep_merge(deep_merge(*lst), last) @@ -424,9 +420,10 @@ def parse_cli_vars(var_string): dbt.exceptions.raise_compiler_error( "The --vars argument must be a YAML dictionary, but was " "of type '{}'".format(type_name)) - except dbt.exceptions.ValidationException as e: + except dbt.exceptions.ValidationException: logger.error( - "The YAML provided in the --vars argument is not valid.\n") + "The YAML provided in the --vars argument is not valid.\n" + ) raise @@ -442,7 +439,7 @@ def add_ephemeral_model_prefix(s): def timestring(): """Get the current datetime as an RFC 3339-compliant string""" # isoformat doesn't include the mandatory trailing 'Z' for UTC. 
- return datetime.utcnow().isoformat() + 'Z' + return datetime.datetime.utcnow().isoformat() + 'Z' class JSONEncoder(json.JSONEncoder): @@ -453,6 +450,9 @@ class JSONEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, DECIMALS): return float(obj) + if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)): + return obj.isoformat() + return super(JSONEncoder, self).default(obj) @@ -473,8 +473,10 @@ def translate_aliases(kwargs, aliases): key_names = ', '.join("{}".format(k) for k in kwargs if aliases.get(k) == canonical_key) - raise AliasException('Got duplicate keys: ({}) all map to "{}"' - .format(key_names, canonical_key)) + raise dbt.exceptions.AliasException( + 'Got duplicate keys: ({}) all map to "{}"' + .format(key_names, canonical_key) + ) result[canonical_key] = value diff --git a/core/dbt/version.py b/core/dbt/version.py index 752926a2f59..ab46bd02992 100644 --- a/core/dbt/version.py +++ b/core/dbt/version.py @@ -1,5 +1,4 @@ import json -import re import requests @@ -57,5 +56,5 @@ def get_version_information(): .format(version_msg)) -__version__ = '0.13.0a1' +__version__ = '0.13.0' installed = get_installed_version() diff --git a/core/setup.py b/core/setup.py index b29bb8fd1fd..29714da5d23 100644 --- a/core/setup.py +++ b/core/setup.py @@ -9,7 +9,7 @@ def read(fname): package_name = "dbt-core" -package_version = "0.13.0a1" +package_version = "0.13.0" description = """dbt (data build tool) is a command line tool that helps \ analysts and engineers transform data in their warehouse more effectively""" @@ -51,6 +51,8 @@ def read(fname): 'requests>=2.18.0,<3', 'colorama==0.3.9', 'agate>=1.6,<2', - 'jsonschema==2.6.0', + 'jsonschema>=3.0.1,<4', + 'json-rpc>=1.12,<2', + 'werkzeug>=0.14.1,<0.15', ] ) diff --git a/dev_requirements.txt b/dev_requirements.txt index cc33b84fbb1..b2f7fb82fc8 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,9 +1,10 @@ freezegun==0.3.9 -nose>=1.3.7 +pytest==4.4.0 +pytest-cov==2.6.1 mock>=1.3.0 -pep8>=1.6.2 +flake8>=3.5.0 pytz==2017.2 bumpversion==0.5.3 -coverage==4.2 +coverage==4.4 tox==2.5.0 ipdb diff --git a/docker-compose.yml b/docker-compose.yml index 176c6f50837..da9b389fefe 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ services: build: context: . 
      dockerfile: Dockerfile
-    command: "/root/.virtualenvs/dbt/bin/nosetests"
+    command: "/root/.virtualenvs/dbt/bin/pytest"
     env_file:
       - ./test.env
     volumes:
diff --git a/etc/dbt-horizontal.png b/etc/dbt-horizontal.png
index 03dce6d02bacdca4c5eb5198a0bafbb89f0ef869..2d2549089222496ec86b65423fe0122f29a5d4d7 100644
GIT binary patch
literal 8968
[base85-encoded binary image data omitted]
zN<&^WGUrnx31ZQJjM^WAOn1vUhf+WjF*dNp`ZP7MFK-cSErz(%!YVp2Fl(>mU1j!F za*ImuP$e7p{kKVoJ3X+stVnAgLx=lN_!2S+TM@omyYl&ug8w+>ZrF{QbNrbjxBwUTyH- za-bxeOSEzoXjR{}?H~?w$7g{m3O;?YbYPN)I45~(d+dAJ^;X3zP?_mLw}~{PL!vxT zH1AfD8*^vkGl3qcAX*t(847{n)8|RW0ik2Rg%7?s+ZEn#I6DAp6jx1@r`Wk|6j4OE z(9ny3(zWV9S|h|I*FS&_j*L1qv!CCWU0YStkBA9LmJqp&>DnBCm{ylZl^?Sb`uI`! z&MUb^=gc3!jhPO3R8)9%;fK8M4rz2V&)aAA`a=B_cx=_*!);PSy@_hdCk&VbpBNT? z|D5pI7yRYaRL&2`%G5hEvVZ-yEVk9EqG8=f`@6$c?qbM|7vknQ6E9oigSSAwy{+VAV*ZvjDP`pQ=(;D<(_3;7kpAhjo?X70(Q!+Z$+w|}G7t7za~`D-z0e)}x&KBsVVwQ+g`T-&UtvG&!Ahvj)jg^emaVI*kVYVO~;`Lpz3jBMJA${ghj1MmHn^dDsjYc zigv!~thR=ZlwuD0I(B!^PWF@If zM9HTgiFQyigi4i|M~t){S2rZ#{N=mZVO1#Ln=_B8^QPuD@KBZP^pjMT&jlaG` zfE(}Bxx!rf5e_7W&Be%z^oa^U=^s>Ce|k#r=^1yiq=}h^>_jiR@n>fR{9H;xDPgj( z4)9VO<3b~=oCn!Ic0^z-(RjpW#dhc7QF3!E-ahmp|LPeB&#bE@jP7fbTm#+d)ht27 z5_3B#Y0wQE?D83kbR8O~UR|?EXRLk1S zC_iz{gxxAt)m2L}FP}LW{E<&<6=#23+qMVCi$dSbPw?)za3t76u=36Mi<2I2%)igI zc>an~W;^BU6Lu-8Mt6s&J&ONOb#mgA#m>&BE|>u~xW_Ak_GcUn2;ma6T>ZkPyJBo%rVO_dSc6`{99BW1);pVDGUF(I_H_M;rV(h@YH677SQf@ zi7rN`D;FJCK8dhgyyBFJobU97zYAKQ6D-(F6kT%{TK-sKbMjfm$$U1Z}y1-w_Hbmu$!Z4bQ)L!G+EtT=NVNUR4%6!{i&j6+9l`gwZN4JySU@M z=gj|+-5Jzz;mPBDo5h}Aw6XO4UjW1xvNC@GhvnApV0eFQFF0Zx_lD?(Pv_SAw&3wc z3-SKE1w6UHqR)Pv$KrSaC6kvcHMZ?&f1)vGfs*fjMx!mB=O+ANFcXMP1(k4rudKPh z``eSHKd(+=R((HN+wXkB6;RP*&p%z`(hKLdWn$lJD-3^54)nWhuf*$jS$-$a-4_0b z-8<}Ez`?EQ?3c0O+mwB)E*F0^nlo!syQEF^j|)a2htB@cKbc;-E6~5=LdbG~#WOW4 z+2Yj2KhLpslDyTR{bbSHdI#Xi7N8j8Ej{~T3vfQPs_Mm?E&g6&$((Y1u}d9)znHJ{ zEv~X|Oa7yvlUHBx3UZwa@ws9p?mvAGi~o^#Q$DpExDBjD? z%6BZ=vu4Kf96i2CHbU%=7-zJuowRiaTm7WxGP{MBvOaI+VBLE2$Hb#g^lsK_+6wVc zexp6TQ6q70%-&yGe**ig@{}PyvreAHTyr;jdf2<=&h2r#HEteh>Th9SU7Y^dJwB!` z;@!NrtvB=eesY515Q5r1|7Uz#6!FV6zI6fc#1_>O*NBpo#FA92 0: - fields = [column_to_bq_schema(field) for field in col.fields] - kwargs = {"fields": fields} - - return google.cloud.bigquery.SchemaField(col.name, col.dtype, col.mode, - **kwargs) - - class BigQueryAdapter(BaseAdapter): RELATION_TYPES = { @@ -48,7 +35,7 @@ class BigQueryAdapter(BaseAdapter): } Relation = BigQueryRelation - Column = dbt.schema.BigQueryColumn + Column = BigQueryColumn ConnectionManager = BigQueryConnectionManager AdapterSpecificConfigs = frozenset({"cluster_by", "partition_by"}) @@ -65,13 +52,12 @@ def date_function(cls): def is_cancelable(cls): return False - def drop_relation(self, relation, model_name=None): - is_cached = self._schema_is_cached(relation.database, relation.schema, - model_name) + def drop_relation(self, relation): + is_cached = self._schema_is_cached(relation.database, relation.schema) if is_cached: self.cache.drop(relation) - conn = self.connections.get(model_name) + conn = self.connections.get_thread_connection() client = conn.handle dataset = self.connections.dataset(relation.database, relation.schema, @@ -79,32 +65,37 @@ def drop_relation(self, relation, model_name=None): relation_object = dataset.table(relation.identifier) client.delete_table(relation_object) - def truncate_relation(self, relation, model_name=None): + def truncate_relation(self, relation): raise dbt.exceptions.NotImplementedException( '`truncate` is not implemented for this adapter!' ) - def rename_relation(self, from_relation, to_relation, model_name=None): + def rename_relation(self, from_relation, to_relation): raise dbt.exceptions.NotImplementedException( '`rename_relation` is not implemented for this adapter!' 
) - def list_schemas(self, database, model_name=None): - conn = self.connections.get(model_name) + @available + def list_schemas(self, database): + conn = self.connections.get_thread_connection() client = conn.handle - with self.connections.exception_handler('list dataset', conn.name): + with self.connections.exception_handler('list dataset'): all_datasets = client.list_datasets(project=database, include_all=True) return [ds.dataset_id for ds in all_datasets] - def get_columns_in_relation(self, relation, model_name=None): + @available + def check_schema_exists(self, database, schema): + superself = super(BigQueryAdapter, self) + return superself.check_schema_exists(database, schema) + + def get_columns_in_relation(self, relation): try: table = self.connections.get_bq_table( database=relation.database, schema=relation.schema, - identifier=relation.table_name, - conn_name=model_name + identifier=relation.table_name ) return self._get_dbt_columns_from_bq_table(table) @@ -112,17 +103,17 @@ def get_columns_in_relation(self, relation, model_name=None): logger.debug("get_columns_in_relation error: {}".format(e)) return [] - def expand_column_types(self, goal, current, model_name=None): + def expand_column_types(self, goal, current): # This is a no-op on BigQuery pass - def list_relations_without_caching(self, database, schema, - model_name=None): - connection = self.connections.get(model_name) + def list_relations_without_caching(self, information_schema, schema): + connection = self.connections.get_thread_connection() client = connection.handle - bigquery_dataset = self.connections.dataset(database, schema, - connection) + bigquery_dataset = self.connections.dataset( + information_schema.database, schema, connection + ) all_tables = client.list_tables( bigquery_dataset, @@ -140,18 +131,17 @@ def list_relations_without_caching(self, database, schema, # the implementation of list_relations for other adapters try: return [self._bq_table_to_relation(table) for table in all_tables] - except google.api_core.exceptions.NotFound as e: + except google.api_core.exceptions.NotFound: return [] - def get_relation(self, database, schema, identifier, model_name=None): - if self._schema_is_cached(database, schema, model_name): + def get_relation(self, database, schema, identifier): + if self._schema_is_cached(database, schema): # if it's in the cache, use the parent's model of going through # the relations cache and picking out the relation return super(BigQueryAdapter, self).get_relation( database=database, schema=schema, - identifier=identifier, - model_name=model_name + identifier=identifier ) try: @@ -160,16 +150,16 @@ def get_relation(self, database, schema, identifier, model_name=None): table = None return self._bq_table_to_relation(table) - def create_schema(self, database, schema, model_name=None): + def create_schema(self, database, schema): logger.debug('Creating schema "%s.%s".', database, schema) - self.connections.create_dataset(database, schema, model_name) + self.connections.create_dataset(database, schema) - def drop_schema(self, database, schema, model_name=None): + def drop_schema(self, database, schema): logger.debug('Dropping schema "%s.%s".', database, schema) - if not self.check_schema_exists(database, schema, model_name): + if not self.check_schema_exists(database, schema): return - self.connections.drop_dataset(database, schema, model_name) + self.connections.drop_dataset(database, schema) @classmethod def quote(cls, identifier): @@ -231,16 +221,14 @@ def _agate_to_schema(self, 
agate_table, column_override): def _materialize_as_view(self, model): model_database = model.get('database') model_schema = model.get('schema') - model_name = model.get('name') model_alias = model.get('alias') model_sql = model.get('injected_sql') - logger.debug("Model SQL ({}):\n{}".format(model_name, model_sql)) + logger.debug("Model SQL ({}):\n{}".format(model_alias, model_sql)) self.connections.create_view( database=model_database, schema=model_schema, table_name=model_alias, - conn_name=model_name, sql=model_sql ) return "CREATE VIEW" @@ -248,7 +236,6 @@ def _materialize_as_view(self, model): def _materialize_as_table(self, model, model_sql, decorator=None): model_database = model.get('database') model_schema = model.get('schema') - model_name = model.get('name') model_alias = model.get('alias') if decorator is None: @@ -260,7 +247,6 @@ def _materialize_as_table(self, model, model_sql, decorator=None): self.connections.create_table( database=model_database, schema=model_schema, - conn_name=model_name, table_name=table_name, sql=model_sql ) @@ -306,10 +292,10 @@ def warning_on_hooks(hook_type): dbt.ui.printer.COLOR_FG_YELLOW) @available - def add_query(self, sql, model_name=None, auto_begin=True, - bindings=None, abridge_sql_log=False): - if model_name in ['on-run-start', 'on-run-end']: - self.warning_on_hooks(model_name) + def add_query(self, sql, auto_begin=True, bindings=None, + abridge_sql_log=False): + if self.nice_connection_name() in ['on-run-start', 'on-run-end']: + self.warning_on_hooks(self.nice_connection_name()) else: raise dbt.exceptions.NotImplementedException( '`add_query` is not implemented for this adapter!') @@ -318,24 +304,24 @@ def add_query(self, sql, model_name=None, auto_begin=True, # Special bigquery adapter methods ### @available - def make_date_partitioned_table(self, relation, model_name=None): + def make_date_partitioned_table(self, relation): return self.connections.create_date_partitioned_table( database=relation.database, schema=relation.schema, - table_name=relation.identifier, - conn_name=model_name + table_name=relation.identifier ) @available def execute_model(self, model, materialization, sql_override=None, - decorator=None, model_name=None): + decorator=None): if sql_override is None: sql_override = model.get('injected_sql') if flags.STRICT_MODE: - connection = self.connections.get(model.get('name')) + connection = self.connections.get_thread_connection() assert isinstance(connection, Connection) + assert(connection.name == model.get('name')) if materialization == 'view': res = self._materialize_as_view(model) @@ -348,10 +334,10 @@ def execute_model(self, model, materialization, sql_override=None, return res @available - def create_temporary_table(self, sql, model_name=None, **kwargs): + def create_temporary_table(self, sql, **kwargs): # BQ queries always return a temp table with their results - query_job, _ = self.connections.raw_execute(sql, model_name) + query_job, _ = self.connections.raw_execute(sql) bq_table = query_job.destination return self.Relation.create( @@ -365,12 +351,12 @@ def create_temporary_table(self, sql, model_name=None, **kwargs): type=BigQueryRelation.Table) @available - def alter_table_add_columns(self, relation, columns, model_name=None): + def alter_table_add_columns(self, relation, columns): logger.debug('Adding columns ({}) to table {}".'.format( columns, relation)) - conn = self.connections.get(model_name) + conn = self.connections.get_thread_connection() client = conn.handle table_ref = 
self.connections.table_ref(relation.database, @@ -378,7 +364,7 @@ def alter_table_add_columns(self, relation, columns, model_name=None): relation.identifier, conn) table = client.get_table(table_ref) - new_columns = [column_to_bq_schema(col) for col in columns] + new_columns = [col.column_to_bq_schema() for col in columns] new_schema = table.schema + new_columns new_table = google.cloud.bigquery.Table(table_ref, schema=new_schema) @@ -386,9 +372,9 @@ def alter_table_add_columns(self, relation, columns, model_name=None): @available def load_dataframe(self, database, schema, table_name, agate_table, - column_override, model_name=None): + column_override): bq_schema = self._agate_to_schema(agate_table, column_override) - conn = self.connections.get(model_name) + conn = self.connections.get_thread_connection() client = conn.handle table = self.connections.table_ref(database, schema, table_name, conn) @@ -402,7 +388,7 @@ def load_dataframe(self, database, schema, table_name, agate_table, job_config=load_config) timeout = self.connections.get_timeout(conn) - with self.connections.exception_handler("LOAD TABLE", conn.name): + with self.connections.exception_handler("LOAD TABLE"): self.poll_until_job_completes(job, timeout) ### @@ -473,7 +459,7 @@ def _get_stats_columns(cls, table, relation_type): return zip(column_names, column_values) def get_catalog(self, manifest): - connection = self.connections.get('catalog') + connection = self.connections.get_thread_connection() client = connection.handle schemas = manifest.get_used_schemas() diff --git a/plugins/bigquery/dbt/adapters/bigquery/relation.py b/plugins/bigquery/dbt/adapters/bigquery/relation.py index a489512d607..e4f982b2cbe 100644 --- a/plugins/bigquery/dbt/adapters/bigquery/relation.py +++ b/plugins/bigquery/dbt/adapters/bigquery/relation.py @@ -1,6 +1,8 @@ -from dbt.adapters.base.relation import BaseRelation +from dbt.adapters.base.relation import BaseRelation, Column from dbt.utils import filter_null_values +import google.cloud.bigquery + class BigQueryRelation(BaseRelation): External = "external" @@ -107,3 +109,101 @@ def dataset(self): @property def identifier(self): return self.path.get('identifier') + + +class BigQueryColumn(Column): + TYPE_LABELS = { + 'STRING': 'STRING', + 'TIMESTAMP': 'TIMESTAMP', + 'FLOAT': 'FLOAT64', + 'INTEGER': 'INT64', + 'RECORD': 'RECORD', + } + + def __init__(self, column, dtype, fields=None, mode='NULLABLE'): + super(BigQueryColumn, self).__init__(column, dtype) + + if fields is None: + fields = [] + + self.fields = self.wrap_subfields(fields) + self.mode = mode + + @classmethod + def wrap_subfields(cls, fields): + return [BigQueryColumn.create_from_field(field) for field in fields] + + @classmethod + def create_from_field(cls, field): + return BigQueryColumn(field.name, cls.translate_type(field.field_type), + field.fields, field.mode) + + @classmethod + def _flatten_recursive(cls, col, prefix=None): + if prefix is None: + prefix = [] + + if len(col.fields) == 0: + prefixed_name = ".".join(prefix + [col.column]) + new_col = BigQueryColumn(prefixed_name, col.dtype, col.fields, + col.mode) + return [new_col] + + new_fields = [] + for field in col.fields: + new_prefix = prefix + [col.column] + new_fields.extend(cls._flatten_recursive(field, new_prefix)) + + return new_fields + + def flatten(self): + return self._flatten_recursive(self) + + @property + def quoted(self): + return '`{}`'.format(self.column) + + def literal(self, value): + return "cast({} as {})".format(value, self.dtype) + + @property + def 
data_type(self): + if self.dtype.upper() == 'RECORD': + subcols = [ + "{} {}".format(col.name, col.data_type) for col in self.fields + ] + field_type = 'STRUCT<{}>'.format(", ".join(subcols)) + + else: + field_type = self.dtype + + if self.mode.upper() == 'REPEATED': + return 'ARRAY<{}>'.format(field_type) + + else: + return field_type + + def is_string(self): + return self.dtype.lower() == 'string' + + def is_numeric(self): + return False + + def can_expand_to(self, other_column): + """returns True if both columns are strings""" + return self.is_string() and other_column.is_string() + + def __repr__(self): + return "".format(self.name, self.data_type, + self.mode) + + def column_to_bq_schema(self): + """Convert a column to a bigquery schema object. + """ + kwargs = {} + if len(self.fields) > 0: + fields = [field.column_to_bq_schema() for field in self.fields] + kwargs = {"fields": fields} + + return google.cloud.bigquery.SchemaField(self.name, self.dtype, + self.mode, **kwargs) diff --git a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql index d9700f3591a..1c87ce4dc18 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/adapters.sql @@ -59,11 +59,21 @@ {% endmacro %} -{% macro bigquery__list_relations_without_caching(database, schema) -%} - {{ return(adapter.list_relations_without_caching(database, schema)) }} +{% macro bigquery__list_relations_without_caching(information_schema, schema) -%} + {{ return(adapter.list_relations_without_caching(information_schema, schema)) }} {% endmacro %} {% macro bigquery__current_timestamp() -%} CURRENT_TIMESTAMP() {%- endmacro %} + + +{% macro bigquery__list_schemas(database) %} + {{ return(adapter.list_schemas()) }} +{% endmacro %} + + +{% macro bigquery__check_schema_exists(information_schema, schema) %} + {{ return(adapter.check_schema_exists(information_schema.database, schema)) }} +{% endmacro %} diff --git a/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql b/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql index 5548b71a7e8..7a95f440f83 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/materializations/archive.sql @@ -4,11 +4,10 @@ {% endmacro %} -{% macro bigquery__archive_scd_hash() %} - to_hex(md5(concat(cast(`dbt_pk` as string), '|', cast(`dbt_updated_at` as string)))) +{% macro bigquery__archive_hash_arguments(args) %} + to_hex(md5(concat({% for arg in args %}coalesce(cast({{ arg }} as string), ''){% if not loop.last %}, '|',{% endif %}{% endfor %}))) {% endmacro %} - {% macro bigquery__create_columns(relation, columns) %} {{ adapter.alter_table_add_columns(relation, columns) }} {% endmacro %} @@ -16,8 +15,8 @@ {% macro bigquery__archive_update(target_relation, tmp_relation) %} update {{ target_relation }} as dest - set dest.{{ adapter.quote('valid_to') }} = tmp.{{ adapter.quote('valid_to') }} + set dest.dbt_valid_to = tmp.dbt_valid_to from {{ tmp_relation }} as tmp - where tmp.{{ adapter.quote('scd_id') }} = dest.{{ adapter.quote('scd_id') }} - and {{ adapter.quote('change_type') }} = 'update'; + where tmp.dbt_scd_id = dest.dbt_scd_id + and change_type = 'update'; {% endmacro %} diff --git a/plugins/bigquery/dbt/include/bigquery/macros/materializations/incremental.sql b/plugins/bigquery/dbt/include/bigquery/macros/materializations/incremental.sql index 
d561ea487c4..98c65a1dddc 100644 --- a/plugins/bigquery/dbt/include/bigquery/macros/materializations/incremental.sql +++ b/plugins/bigquery/dbt/include/bigquery/macros/materializations/incremental.sql @@ -2,7 +2,6 @@ {% materialization incremental, adapter='bigquery' -%} {%- set unique_key = config.get('unique_key') -%} - {%- set sql_where = config.get('sql_where') -%} {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} @@ -34,12 +33,7 @@ {% set source_sql -%} {#-- wrap sql in parens to make it a subquery --#} ( - select * from ( - {{ sql }} - ) - {% if sql_where %} - where ({{ sql_where }}) or ({{ sql_where }}) is null - {% endif %} + {{ sql }} ) {%- endset -%} diff --git a/plugins/bigquery/setup.py b/plugins/bigquery/setup.py index d563e6b3692..1d21eba4e23 100644 --- a/plugins/bigquery/setup.py +++ b/plugins/bigquery/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-bigquery" -package_version = "0.13.0a1" +package_version = "0.13.0" description = """The bigquery adapter plugin for dbt (data build tool)""" diff --git a/plugins/postgres/dbt/adapters/postgres/__init__.py b/plugins/postgres/dbt/adapters/postgres/__init__.py index f139484e807..89d7b5edee4 100644 --- a/plugins/postgres/dbt/adapters/postgres/__init__.py +++ b/plugins/postgres/dbt/adapters/postgres/__init__.py @@ -1,4 +1,5 @@ -from dbt.adapters.postgres.connections import PostgresConnectionManager +# these are mostly just exports, #noqa them so flake8 will be happy +from dbt.adapters.postgres.connections import PostgresConnectionManager # noqa from dbt.adapters.postgres.connections import PostgresCredentials from dbt.adapters.postgres.impl import PostgresAdapter diff --git a/plugins/postgres/dbt/adapters/postgres/connections.py b/plugins/postgres/dbt/adapters/postgres/connections.py index 664d79ff541..360a130a936 100644 --- a/plugins/postgres/dbt/adapters/postgres/connections.py +++ b/plugins/postgres/dbt/adapters/postgres/connections.py @@ -61,7 +61,7 @@ class PostgresConnectionManager(SQLConnectionManager): TYPE = 'postgres' @contextmanager - def exception_handler(self, sql, connection_name='master'): + def exception_handler(self, sql): try: yield @@ -70,7 +70,7 @@ def exception_handler(self, sql, connection_name='master'): try: # attempt to release the connection - self.release(connection_name) + self.release() except psycopg2.Error: logger.debug("Failed to release connection!") pass @@ -81,7 +81,13 @@ def exception_handler(self, sql, connection_name='master'): except Exception as e: logger.debug("Error running SQL: %s", sql) logger.debug("Rolling back transaction.") - self.release(connection_name) + self.release() + if isinstance(e, dbt.exceptions.RuntimeException): + # during a sql query, an internal to dbt exception was raised. + # this sounds a lot like a signal handler and probably has + # useful information, so raise it without modification. 
+ raise + raise dbt.exceptions.RuntimeException(e) @classmethod @@ -90,7 +96,6 @@ def open(cls, connection): logger.debug('Connection is already open, skipping open.') return connection - base_credentials = connection.credentials credentials = cls.get_credentials(connection.credentials.incorporate()) kwargs = {} keepalives_idle = credentials.get('keepalives_idle', @@ -132,7 +137,7 @@ def cancel(self, connection): logger.debug("Cancelling query '{}' ({})".format(connection_name, pid)) - _, cursor = self.add_query(sql, 'master') + _, cursor = self.add_query(sql) res = cursor.fetchone() logger.debug("Cancel query '{}': {}".format(connection_name, res)) diff --git a/plugins/postgres/dbt/adapters/postgres/impl.py b/plugins/postgres/dbt/adapters/postgres/impl.py index a5b0087a711..88be130cabb 100644 --- a/plugins/postgres/dbt/adapters/postgres/impl.py +++ b/plugins/postgres/dbt/adapters/postgres/impl.py @@ -1,15 +1,8 @@ -import psycopg2 - -import time - -from dbt.adapters.base.meta import available_raw +from dbt.adapters.base.meta import available from dbt.adapters.sql import SQLAdapter from dbt.adapters.postgres import PostgresConnectionManager import dbt.compat import dbt.exceptions -import agate - -from dbt.logger import GLOBAL_LOGGER as logger # note that this isn't an adapter macro, so just a single underscore @@ -23,7 +16,7 @@ class PostgresAdapter(SQLAdapter): def date_function(cls): return 'now()' - @available_raw + @available def verify_database(self, database): database = database.strip('"') expected = self.config.credentials.database @@ -56,16 +49,26 @@ def _link_cached_database_relations(self, schemas): if refed_schema.lower() in schemas: self.cache.add_link(dependent, referenced) + def _get_cache_schemas(self, manifest, exec_only=False): + # postgres/redshift only allow one database (the main one) + superself = super(PostgresAdapter, self) + schemas = superself._get_cache_schemas(manifest, exec_only=exec_only) + try: + return schemas.flatten() + except dbt.exceptions.RuntimeException as exc: + dbt.exceptions.raise_compiler_error( + 'Cross-db references not allowed in adapter {}: Got {}'.format( + self.type(), exc.msg + ) + ) + def _link_cached_relations(self, manifest): schemas = set() for db, schema in manifest.get_used_schemas(): self.verify_database(db) schemas.add(schema) - try: - self._link_cached_database_relations(schemas) - finally: - self.release_connection(GET_RELATIONS_MACRO_NAME) + self._link_cached_database_relations(schemas) def _relations_cache_for_schemas(self, manifest): super(PostgresAdapter, self)._relations_cache_for_schemas(manifest) diff --git a/plugins/postgres/dbt/include/postgres/macros/adapters.sql b/plugins/postgres/dbt/include/postgres/macros/adapters.sql index b51df9f42c5..0bda7fc9ad4 100644 --- a/plugins/postgres/dbt/include/postgres/macros/adapters.sql +++ b/plugins/postgres/dbt/include/postgres/macros/adapters.sql @@ -26,7 +26,7 @@ numeric_precision, numeric_scale - from {{ information_schema_name(relation.database) }}.columns + from {{ relation.information_schema('columns') }} where table_name = '{{ relation.identifier }}' {% if relation.schema %} and table_schema = '{{ relation.schema }}' @@ -39,10 +39,10 @@ {% endmacro %} -{% macro postgres__list_relations_without_caching(database, schema) %} +{% macro postgres__list_relations_without_caching(information_schema, schema) %} {% call statement('list_relations_without_caching', fetch_result=True) -%} select - '{{ database }}' as database, + '{{ information_schema.database.lower() }}' as database, 
tablename as name, schemaname as schema, 'table' as type @@ -50,7 +50,7 @@ where schemaname ilike '{{ schema }}' union all select - '{{ database }}' as database, + '{{ information_schema.database.lower() }}' as database, viewname as name, schemaname as schema, 'view' as type @@ -77,9 +77,9 @@ {{ return(load_result('list_schemas').table) }} {% endmacro %} -{% macro postgres__check_schema_exists(database, schema) -%} +{% macro postgres__check_schema_exists(information_schema, schema) -%} {% if database -%} - {{ adapter.verify_database(database) }} + {{ adapter.verify_database(information_schema.database) }} {%- endif -%} {% call statement('check_schema_exists', fetch_result=True, auto_begin=False) %} select count(*) from pg_namespace where nspname = '{{ schema }}' diff --git a/plugins/postgres/dbt/include/postgres/macros/catalog.sql b/plugins/postgres/dbt/include/postgres/macros/catalog.sql index e04e521ea94..3558f3ff649 100644 --- a/plugins/postgres/dbt/include/postgres/macros/catalog.sql +++ b/plugins/postgres/dbt/include/postgres/macros/catalog.sql @@ -1,11 +1,11 @@ -{% macro postgres__get_catalog() -%} +{% macro postgres__get_catalog(information_schemas) -%} {%- call statement('catalog', fetch_result=True) -%} - {% if (databases | length) != 1 %} - exceptions.raise_compiler_error('postgres get_catalog requires exactly one database') + {% if (information_schemas | length) != 1 %} + {{ exceptions.raise_compiler_error('postgres get_catalog requires exactly one database') }} {% endif %} - {% set database = databases[0] %} + {% set database = information_schemas[0].database %} {{ adapter.verify_database(database) }} with table_owners as ( diff --git a/plugins/postgres/setup.py b/plugins/postgres/setup.py index d18988e6cf5..9ea197201f0 100644 --- a/plugins/postgres/setup.py +++ b/plugins/postgres/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-postgres" -package_version = "0.13.0a1" +package_version = "0.13.0" description = """The postgres adpter plugin for dbt (data build tool)""" setup( diff --git a/plugins/redshift/dbt/adapters/redshift/__init__.py b/plugins/redshift/dbt/adapters/redshift/__init__.py index 336eb55d17d..b26665a8ddf 100644 --- a/plugins/redshift/dbt/adapters/redshift/__init__.py +++ b/plugins/redshift/dbt/adapters/redshift/__init__.py @@ -1,4 +1,4 @@ -from dbt.adapters.redshift.connections import RedshiftConnectionManager +from dbt.adapters.redshift.connections import RedshiftConnectionManager # noqa from dbt.adapters.redshift.connections import RedshiftCredentials from dbt.adapters.redshift.impl import RedshiftAdapter diff --git a/plugins/redshift/dbt/adapters/redshift/connections.py b/plugins/redshift/dbt/adapters/redshift/connections.py index fe85a0e1858..d5450a117fc 100644 --- a/plugins/redshift/dbt/adapters/redshift/connections.py +++ b/plugins/redshift/dbt/adapters/redshift/connections.py @@ -95,16 +95,16 @@ def fresh_transaction(self, name=None): """ with drop_lock: - connection = self.get(name) + connection = self.get_thread_connection() if connection.transaction_open: - self.commit(connection) + self.commit() - self.begin(connection.name) + self.begin() yield - self.commit(connection) - self.begin(connection.name) + self.commit() + self.begin() @classmethod def fetch_cluster_credentials(cls, db_user, db_name, cluster_id, @@ -123,8 +123,8 @@ def fetch_cluster_credentials(cls, db_user, db_name, cluster_id, except boto_client.exceptions.ClientError as e: raise dbt.exceptions.FailedToConnectException( - "Unable to get temporary Redshift 
cluster credentials: " - "{}".format(e)) + "Unable to get temporary Redshift cluster credentials: {}" + .format(e)) @classmethod def get_tmp_iam_cluster_credentials(cls, credentials): @@ -136,8 +136,8 @@ def get_tmp_iam_cluster_credentials(cls, credentials): if not cluster_id: raise dbt.exceptions.FailedToConnectException( - "'cluster_id' must be provided in profile if IAM " - "authentication method selected") + "'cluster_id' must be provided in profile if IAM " + "authentication method selected") cluster_creds = cls.fetch_cluster_credentials( credentials.user, @@ -167,4 +167,4 @@ def get_credentials(cls, credentials): else: raise dbt.exceptions.FailedToConnectException( - "Invalid 'method' in profile: '{}'".format(method)) + "Invalid 'method' in profile: '{}'".format(method)) diff --git a/plugins/redshift/dbt/adapters/redshift/impl.py b/plugins/redshift/dbt/adapters/redshift/impl.py index 08f0dcff0e4..50934fba862 100644 --- a/plugins/redshift/dbt/adapters/redshift/impl.py +++ b/plugins/redshift/dbt/adapters/redshift/impl.py @@ -1,7 +1,6 @@ from dbt.adapters.postgres import PostgresAdapter from dbt.adapters.redshift import RedshiftConnectionManager from dbt.logger import GLOBAL_LOGGER as logger # noqa -import dbt.exceptions class RedshiftAdapter(PostgresAdapter): @@ -13,7 +12,7 @@ class RedshiftAdapter(PostgresAdapter): def date_function(cls): return 'getdate()' - def drop_relation(self, relation, model_name=None): + def drop_relation(self, relation): """ In Redshift, DROP TABLE ... CASCADE should not be used inside a transaction. Redshift doesn't prevent the CASCADE @@ -28,9 +27,9 @@ def drop_relation(self, relation, model_name=None): https://docs.aws.amazon.com/redshift/latest/dg/r_DROP_TABLE.html """ - with self.connections.fresh_transaction(model_name): + with self.connections.fresh_transaction(): parent = super(RedshiftAdapter, self) - return parent.drop_relation(relation, model_name) + return parent.drop_relation(relation) @classmethod def convert_text_type(cls, agate_table, col_idx): diff --git a/plugins/redshift/dbt/include/redshift/__init__.py b/plugins/redshift/dbt/include/redshift/__init__.py index 8b17c9fbfc5..b177e5d4932 100644 --- a/plugins/redshift/dbt/include/redshift/__init__.py +++ b/plugins/redshift/dbt/include/redshift/__init__.py @@ -1,3 +1,3 @@ import os -from dbt.include.postgres import PACKAGE_PATH as POSTGRES_PACKAGE_PATH + PACKAGE_PATH = os.path.dirname(__file__) diff --git a/plugins/redshift/dbt/include/redshift/macros/adapters.sql b/plugins/redshift/dbt/include/redshift/macros/adapters.sql index 7fd7063bcf5..29f6ad0b16f 100644 --- a/plugins/redshift/dbt/include/redshift/macros/adapters.sql +++ b/plugins/redshift/dbt/include/redshift/macros/adapters.sql @@ -62,7 +62,7 @@ {{ column_list_for_create_table(columns) }} ) {{ dist('dbt_updated_at') }} - {{ sort('compound', ['scd_id']) }}; + {{ sort('compound', ['dbt_scd_id']) }}; {%- endmacro %} @@ -88,7 +88,7 @@ numeric_precision, numeric_scale - from information_schema.columns + from {{ relation.information_schema('columns') }} where table_name = '{{ relation.identifier }}' ), @@ -153,8 +153,8 @@ {% endmacro %} -{% macro redshift__list_relations_without_caching(database, schema) %} - {{ return(postgres__list_relations_without_caching(database, schema)) }} +{% macro redshift__list_relations_without_caching(information_schema, schema) %} + {{ return(postgres__list_relations_without_caching(information_schema, schema)) }} {% endmacro %} @@ -168,8 +168,8 @@ {%- endmacro %} -{% macro redshift__check_schema_exists(database, 
schema) -%} - {{ return(postgres__check_schema_exists(database, schema)) }} +{% macro redshift__check_schema_exists(information_schema, schema) -%} + {{ return(postgres__check_schema_exists(information_schema, schema)) }} {%- endmacro %} list_schemas diff --git a/plugins/redshift/dbt/include/redshift/macros/catalog.sql b/plugins/redshift/dbt/include/redshift/macros/catalog.sql index 34529df8b02..c6788d9c66c 100644 --- a/plugins/redshift/dbt/include/redshift/macros/catalog.sql +++ b/plugins/redshift/dbt/include/redshift/macros/catalog.sql @@ -1,10 +1,10 @@ -{% macro redshift__get_base_catalog() -%} +{% macro redshift__get_base_catalog(information_schemas) -%} {%- call statement('base_catalog', fetch_result=True) -%} - {% if (databases | length) != 1 %} - exceptions.raise_compiler_error('redshift get_catalog requires exactly one database') + {% if (information_schemas | length) != 1 %} + {{ exceptions.raise_compiler_error('redshift get_catalog requires exactly one database') }} {% endif %} - {% set database = databases[0] %} + {% set database = information_schemas[0].database %} {{ adapter.verify_database(database) }} with late_binding as ( @@ -106,7 +106,7 @@ {{ return(load_result('base_catalog').table) }} {%- endmacro %} -{% macro redshift__get_extended_catalog() %} +{% macro redshift__get_extended_catalog(information_schemas) %} {%- call statement('extended_catalog', fetch_result=True) -%} select @@ -218,12 +218,12 @@ {% endmacro %} -{% macro redshift__get_catalog() %} +{% macro redshift__get_catalog(information_schemas) %} {#-- Compute a left-outer join in memory. Some Redshift queries are -- leader-only, and cannot be joined to other compute-based queries #} - {% set catalog = redshift__get_base_catalog() %} + {% set catalog = redshift__get_base_catalog(information_schemas) %} {% set select_extended = redshift__can_select_from('svv_table_info') %} {% if select_extended %} diff --git a/plugins/redshift/setup.py b/plugins/redshift/setup.py index be5bc45d896..4f7d5829141 100644 --- a/plugins/redshift/setup.py +++ b/plugins/redshift/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-redshift" -package_version = "0.13.0a1" +package_version = "0.13.0" description = """The redshift adapter plugin for dbt (data build tool)""" diff --git a/plugins/snowflake/dbt/adapters/snowflake/__init__.py b/plugins/snowflake/dbt/adapters/snowflake/__init__.py index 1ac7dcbdf2f..40f8aee62d4 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/__init__.py +++ b/plugins/snowflake/dbt/adapters/snowflake/__init__.py @@ -1,6 +1,6 @@ -from dbt.adapters.snowflake.connections import SnowflakeConnectionManager +from dbt.adapters.snowflake.connections import SnowflakeConnectionManager # noqa from dbt.adapters.snowflake.connections import SnowflakeCredentials -from dbt.adapters.snowflake.relation import SnowflakeRelation +from dbt.adapters.snowflake.relation import SnowflakeRelation # noqa from dbt.adapters.snowflake.impl import SnowflakeAdapter from dbt.adapters.base import AdapterPlugin diff --git a/plugins/snowflake/dbt/adapters/snowflake/connections.py b/plugins/snowflake/dbt/adapters/snowflake/connections.py index c7f117a060c..a2116e9c734 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/connections.py +++ b/plugins/snowflake/dbt/adapters/snowflake/connections.py @@ -72,7 +72,7 @@ class SnowflakeConnectionManager(SQLConnectionManager): TYPE = 'snowflake' @contextmanager - def exception_handler(self, sql, connection_name='master'): + def exception_handler(self, sql): try: yield except 
snowflake.connector.errors.ProgrammingError as e: @@ -83,7 +83,7 @@ def exception_handler(self, sql, connection_name='master'): if 'Empty SQL statement' in msg: logger.debug("got empty sql statement, moving on") elif 'This session does not have a current database' in msg: - self.release(connection_name) + self.release() raise dbt.exceptions.FailedToConnectException( ('{}\n\nThis error sometimes occurs when invalid ' 'credentials are provided, or when your default role ' @@ -91,12 +91,17 @@ def exception_handler(self, sql, connection_name='master'): 'Please double check your profile and try again.') .format(msg)) else: - self.release(connection_name) + self.release() raise dbt.exceptions.DatabaseException(msg) except Exception as e: logger.debug("Error running SQL: %s", sql) logger.debug("Rolling back transaction.") - self.release(connection_name) + self.release() + if isinstance(e, dbt.exceptions.RuntimeException): + # during a sql query, an internal to dbt exception was raised. + # this sounds a lot like a signal handler and probably has + # useful information, so raise it without modification. + raise raise dbt.exceptions.RuntimeException(e.msg) @classmethod @@ -141,8 +146,6 @@ def open(cls, connection): raise dbt.exceptions.FailedToConnectException(str(e)) - return connection - def cancel(self, connection): handle = connection.handle sid = handle.session_id @@ -153,7 +156,7 @@ def cancel(self, connection): logger.debug("Cancelling query '{}' ({})".format(connection_name, sid)) - _, cursor = self.add_query(sql, 'master') + _, cursor = self.add_query(sql) res = cursor.fetchone() logger.debug("Cancel query '{}': {}".format(connection_name, res)) @@ -193,7 +196,7 @@ def _get_private_key(cls, private_key_path, private_key_passphrase): format=serialization.PrivateFormat.PKCS8, encryption_algorithm=serialization.NoEncryption()) - def add_query(self, sql, model_name=None, auto_begin=True, + def add_query(self, sql, auto_begin=True, bindings=None, abridge_sql_log=False): connection = None @@ -219,21 +222,24 @@ def add_query(self, sql, model_name=None, auto_begin=True, parent = super(SnowflakeConnectionManager, self) connection, cursor = parent.add_query( - individual_query, model_name, auto_begin, + individual_query, auto_begin, bindings=bindings, abridge_sql_log=abridge_sql_log ) if cursor is None: raise dbt.exceptions.RuntimeException( - "Tried to run an empty query on model '{}'. If you are " - "conditionally running\nsql, eg. in a model hook, make " - "sure your `else` clause contains valid sql!\n\n" - "Provided SQL:\n{}".format(model_name, sql)) + "Tried to run an empty query on model '{}'. If you are " + "conditionally running\nsql, eg. in a model hook, make " + "sure your `else` clause contains valid sql!\n\n" + "Provided SQL:\n{}" + .format(self.nice_connection_name(), sql) + ) return connection, cursor - def _rollback_handle(self, connection): + @classmethod + def _rollback_handle(cls, connection): """On snowflake, rolling back the handle of an aborted session raises an exception. 
""" diff --git a/plugins/snowflake/dbt/adapters/snowflake/impl.py b/plugins/snowflake/dbt/adapters/snowflake/impl.py index 00da5026482..c6df92f2ee0 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/impl.py +++ b/plugins/snowflake/dbt/adapters/snowflake/impl.py @@ -1,12 +1,8 @@ from __future__ import absolute_import -import dbt.compat -import dbt.exceptions - from dbt.adapters.sql import SQLAdapter from dbt.adapters.snowflake import SnowflakeConnectionManager from dbt.adapters.snowflake import SnowflakeRelation -from dbt.logger import GLOBAL_LOGGER as logger from dbt.utils import filter_null_values diff --git a/plugins/snowflake/dbt/adapters/snowflake/relation.py b/plugins/snowflake/dbt/adapters/snowflake/relation.py index a494fb89363..e89b71d89cf 100644 --- a/plugins/snowflake/dbt/adapters/snowflake/relation.py +++ b/plugins/snowflake/dbt/adapters/snowflake/relation.py @@ -1,5 +1,4 @@ from dbt.adapters.base.relation import BaseRelation -import dbt.utils class SnowflakeRelation(BaseRelation): @@ -9,7 +8,7 @@ class SnowflakeRelation(BaseRelation): }, 'quote_character': '"', 'quote_policy': { - 'database': True, + 'database': False, 'schema': False, 'identifier': False, }, diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index c608f496916..a339bb54b12 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -48,7 +48,7 @@ numeric_scale from - {{ information_schema_name(relation.database) }}.columns + {{ relation.information_schema('columns') }} where table_name ilike '{{ relation.identifier }}' {% if relation.schema %} @@ -67,7 +67,7 @@ {% endmacro %} -{% macro snowflake__list_relations_without_caching(database, schema) %} +{% macro snowflake__list_relations_without_caching(information_schema, schema) %} {% call statement('list_relations_without_caching', fetch_result=True) -%} select table_catalog as database, @@ -77,20 +77,20 @@ when table_type = 'VIEW' then 'view' else table_type end as table_type - from {{ information_schema_name(database) }}.tables + from {{ information_schema }}.tables where table_schema ilike '{{ schema }}' - and table_catalog ilike '{{ database }}' + and table_catalog ilike '{{ information_schema.database.lower() }}' {% endcall %} {{ return(load_result('list_relations_without_caching').table) }} {% endmacro %} -{% macro snowflake__check_schema_exists(database, schema) -%} +{% macro snowflake__check_schema_exists(information_schema, schema) -%} {% call statement('check_schema_exists', fetch_result=True) -%} select count(*) - from {{ information_schema_name(database) }}.schemata + from {{ information_schema }}.schemata where upper(schema_name) = upper('{{ schema }}') - and upper(catalog_name) = upper('{{ database }}') + and upper(catalog_name) = upper('{{ information_schema.database }}') {%- endcall %} {{ return(load_result('check_schema_exists').table) }} {%- endmacro %} @@ -98,3 +98,10 @@ {% macro snowflake__current_timestamp() -%} convert_timezone('UTC', current_timestamp()) {%- endmacro %} + + +{% macro snowflake__rename_relation(from_relation, to_relation) -%} + {% call statement('rename_relation') -%} + alter table {{ from_relation }} rename to {{ to_relation }} + {%- endcall %} +{% endmacro %} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/catalog.sql b/plugins/snowflake/dbt/include/snowflake/macros/catalog.sql index d235e687607..fe68cd2e46e 100644 --- 
a/plugins/snowflake/dbt/include/snowflake/macros/catalog.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/catalog.sql @@ -1,8 +1,8 @@ -{% macro snowflake__get_catalog() -%} +{% macro snowflake__get_catalog(information_schemas) -%} {%- call statement('catalog', fetch_result=True) -%} - {% for database in databases %} + {% for information_schema in information_schemas %} ( with tables as ( @@ -31,7 +31,7 @@ 'Approximate size of the table as reported by Snowflake' as "stats:bytes:description", (bytes is not null) as "stats:bytes:include" - from {{ information_schema_name(database) }}.tables + from {{ information_schema }}.tables ), @@ -48,7 +48,7 @@ data_type as "column_type", null as "column_comment" - from {{ adapter.quote_as_configured(database, "database") }}.information_schema.columns + from {{ information_schema }}.columns ) @@ -56,7 +56,6 @@ from tables join columns using ("table_database", "table_schema", "table_name") where "table_schema" != 'INFORMATION_SCHEMA' - and "table_database" = {{ adapter.quote_as_configured(database, "database").replace('"', "'") }} order by "column_index" ) {% if not loop.last %} union all {% endif %} diff --git a/plugins/snowflake/setup.py b/plugins/snowflake/setup.py index ed3c1afc63d..aa28d30ff49 100644 --- a/plugins/snowflake/setup.py +++ b/plugins/snowflake/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup package_name = "dbt-snowflake" -package_version = "0.13.0a1" +package_version = "0.13.0" description = """The snowflake adapter plugin for dbt (data build tool)""" diff --git a/scripts/build-sdists.sh b/scripts/build-sdists.sh new file mode 100755 index 00000000000..8d0db621b23 --- /dev/null +++ b/scripts/build-sdists.sh @@ -0,0 +1,23 @@ +#!/bin/bash -eo pipefail + +DBT_PATH="$( cd "$(dirname "$0")/.." 
; pwd -P )" + +echo $SCRIPTPATH + +set -x + +rm -rf "$DBT_PATH"/dist +mkdir -p "$DBT_PATH"/dist + +for SUBPATH in core plugins/postgres plugins/redshift plugins/bigquery plugins/snowflake +do + rm -rf "$DBT_PATH"/"$SUBPATH"/dist + cd "$DBT_PATH"/"$SUBPATH" + python setup.py sdist + cp -r "$DBT_PATH"/"$SUBPATH"/dist/* "$DBT_PATH"/dist/ +done + +cd "$DBT_PATH" +python setup.py sdist + +set +x diff --git a/setup.py b/setup.py index ffa6cef5164..a1335fb0b2f 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ def read(fname): package_name = "dbt" -package_version = "0.13.0a1" +package_version = "0.13.0" description = """dbt (data build tool) is a command line tool that helps \ analysts and engineers transform data in their warehouse more effectively""" diff --git a/test.env.sample b/test.env.sample index 8cb09b20ef3..163e618f877 100644 --- a/test.env.sample +++ b/test.env.sample @@ -18,6 +18,6 @@ BIGQUERY_CLIENT_X509_CERT_URL= REDSHIFT_TEST_HOST= REDSHIFT_TEST_USER= -REDSHIFT_TEST_PASSWORD= +REDSHIFT_TEST_PASS= REDSHIFT_TEST_PORT= REDSHIFT_TEST_DBNAME= diff --git a/test/integration/001_simple_copy_test/test_simple_copy.py b/test/integration/001_simple_copy_test/test_simple_copy.py index bbe9349e439..281c64d1299 100644 --- a/test/integration/001_simple_copy_test/test_simple_copy.py +++ b/test/integration/001_simple_copy_test/test_simple_copy.py @@ -1,4 +1,3 @@ -from nose.plugins.attrib import attr from test.integration.base import DBTIntegrationTest, use_profile diff --git a/test/integration/004_simple_archive_test/invalidate_bigquery.sql b/test/integration/004_simple_archive_test/invalidate_bigquery.sql index eab9c56999e..32ac3f0eceb 100644 --- a/test/integration/004_simple_archive_test/invalidate_bigquery.sql +++ b/test/integration/004_simple_archive_test/invalidate_bigquery.sql @@ -8,5 +8,5 @@ where id >= 10 and id <= 20; -- invalidate records 11 - 21 update {database}.{schema}.archive_expected set - valid_to = timestamp_add(updated_at, interval 1 hour) + dbt_valid_to = timestamp_add(updated_at, interval 1 hour) where id >= 10 and id <= 20; diff --git a/test/integration/004_simple_archive_test/invalidate_postgres.sql b/test/integration/004_simple_archive_test/invalidate_postgres.sql index 30902625059..491afccfac2 100644 --- a/test/integration/004_simple_archive_test/invalidate_postgres.sql +++ b/test/integration/004_simple_archive_test/invalidate_postgres.sql @@ -1,12 +1,27 @@ -- update records 11 - 21. 
Change email and updated_at field update {schema}.seed set - "updated_at" = "updated_at" + interval '1 hour', - "email" = 'new_' || "email" -where "id" >= 10 and "id" <= 20; + updated_at = updated_at + interval '1 hour', + email = 'new_' || email +where id >= 10 and id <= 20; -- invalidate records 11 - 21 update {schema}.archive_expected set - "valid_to" = "updated_at" + interval '1 hour' -where "id" >= 10 and "id" <= 20; + dbt_valid_to = updated_at + interval '1 hour' +where id >= 10 and id <= 20; + + +update {schema}.archive_castillo_expected set + dbt_valid_to = updated_at + interval '1 hour' +where id >= 10 and id <= 20; + + +update {schema}.archive_alvarez_expected set + dbt_valid_to = updated_at + interval '1 hour' +where id >= 10 and id <= 20; + + +update {schema}.archive_kelly_expected set + dbt_valid_to = updated_at + interval '1 hour' +where id >= 10 and id <= 20; diff --git a/test/integration/004_simple_archive_test/invalidate_snowflake.sql b/test/integration/004_simple_archive_test/invalidate_snowflake.sql index 4e45144a611..86e3a3c7a40 100644 --- a/test/integration/004_simple_archive_test/invalidate_snowflake.sql +++ b/test/integration/004_simple_archive_test/invalidate_snowflake.sql @@ -1,12 +1,12 @@ -- update records 11 - 21. Change email and updated_at field update {database}.{schema}.seed set - "updated_at" = DATEADD(hour, 1, "updated_at"), - "email" = 'new_' || "email" -where "id" >= 10 and "id" <= 20; + updated_at = DATEADD(hour, 1, updated_at), + email = 'new_' || email +where id >= 10 and id <= 20; -- invalidate records 11 - 21 update {database}.{schema}.archive_expected set - "valid_to" = DATEADD(hour, 1, "updated_at") -where "id" >= 10 and "id" <= 20; + dbt_valid_to = DATEADD(hour, 1, updated_at) +where id >= 10 and id <= 20; diff --git a/test/integration/004_simple_archive_test/models/ref_archive.sql b/test/integration/004_simple_archive_test/models/ref_archive.sql new file mode 100644 index 00000000000..5e92ea1da6e --- /dev/null +++ b/test/integration/004_simple_archive_test/models/ref_archive.sql @@ -0,0 +1 @@ +select * from {{ ref('archive_actual') }} diff --git a/test/integration/004_simple_archive_test/seed.sql b/test/integration/004_simple_archive_test/seed.sql index 6113209244b..9edda7a3565 100644 --- a/test/integration/004_simple_archive_test/seed.sql +++ b/test/integration/004_simple_archive_test/seed.sql @@ -1,32 +1,32 @@ -create table {database}.{schema}.seed ( - "id" INTEGER, - "first_name" VARCHAR(50), - "last_name" VARCHAR(50), - "email" VARCHAR(50), - "gender" VARCHAR(50), - "ip_address" VARCHAR(20), - "updated_at" TIMESTAMP WITHOUT TIME ZONE + create table {database}.{schema}.seed ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + updated_at TIMESTAMP WITHOUT TIME ZONE ); create table {database}.{schema}.archive_expected ( - "id" INTEGER, - "first_name" VARCHAR(50), - "last_name" VARCHAR(50), - "email" VARCHAR(50), - "gender" VARCHAR(50), - "ip_address" VARCHAR(20), + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), -- archival fields - "updated_at" TIMESTAMP WITHOUT TIME ZONE, - "valid_from" TIMESTAMP WITHOUT TIME ZONE, - "valid_to" TIMESTAMP WITHOUT TIME ZONE, - "scd_id" VARCHAR(255), - "dbt_updated_at" TIMESTAMP WITHOUT TIME ZONE + updated_at TIMESTAMP WITHOUT TIME ZONE, + dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, + dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, + dbt_scd_id 
VARCHAR(255), + dbt_updated_at TIMESTAMP WITHOUT TIME ZONE ); -- seed inserts -insert into {database}.{schema}.seed ("id", "first_name", "last_name", "email", "gender", "ip_address", "updated_at") values +insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values (1, 'Judith', 'Kennedy', 'jkennedy0@phpbb.com', 'Female', '54.60.24.128', '2015-12-24 12:19:28'), (2, 'Arthur', 'Kelly', 'akelly1@eepurl.com', 'Male', '62.56.24.215', '2015-10-28 16:22:15'), (3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'), @@ -51,30 +51,171 @@ insert into {database}.{schema}.seed ("id", "first_name", "last_name", "email", -- populate archive table insert into {database}.{schema}.archive_expected ( - "id", - "first_name", - "last_name", - "email", - "gender", - "ip_address", - "updated_at", - "valid_from", - "valid_to", - "dbt_updated_at", - "scd_id" + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id ) select - "id", - "first_name", - "last_name", - "email", - "gender", - "ip_address", - "updated_at", + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, -- fields added by archival - "updated_at" as valid_from, - null::timestamp as valid_to, - "updated_at" as dbt_updated_at, - md5("id" || '-' || "first_name" || '|' || "updated_at"::text) as scd_id + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id from {database}.{schema}.seed; + + + +create table {database}.{schema}.archive_castillo_expected ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + + -- archival fields + updated_at TIMESTAMP WITHOUT TIME ZONE, + dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, + dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, + dbt_scd_id VARCHAR(255), + dbt_updated_at TIMESTAMP WITHOUT TIME ZONE +); + +-- one entry +insert into {database}.{schema}.archive_castillo_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by archival + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id +from {database}.{schema}.seed where last_name = 'Castillo'; + +create table {database}.{schema}.archive_alvarez_expected ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + + -- archival fields + updated_at TIMESTAMP WITHOUT TIME ZONE, + dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, + dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, + dbt_scd_id VARCHAR(255), + dbt_updated_at TIMESTAMP WITHOUT TIME ZONE +); + +-- 0 entries +insert into {database}.{schema}.archive_alvarez_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by archival + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || 
'-' || first_name || '|' || updated_at::text) as dbt_scd_id +from {database}.{schema}.seed where last_name = 'Alvarez'; + +create table {database}.{schema}.archive_kelly_expected ( + id INTEGER, + first_name VARCHAR(50), + last_name VARCHAR(50), + email VARCHAR(50), + gender VARCHAR(50), + ip_address VARCHAR(20), + + -- archival fields + updated_at TIMESTAMP WITHOUT TIME ZONE, + dbt_valid_from TIMESTAMP WITHOUT TIME ZONE, + dbt_valid_to TIMESTAMP WITHOUT TIME ZONE, + dbt_scd_id VARCHAR(255), + dbt_updated_at TIMESTAMP WITHOUT TIME ZONE +); + + +-- 2 entries +insert into {database}.{schema}.archive_kelly_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by archival + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id +from {database}.{schema}.seed where last_name = 'Kelly'; diff --git a/test/integration/004_simple_archive_test/seed_bq.sql b/test/integration/004_simple_archive_test/seed_bq.sql index 17b45f1e945..9ef91799604 100644 --- a/test/integration/004_simple_archive_test/seed_bq.sql +++ b/test/integration/004_simple_archive_test/seed_bq.sql @@ -1,32 +1,32 @@ create table {database}.{schema}.seed ( - `id` INT64, - `first_name` STRING, - `last_name` STRING, - `email` STRING, - `gender` STRING, - `ip_address` STRING, - `updated_at` TIMESTAMP + id INT64, + first_name STRING, + last_name STRING, + email STRING, + gender STRING, + ip_address STRING, + updated_at TIMESTAMP ); create table {database}.{schema}.archive_expected ( - `id` INT64, - `first_name` STRING, - `last_name` STRING, - `email` STRING, - `gender` STRING, - `ip_address` STRING, + id INT64, + first_name STRING, + last_name STRING, + email STRING, + gender STRING, + ip_address STRING, -- archival fields - `updated_at` TIMESTAMP, - `valid_from` TIMESTAMP, - `valid_to` TIMESTAMP, - `scd_id` STRING, - `dbt_updated_at` TIMESTAMP + updated_at TIMESTAMP, + dbt_valid_from TIMESTAMP, + dbt_valid_to TIMESTAMP, + dbt_scd_id STRING, + dbt_updated_at TIMESTAMP ); -- seed inserts -insert {database}.{schema}.seed (`id`, `first_name`, `last_name`, `email`, `gender`, `ip_address`, `updated_at`) values +insert {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values (1, 'Judith', 'Kennedy', 'jkennedy0@phpbb.com', 'Female', '54.60.24.128', '2015-12-24 12:19:28'), (2, 'Arthur', 'Kelly', 'akelly1@eepurl.com', 'Male', '62.56.24.215', '2015-10-28 16:22:15'), (3, 'Rachel', 'Moreno', 'rmoreno2@msu.edu', 'Female', '31.222.249.23', '2016-04-05 02:05:30'), @@ -51,31 +51,31 @@ insert {database}.{schema}.seed (`id`, `first_name`, `last_name`, `email`, `gend -- populate archive table insert {database}.{schema}.archive_expected ( - `id`, - `first_name`, - `last_name`, - `email`, - `gender`, - `ip_address`, - `updated_at`, - `valid_from`, - `valid_to`, - `dbt_updated_at`, - `scd_id` + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id ) select - `id`, - `first_name`, - `last_name`, - `email`, - `gender`, - `ip_address`, - `updated_at`, + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, -- fields added by archival - `updated_at` as valid_from, - cast(null as timestamp) as 
valid_to, - `updated_at` as dbt_updated_at, - to_hex(md5(concat(cast(`id` as string), '-', `first_name`, '|', cast(`updated_at` as string)))) as scd_id + updated_at as dbt_valid_from, + cast(null as timestamp) as dbt_valid_to, + updated_at as dbt_updated_at, + to_hex(md5(concat(cast(id as string), '-', first_name, '|', cast(updated_at as string)))) as dbt_scd_id from {database}.{schema}.seed; diff --git a/test/integration/004_simple_archive_test/test-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-archives-bq/archive.sql new file mode 100644 index 00000000000..d7dec9d043e --- /dev/null +++ b/test/integration/004_simple_archive_test/test-archives-bq/archive.sql @@ -0,0 +1,14 @@ +{% archive archive_actual %} + + {{ + config( + target_database=var('target_database', database), + target_schema=schema, + unique_key='concat(cast(id as string) , "-", first_name)', + strategy='timestamp', + updated_at='updated_at', + ) + }} + select * from `{{database}}`.`{{schema}}`.seed + +{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql b/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql new file mode 100644 index 00000000000..3bbe49664c1 --- /dev/null +++ b/test/integration/004_simple_archive_test/test-archives-invalid/archive.sql @@ -0,0 +1,12 @@ +{% archive no_target_database %} + {{ + config( + target_schema=schema, + unique_key='id || ' ~ "'-'" ~ ' || first_name', + strategy='timestamp', + updated_at='updated_at', + ) + }} + select * from {{database}}.{{schema}}.seed + +{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-pg/archive.sql b/test/integration/004_simple_archive_test/test-archives-pg/archive.sql new file mode 100644 index 00000000000..9117a8df1a4 --- /dev/null +++ b/test/integration/004_simple_archive_test/test-archives-pg/archive.sql @@ -0,0 +1,14 @@ +{% archive archive_actual %} + + {{ + config( + target_database=var('target_database', database), + target_schema=schema, + unique_key='id || ' ~ "'-'" ~ ' || first_name', + strategy='timestamp', + updated_at='updated_at', + ) + }} + select * from {{database}}.{{schema}}.seed + +{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-archives-select/archives.sql b/test/integration/004_simple_archive_test/test-archives-select/archives.sql new file mode 100644 index 00000000000..30e78fe720d --- /dev/null +++ b/test/integration/004_simple_archive_test/test-archives-select/archives.sql @@ -0,0 +1,45 @@ +{% archive archive_castillo %} + + {{ + config( + target_database=var('target_database', database), + target_schema=schema, + unique_key='id || ' ~ "'-'" ~ ' || first_name', + strategy='timestamp', + updated_at='updated_at', + ) + }} + select * from {{database}}.{{schema}}.seed where last_name = 'Castillo' + +{% endarchive %} + +{% archive archive_alvarez %} + + {{ + config( + target_database=var('target_database', database), + target_schema=schema, + unique_key='id || ' ~ "'-'" ~ ' || first_name', + strategy='timestamp', + updated_at='updated_at', + ) + }} + select * from {{database}}.{{schema}}.seed where last_name = 'Alvarez' + +{% endarchive %} + + +{% archive archive_kelly %} + + {{ + config( + target_database=var('target_database', database), + target_schema=schema, + unique_key='id || ' ~ "'-'" ~ ' || first_name', + strategy='timestamp', + updated_at='updated_at', + ) + }} + select * from {{database}}.{{schema}}.seed where last_name = 'Kelly' + +{% endarchive %} diff --git 
a/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql new file mode 100644 index 00000000000..40a2563291f --- /dev/null +++ b/test/integration/004_simple_archive_test/test-check-col-archives-bq/archive.sql @@ -0,0 +1,27 @@ +{% archive archive_actual %} + {{ + config( + target_database=var('target_database', database), + target_schema=schema, + unique_key='concat(cast(id as string) , "-", first_name)', + strategy='check', + check_cols=('email',), + ) + }} + select * from `{{database}}`.`{{schema}}`.seed +{% endarchive %} + + +{# This should be exactly the same #} +{% archive archive_checkall %} + {{ + config( + target_database=var('target_database', database), + target_schema=schema, + unique_key='concat(cast(id as string) , "-", first_name)', + strategy='check', + check_cols='all', + ) + }} + select * from `{{database}}`.`{{schema}}`.seed +{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql b/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql new file mode 100644 index 00000000000..c3ee6fe2038 --- /dev/null +++ b/test/integration/004_simple_archive_test/test-check-col-archives/archive.sql @@ -0,0 +1,28 @@ +{% archive archive_actual %} + + {{ + config( + target_database=var('target_database', database), + target_schema=schema, + unique_key='id || ' ~ "'-'" ~ ' || first_name', + strategy='check', + check_cols=['email'], + ) + }} + select * from {{database}}.{{schema}}.seed + +{% endarchive %} + +{# This should be exactly the same #} +{% archive archive_checkall %} + {{ + config( + target_database=var('target_database', database), + target_schema=schema, + unique_key='id || ' ~ "'-'" ~ ' || first_name', + strategy='check', + check_cols='all', + ) + }} + select * from {{database}}.{{schema}}.seed +{% endarchive %} diff --git a/test/integration/004_simple_archive_test/test_simple_archive.py b/test/integration/004_simple_archive_test/test_simple_archive.py index b2ee1129511..e2b36b53a28 100644 --- a/test/integration/004_simple_archive_test/test_simple_archive.py +++ b/test/integration/004_simple_archive_test/test_simple_archive.py @@ -1,7 +1,9 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile +import dbt.exceptions + class TestSimpleArchive(DBTIntegrationTest): + NUM_ARCHIVE_MODELS = 1 @property def schema(self): @@ -11,6 +13,9 @@ def schema(self): def models(self): return "test/integration/004_simple_archive_test/models" + def run_archive(self): + return self.run_dbt(['archive']) + @property def project_config(self): source_table = 'seed' @@ -28,8 +33,8 @@ def project_config(self): { "source_table": source_table, "target_table": "archive_actual", - "updated_at": '"updated_at"', - "unique_key": '''"id" || '-' || "first_name"''' + "updated_at": 'updated_at', + "unique_key": '''id || '-' || first_name''' }, ], }, @@ -39,63 +44,71 @@ def project_config(self): def dbt_run_seed_archive(self): self.run_sql_file('test/integration/004_simple_archive_test/seed.sql') - results = self.run_dbt(["archive"]) - self.assertEqual(len(results), 1) + results = self.run_archive() + self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) + + def assert_case_tables_equal(self, actual, expected): + if self.adapter_type == 'snowflake': + actual = actual.upper() + expected = expected.upper() + + 
self.assertTablesEqual(actual, expected) + def assert_expected(self): + self.assert_case_tables_equal('archive_actual', 'archive_expected') - @attr(type='postgres') + @use_profile('postgres') def test__postgres__simple_archive(self): self.dbt_run_seed_archive() - self.assertTablesEqual("archive_expected","archive_actual") + self.assert_expected() self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - results = self.run_dbt(["archive"]) - self.assertEqual(len(results), 1) + results = self.run_archive() + self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - self.assertTablesEqual("archive_expected","archive_actual") + self.assert_expected() - @attr(type='snowflake') + @use_profile('snowflake') def test__snowflake__simple_archive(self): self.dbt_run_seed_archive() - self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL") + self.assert_expected() self.run_sql_file("test/integration/004_simple_archive_test/invalidate_snowflake.sql") self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - results = self.run_dbt(["archive"]) - self.assertEqual(len(results), 1) + results = self.run_archive() + self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL") + self.assert_expected() - @attr(type='redshift') + @use_profile('redshift') def test__redshift__simple_archive(self): self.dbt_run_seed_archive() - self.assertTablesEqual("archive_expected","archive_actual") + self.assert_expected() self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - results = self.run_dbt(["archive"]) - self.assertEqual(len(results), 1) + results = self.run_archive() + self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) - self.assertTablesEqual("archive_expected","archive_actual") + self.assert_expected() - @attr(type='presto') + @use_profile('presto') def test__presto__simple_archive_disabled(self): results = self.run_dbt(["seed"]) - self.assertEqual(len(results), 1) + self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) # presto does not run archives results = self.run_dbt(["archive"], expect_pass=False) - self.assertEqual(len(results), 1) + self.assertEqual(len(results), self.NUM_ARCHIVE_MODELS) self.assertIn('not implemented for presto', results[0].error) - class TestSimpleArchiveBigquery(DBTIntegrationTest): @property @@ -125,7 +138,10 @@ def project_config(self): ] } - @attr(type='bigquery') + def assert_expected(self): + self.assertTablesEqual('archive_actual', 'archive_expected') + + @use_profile('bigquery') def test__bigquery__simple_archive(self): self.use_default_project() self.use_profile('bigquery') @@ -134,17 +150,17 @@ def test__bigquery__simple_archive(self): self.run_dbt(["archive"]) - self.assertTablesEqual("archive_expected", "archive_actual") + self.assert_expected() self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") self.run_dbt(["archive"]) - self.assertTablesEqual("archive_expected", "archive_actual") + self.assert_expected() - @attr(type='bigquery') + @use_profile('bigquery') def test__bigquery__archive_with_new_field(self): self.use_default_project() self.use_profile('bigquery') @@ -206,8 +222,8 @@ def archive_project_config(self): return { "source_table": 'SEED', "target_table": 
"archive_actual", - "updated_at": '"updated_at"', - "unique_key": '''"id" || '-' || "first_name"''' + "updated_at": 'updated_at', + "unique_key": '''id || '-' || first_name''' } else: return { @@ -230,11 +246,14 @@ def project_config(self): ] } - @attr(type='snowflake') + def run_archive(self): + return self.run_dbt(['archive']) + + @use_profile('snowflake') def test__snowflake__cross_archive(self): self.run_sql_file("test/integration/004_simple_archive_test/seed.sql") - results = self.run_dbt(["archive"]) + results = self.run_archive() self.assertEqual(len(results), 1) self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL", table_b_db=self.alternative_database) @@ -242,23 +261,240 @@ def test__snowflake__cross_archive(self): self.run_sql_file("test/integration/004_simple_archive_test/invalidate_snowflake.sql") self.run_sql_file("test/integration/004_simple_archive_test/update.sql") - results = self.run_dbt(["archive"]) + results = self.run_archive() self.assertEqual(len(results), 1) self.assertTablesEqual("ARCHIVE_EXPECTED", "ARCHIVE_ACTUAL", table_b_db=self.alternative_database) - @attr(type='bigquery') + @use_profile('bigquery') def test__bigquery__cross_archive(self): self.run_sql_file("test/integration/004_simple_archive_test/seed_bq.sql") - self.run_dbt(["archive"]) + self.run_archive() self.assertTablesEqual("archive_expected", "archive_actual", table_b_db=self.alternative_database) self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") - self.run_dbt(["archive"]) + self.run_archive() self.assertTablesEqual("archive_expected", "archive_actual", table_b_db=self.alternative_database) + +class TestSimpleArchiveFiles(TestSimpleArchive): + @property + def project_config(self): + return { + "data-paths": ['test/integration/004_simple_archive_test/data'], + "archive-paths": ['test/integration/004_simple_archive_test/test-archives-pg'], + } + + @use_profile('postgres') + def test__postgres_ref_archive(self): + self.dbt_run_seed_archive() + results = self.run_dbt(['run']) + self.assertEqual(len(results), 1) + + +class TestSimpleArchiveFileSelects(DBTIntegrationTest): + @property + def schema(self): + return "simple_archive_004" + + @property + def models(self): + return "test/integration/004_simple_archive_test/models" + + @property + def project_config(self): + return { + "data-paths": ['test/integration/004_simple_archive_test/data'], + "archive-paths": ['test/integration/004_simple_archive_test/test-archives-select', + 'test/integration/004_simple_archive_test/test-archives-pg'], + } + + @use_profile('postgres') + def test__postgres__select_archives(self): + self.run_sql_file('test/integration/004_simple_archive_test/seed.sql') + + results = self.run_dbt(['archive']) + self.assertEqual(len(results), 4) + self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') + self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') + self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') + self.assertTablesEqual('archive_actual', 'archive_expected') + + self.run_sql_file("test/integration/004_simple_archive_test/invalidate_postgres.sql") + self.run_sql_file("test/integration/004_simple_archive_test/update.sql") + + results = self.run_dbt(['archive']) + self.assertEqual(len(results), 4) + self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') + self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') + 
self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') + self.assertTablesEqual('archive_actual', 'archive_expected') + + @use_profile('postgres') + def test__postgres_exclude_archives(self): + self.run_sql_file('test/integration/004_simple_archive_test/seed.sql') + results = self.run_dbt(['archive', '--exclude', 'archive_castillo']) + self.assertEqual(len(results), 3) + self.assertTableDoesNotExist('archive_castillo') + self.assertTablesEqual('archive_alvarez', 'archive_alvarez_expected') + self.assertTablesEqual('archive_kelly', 'archive_kelly_expected') + self.assertTablesEqual('archive_actual', 'archive_expected') + + @use_profile('postgres') + def test__postgres_select_archives(self): + self.run_sql_file('test/integration/004_simple_archive_test/seed.sql') + results = self.run_dbt(['archive', '--models', 'archive_castillo']) + self.assertEqual(len(results), 1) + self.assertTablesEqual('archive_castillo', 'archive_castillo_expected') + self.assertTableDoesNotExist('archive_alvarez') + self.assertTableDoesNotExist('archive_kelly') + self.assertTableDoesNotExist('archive_actual') + + +class TestSimpleArchiveFilesBigquery(TestSimpleArchiveBigquery): + @property + def project_config(self): + return { + "archive-paths": ['test/integration/004_simple_archive_test/test-archives-bq'], + } + + +class TestCrossDBArchiveFiles(TestCrossDBArchive): + @property + def project_config(self): + if self.adapter_type == 'snowflake': + paths = ['test/integration/004_simple_archive_test/test-archives-pg'] + else: + paths = ['test/integration/004_simple_archive_test/test-archives-bq'] + return { + 'archive-paths': paths, + } + + def run_archive(self): + return self.run_dbt(['archive', '--vars', '{{"target_database": {}}}'.format(self.alternative_database)]) + + +class TestBadArchive(DBTIntegrationTest): + @property + def schema(self): + return "simple_archive_004" + + @property + def models(self): + return "test/integration/004_simple_archive_test/models" + + @property + def project_config(self): + return { + "archive-paths": ['test/integration/004_simple_archive_test/test-archives-invalid'], + } + + @use_profile('postgres') + def test__postgres__invalid(self): + with self.assertRaises(dbt.exceptions.CompilationException) as exc: + self.run_dbt(['compile'], expect_pass=False) + + self.assertIn('target_database', str(exc.exception)) + + +class TestCheckCols(TestSimpleArchiveFiles): + NUM_ARCHIVE_MODELS = 2 + def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): + # When building the equality tests, only test columns that don't start + # with 'dbt_', because those are time-sensitive + if columns is None: + columns = [c for c in self.get_relation_columns(relation_a) if not c[0].lower().startswith('dbt_')] + return super(TestCheckCols, self)._assertTablesEqualSql( + relation_a, + relation_b, + columns=columns + ) + + def assert_expected(self): + super(TestCheckCols, self).assert_expected() + self.assert_case_tables_equal('archive_checkall', 'archive_expected') + + @property + def project_config(self): + return { + "data-paths": ['test/integration/004_simple_archive_test/data'], + "archive-paths": ['test/integration/004_simple_archive_test/test-check-col-archives'], + } + + +class TestCheckColsBigquery(TestSimpleArchiveFilesBigquery): + def _assertTablesEqualSql(self, relation_a, relation_b, columns=None): + # When building the equality tests, only test columns that don't start + # with 'dbt_', because those are time-sensitive + if columns is None: + columns = [c for c in 
self.get_relation_columns(relation_a) if not c[0].lower().startswith('dbt_')] + return super(TestCheckColsBigquery, self)._assertTablesEqualSql( + relation_a, + relation_b, + columns=columns + ) + + def assert_expected(self): + super(TestCheckColsBigquery, self).assert_expected() + self.assertTablesEqual('archive_checkall', 'archive_expected') + + @property + def project_config(self): + return { + "data-paths": ['test/integration/004_simple_archive_test/data'], + "archive-paths": ['test/integration/004_simple_archive_test/test-check-col-archives-bq'], + } + + @use_profile('bigquery') + def test__bigquery__archive_with_new_field(self): + self.use_default_project() + self.use_profile('bigquery') + + self.run_sql_file("test/integration/004_simple_archive_test/seed_bq.sql") + + self.run_dbt(["archive"]) + + self.assertTablesEqual("archive_expected", "archive_actual") + self.assertTablesEqual("archive_expected", "archive_checkall") + + self.run_sql_file("test/integration/004_simple_archive_test/invalidate_bigquery.sql") + self.run_sql_file("test/integration/004_simple_archive_test/update_bq.sql") + + # This adds new fields to the source table, and updates the expected archive output accordingly + self.run_sql_file("test/integration/004_simple_archive_test/add_column_to_source_bq.sql") + + # this should fail because `check="all"` will try to compare the nested field + self.run_dbt(['archive'], expect_pass=False) + + self.run_dbt(["archive", '-m', 'archive_actual']) + + # A more thorough test would assert that archived == expected, but BigQuery does not support the + # "EXCEPT DISTINCT" operator on nested fields! Instead, just check that schemas are congruent. + + expected_cols = self.get_table_columns( + database=self.default_database, + schema=self.unique_schema(), + table='archive_expected' + ) + archived_cols = self.get_table_columns( + database=self.default_database, + schema=self.unique_schema(), + table='archive_actual' + ) + + self.assertTrue(len(expected_cols) > 0, "source table does not exist -- bad test") + self.assertEqual(len(expected_cols), len(archived_cols), "actual and expected column lengths are different") + + for (expected_col, actual_col) in zip(expected_cols, archived_cols): + expected_name, expected_type, _ = expected_col + actual_name, actual_type, _ = actual_col + self.assertTrue(expected_name is not None) + self.assertTrue(expected_type is not None) + + self.assertEqual(expected_name, actual_name, "names are different") + self.assertEqual(expected_type, actual_type, "data types are different") diff --git a/test/integration/004_simple_archive_test/update.sql b/test/integration/004_simple_archive_test/update.sql index f353e355875..0959cf9fa3f 100644 --- a/test/integration/004_simple_archive_test/update.sql +++ b/test/integration/004_simple_archive_test/update.sql @@ -1,38 +1,130 @@ -- insert v2 of the 11 - 21 records insert into {database}.{schema}.archive_expected ( - "id", - "first_name", - "last_name", - "email", - "gender", - "ip_address", - "updated_at", - "valid_from", - "valid_to", - "dbt_updated_at", - "scd_id" + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id ) select - "id", - "first_name", - "last_name", - "email", - "gender", - "ip_address", - "updated_at", + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, -- fields added by archival - "updated_at" as "valid_from", - null::timestamp as "valid_to", - "updated_at" as "dbt_updated_at", - 
md5("id" || '-' || "first_name" || '|' || "updated_at"::text) as "scd_id" + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id from {database}.{schema}.seed -where "id" >= 10 and "id" <= 20; +where id >= 10 and id <= 20; +insert into {database}.{schema}.archive_castillo_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by archival + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id +from {database}.{schema}.seed +where id >= 10 and id <= 20 and last_name = 'Castillo'; + + +insert into {database}.{schema}.archive_alvarez_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by archival + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id +from {database}.{schema}.seed +where id >= 10 and id <= 20 and last_name = 'Alvarez'; + + +insert into {database}.{schema}.archive_kelly_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by archival + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id +from {database}.{schema}.seed +where id >= 10 and id <= 20 and last_name = 'Kelly'; + -- insert 10 new records -insert into {database}.{schema}.seed ("id", "first_name", "last_name", "email", "gender", "ip_address", "updated_at") values +insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values (21, 'Judy', 'Robinson', 'jrobinsonk@blogs.com', 'Female', '208.21.192.232', '2016-09-18 08:27:38'), (22, 'Kevin', 'Alvarez', 'kalvarezl@buzzfeed.com', 'Male', '228.106.146.9', '2016-07-29 03:07:37'), (23, 'Barbara', 'Carr', 'bcarrm@pen.io', 'Female', '106.165.140.17', '2015-09-24 13:27:23'), @@ -47,31 +139,123 @@ insert into {database}.{schema}.seed ("id", "first_name", "last_name", "email", -- add these new records to the archive table insert into {database}.{schema}.archive_expected ( - "id", - "first_name", - "last_name", - "email", - "gender", - "ip_address", - "updated_at", - "valid_from", - "valid_to", - "dbt_updated_at", - "scd_id" + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by archival + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id +from {database}.{schema}.seed +where id > 20; + + +-- add these new records to the archive table 
+insert into {database}.{schema}.archive_castillo_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by archival + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id +from {database}.{schema}.seed +where id > 20 and last_name = 'Castillo'; + +insert into {database}.{schema}.archive_alvarez_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id +) + +select + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + -- fields added by archival + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id +from {database}.{schema}.seed +where id > 20 and last_name = 'Alvarez'; + +insert into {database}.{schema}.archive_kelly_expected ( + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id ) select - "id", - "first_name", - "last_name", - "email", - "gender", - "ip_address", - "updated_at", + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, -- fields added by archival - "updated_at" as "valid_from", - null::timestamp as "valid_to", - "updated_at" as "dbt_updated_at", - md5("id" || '-' || "first_name" || '|' || "updated_at"::text) as "scd_id" + updated_at as dbt_valid_from, + null::timestamp as dbt_valid_to, + updated_at as dbt_updated_at, + md5(id || '-' || first_name || '|' || updated_at::text) as dbt_scd_id from {database}.{schema}.seed -where "id" > 20; +where id > 20 and last_name = 'Kelly'; diff --git a/test/integration/004_simple_archive_test/update_bq.sql b/test/integration/004_simple_archive_test/update_bq.sql index 6ce7835ddd8..aa56fb839a9 100644 --- a/test/integration/004_simple_archive_test/update_bq.sql +++ b/test/integration/004_simple_archive_test/update_bq.sql @@ -1,38 +1,38 @@ -- insert v2 of the 11 - 21 records insert {database}.{schema}.archive_expected ( - `id`, - `first_name`, - `last_name`, - `email`, - `gender`, - `ip_address`, - `updated_at`, - `valid_from`, - `valid_to`, - `dbt_updated_at`, - `scd_id` + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id ) select - `id`, - `first_name`, - `last_name`, - `email`, - `gender`, - `ip_address`, - `updated_at`, + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, -- fields added by archival - `updated_at` as `valid_from`, - cast(null as timestamp) as `valid_to`, - `updated_at` as `dbt_updated_at`, - to_hex(md5(concat(cast(`id` as string), '-', `first_name`, '|', cast(`updated_at` as string)))) as `scd_id` + updated_at as dbt_valid_from, + cast(null as timestamp) as dbt_valid_to, + updated_at as dbt_updated_at, + to_hex(md5(concat(cast(id as string), '-', first_name, '|', cast(updated_at as string)))) as dbt_scd_id from {database}.{schema}.seed -where `id` >= 10 and `id` <= 20; +where id >= 10 and id <= 20; -- insert 10 new records -insert into {database}.{schema}.seed (`id`, `first_name`, `last_name`, `email`, `gender`, `ip_address`, 
`updated_at`) values +insert into {database}.{schema}.seed (id, first_name, last_name, email, gender, ip_address, updated_at) values (21, 'Judy', 'Robinson', 'jrobinsonk@blogs.com', 'Female', '208.21.192.232', '2016-09-18 08:27:38'), (22, 'Kevin', 'Alvarez', 'kalvarezl@buzzfeed.com', 'Male', '228.106.146.9', '2016-07-29 03:07:37'), (23, 'Barbara', 'Carr', 'bcarrm@pen.io', 'Female', '106.165.140.17', '2015-09-24 13:27:23'), @@ -47,32 +47,32 @@ insert into {database}.{schema}.seed (`id`, `first_name`, `last_name`, `email`, -- add these new records to the archive table insert {database}.{schema}.archive_expected ( - `id`, - `first_name`, - `last_name`, - `email`, - `gender`, - `ip_address`, - `updated_at`, - `valid_from`, - `valid_to`, - `dbt_updated_at`, - `scd_id` + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, + dbt_valid_from, + dbt_valid_to, + dbt_updated_at, + dbt_scd_id ) select - `id`, - `first_name`, - `last_name`, - `email`, - `gender`, - `ip_address`, - `updated_at`, + id, + first_name, + last_name, + email, + gender, + ip_address, + updated_at, -- fields added by archival - `updated_at` as `valid_from`, - cast(null as timestamp) as `valid_to`, - `updated_at` as `dbt_updated_at`, - to_hex(md5(concat(cast(`id` as string), '-', `first_name`, '|', cast(`updated_at` as string)))) as `scd_id` + updated_at as dbt_valid_from, + cast(null as timestamp) as dbt_valid_to, + updated_at as dbt_updated_at, + to_hex(md5(concat(cast(id as string), '-', first_name, '|', cast(updated_at as string)))) as dbt_scd_id from {database}.{schema}.seed -where `id` > 20; +where id > 20; diff --git a/test/integration/005_simple_seed_test/test_seed_type_override.py b/test/integration/005_simple_seed_test/test_seed_type_override.py index ac585de6453..e565d4b3bc2 100644 --- a/test/integration/005_simple_seed_test/test_seed_type_override.py +++ b/test/integration/005_simple_seed_test/test_seed_type_override.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestSimpleSeedColumnOverride(DBTIntegrationTest): @@ -40,7 +39,7 @@ def seed_types(self): "birthday": "date", } - @attr(type='postgres') + @use_profile('postgres') def test_simple_seed_with_column_override_postgres(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -63,7 +62,7 @@ def seed_types(self): def profile_config(self): return self.snowflake_profile() - @attr(type='snowflake') + @use_profile('snowflake') def test_simple_seed_with_column_override_snowflake(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -86,7 +85,7 @@ def seed_types(self): def profile_config(self): return self.bigquery_profile() - @attr(type='bigquery') + @use_profile('bigquery') def test_simple_seed_with_column_override_bigquery(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) diff --git a/test/integration/005_simple_seed_test/test_simple_seed.py b/test/integration/005_simple_seed_test/test_simple_seed.py index af3f4b01dfd..94f6fa86dc2 100644 --- a/test/integration/005_simple_seed_test/test_simple_seed.py +++ b/test/integration/005_simple_seed_test/test_simple_seed.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile from dbt.exceptions import CompilationException @@ -24,7 +23,7 @@ def project_config(self): "data-paths": 
['test/integration/005_simple_seed_test/data'] } - @attr(type='postgres') + @use_profile('postgres') def test_simple_seed(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -37,7 +36,7 @@ def test_simple_seed(self): self.assertTablesEqual("seed_actual","seed_expected") - @attr(type='postgres') + @use_profile('postgres') def test_simple_seed_with_drop(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -72,7 +71,7 @@ def project_config(self): } } - @attr(type='postgres') + @use_profile('postgres') def test_simple_seed_with_schema(self): schema_name = "{}_{}".format(self.unique_schema(), 'custom_schema') @@ -86,7 +85,7 @@ def test_simple_seed_with_schema(self): self.assertTablesEqual("seed_actual","seed_expected", table_a_schema=schema_name) - @attr(type='postgres') + @use_profile('postgres') def test_simple_seed_with_drop_and_schema(self): schema_name = "{}_{}".format(self.unique_schema(), 'custom_schema') @@ -126,7 +125,7 @@ def project_config(self): } } - @attr(type='postgres') + @use_profile('postgres') def test_simple_seed_with_disabled(self): results = self.run_dbt(["seed"]) self.assertEqual(len(results), 1) @@ -153,7 +152,7 @@ def project_config(self): "data-paths": ['test/integration/005_simple_seed_test/data-bad'] } - @attr(type='postgres') + @use_profile('postgres') def test_postgres_dbt_run_skips_seeds(self): # run does not try to parse the seed files self.assertEqual(len(self.run_dbt(['run'])), 1) diff --git a/test/integration/006_simple_dependency_test/test_local_dependency.py b/test/integration/006_simple_dependency_test/test_local_dependency.py index 17c4c50e16f..abc22e5e2c4 100644 --- a/test/integration/006_simple_dependency_test/test_local_dependency.py +++ b/test/integration/006_simple_dependency_test/test_local_dependency.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile import mock import dbt.semver @@ -32,7 +31,7 @@ def base_schema(self): def configured_schema(self): return self.unique_schema() + '_configured' - @attr(type='postgres') + @use_profile('postgres') def test_postgres_local_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -62,16 +61,16 @@ def base_schema(self): def configured_schema(self): return 'configured_{}_macro'.format(self.unique_schema()) - @attr(type='postgres') + @use_profile('postgres') @mock.patch('dbt.config.project.get_installed_version') def test_postgres_local_dependency_out_of_date(self, mock_get): mock_get.return_value = dbt.semver.VersionSpecifier.from_version_string('0.0.1') self.run_dbt(['deps']) - with self.assertRaises(dbt.exceptions.DbtProjectError) as e: + with self.assertRaises(dbt.exceptions.DbtProjectError) as exc: self.run_dbt(['run']) - self.assertIn('--no-version-check', str(e.exception)) + self.assertIn('--no-version-check', str(exc.exception)) - @attr(type='postgres') + @use_profile('postgres') @mock.patch('dbt.config.project.get_installed_version') def test_postgres_local_dependency_out_of_date_no_check(self, mock_get): mock_get.return_value = dbt.semver.VersionSpecifier.from_version_string('0.0.1') diff --git a/test/integration/006_simple_dependency_test/test_simple_dependency.py b/test/integration/006_simple_dependency_test/test_simple_dependency.py index e20b4b2b4eb..1eac417cab8 100644 --- a/test/integration/006_simple_dependency_test/test_simple_dependency.py +++ 
b/test/integration/006_simple_dependency_test/test_simple_dependency.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestSimpleDependency(DBTIntegrationTest): @@ -25,7 +24,7 @@ def packages_config(self): ] } - @attr(type='postgres') + @use_profile('postgres') def test_simple_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -47,7 +46,7 @@ def test_simple_dependency(self): self.assertTablesEqual("seed","view_model") self.assertTablesEqual("seed","incremental") - @attr(type='postgres') + @use_profile('postgres') def test_simple_dependency_with_models(self): self.run_dbt(["deps"]) results = self.run_dbt(["run", '--models', 'view_model+']) @@ -105,7 +104,7 @@ def deps_run_assert_equality(self): self.assertEqual(created_models['view_summary'], 'view') self.assertEqual(created_models['incremental'], 'table') - @attr(type='postgres') + @use_profile('postgres') def test_simple_dependency(self): self.deps_run_assert_equality() @@ -115,7 +114,7 @@ def test_simple_dependency(self): self.deps_run_assert_equality() - @attr(type='postgres') + @use_profile('postgres') def test_empty_models_not_compiled_in_dependencies(self): self.deps_run_assert_equality() diff --git a/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py b/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py index ae96afd7f41..aa4877ef546 100644 --- a/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py +++ b/test/integration/006_simple_dependency_test/test_simple_dependency_with_configs.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class BaseTestSimpleDependencyWithConfigs(DBTIntegrationTest): @@ -40,7 +39,7 @@ def project_config(self): }, } - @attr(type='postgres') + @use_profile('postgres') def test_simple_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -83,7 +82,7 @@ def project_config(self): } - @attr(type='postgres') + @use_profile('postgres') def test_simple_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -127,7 +126,7 @@ def project_config(self): } - @attr(type='postgres') + @use_profile('postgres') def test_simple_dependency(self): self.use_default_project() @@ -183,7 +182,7 @@ def project_config(self): } - @attr(type='postgres') + @use_profile('postgres') def test_simple_dependency(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) diff --git a/test/integration/007_graph_selection_tests/models/users_rollup_dependency.sql b/test/integration/007_graph_selection_tests/models/users_rollup_dependency.sql new file mode 100644 index 00000000000..f539772cbb2 --- /dev/null +++ b/test/integration/007_graph_selection_tests/models/users_rollup_dependency.sql @@ -0,0 +1,5 @@ +{{ + config(materialized='table') +}} + +select * from {{ ref('users_rollup') }} diff --git a/test/integration/007_graph_selection_tests/test_graph_selection.py b/test/integration/007_graph_selection_tests/test_graph_selection.py index 7f6bfa87d73..5c830e360d5 100644 --- a/test/integration/007_graph_selection_tests/test_graph_selection.py +++ b/test/integration/007_graph_selection_tests/test_graph_selection.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from 
test.integration.base import DBTIntegrationTest, use_profile class TestGraphSelection(DBTIntegrationTest): @@ -12,22 +11,21 @@ def models(self): return "test/integration/007_graph_selection_tests/models" def assert_correct_schemas(self): - exists = self.adapter.check_schema_exists( - self.default_database, - self.unique_schema(), - '__test' - ) - self.assertTrue(exists) - - schema = self.unique_schema()+'_and_then' - exists = self.adapter.check_schema_exists( - self.default_database, - schema, - '__test' - ) - self.assertFalse(exists) - - @attr(type='postgres') + with self.test_connection(): + exists = self.adapter.check_schema_exists( + self.default_database, + self.unique_schema() + ) + self.assertTrue(exists) + + schema = self.unique_schema()+'_and_then' + exists = self.adapter.check_schema_exists( + self.default_database, + schema + ) + self.assertFalse(exists) + + @use_profile('postgres') def test__postgres__specific_model(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -41,7 +39,7 @@ def test__postgres__specific_model(self): self.assertFalse('emails' in created_models) self.assert_correct_schemas() - @attr(type='postgres') + @use_profile('postgres') def test__postgres__tags(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -55,12 +53,12 @@ def test__postgres__tags(self): self.assertTrue('users_rollup' in created_models) self.assert_correct_schemas() - @attr(type='postgres') + @use_profile('postgres') def test__postgres__tags_and_children(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', 'tag:base+']) - self.assertEqual(len(results), 3) + self.assertEqual(len(results), 4) created_models = self.get_models_in_schema() self.assertFalse('base_users' in created_models) @@ -70,7 +68,7 @@ def test__postgres__tags_and_children(self): self.assertTrue('users' in created_models) self.assert_correct_schemas() - @attr(type='snowflake') + @use_profile('snowflake') def test__snowflake__specific_model(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -84,12 +82,12 @@ def test__snowflake__specific_model(self): self.assertFalse('EMAILS' in created_models) self.assert_correct_schemas() - @attr(type='postgres') + @use_profile('postgres') def test__postgres__specific_model_and_children(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', 'users+']) - self.assertEqual(len(results), 3) + self.assertEqual(len(results), 4) self.assertTablesEqual("seed", "users") self.assertTablesEqual("summary_expected", "users_rollup") @@ -99,12 +97,12 @@ def test__postgres__specific_model_and_children(self): self.assertNotIn('emails', created_models) self.assert_correct_schemas() - @attr(type='snowflake') + @use_profile('snowflake') def test__snowflake__specific_model_and_children(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', 'users+']) - self.assertEqual(len(results), 3) + self.assertEqual(len(results), 4) self.assertManyTablesEqual( ["SEED", "USERS"], @@ -115,7 +113,7 @@ def test__snowflake__specific_model_and_children(self): self.assertFalse('EMAILS' in created_models) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__specific_model_and_parents(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -129,7 +127,7 @@ def 
test__postgres__specific_model_and_parents(self): self.assertFalse('emails' in created_models) self.assert_correct_schemas() - @attr(type='snowflake') + @use_profile('snowflake') def test__snowflake__specific_model_and_parents(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -146,7 +144,7 @@ def test__snowflake__specific_model_and_parents(self): self.assertFalse('EMAILS' in created_models) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__specific_model_with_exclusion(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -162,7 +160,7 @@ def test__postgres__specific_model_with_exclusion(self): self.assertFalse('emails' in created_models) self.assert_correct_schemas() - @attr(type='snowflake') + @use_profile('snowflake') def test__snowflake__specific_model_with_exclusion(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") @@ -177,7 +175,7 @@ def test__snowflake__specific_model_with_exclusion(self): self.assertFalse('USERS_ROLLUP' in created_models) self.assertFalse('EMAILS' in created_models) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__locally_qualified_name(self): results = self.run_dbt(['run', '--models', 'test.subdir']) self.assertEqual(len(results), 2) @@ -190,11 +188,11 @@ def test__postgres__locally_qualified_name(self): self.assertIn('nested_users', created_models) self.assert_correct_schemas() - @attr(type='postgres') + @use_profile('postgres') def test__postgres__childrens_parents(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', '@base_users']) - self.assertEqual(len(results), 3) + self.assertEqual(len(results), 4) created_models = self.get_models_in_schema() self.assertIn('users_rollup', created_models) @@ -203,12 +201,12 @@ def test__postgres__childrens_parents(self): self.assertNotIn('subdir', created_models) self.assertNotIn('nested_users', created_models) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__more_childrens_parents(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', '@users']) - # base_users, emails, users_rollup, but not users (ephemeral) - self.assertEqual(len(results), 3) + # base_users, emails, users_rollup, users_rollup_dependency, but not users (ephemeral) + self.assertEqual(len(results), 4) created_models = self.get_models_in_schema() self.assertIn('users_rollup', created_models) @@ -216,3 +214,24 @@ def test__postgres__more_childrens_parents(self): self.assertIn('emails_alt', created_models) self.assertNotIn('subdir', created_models) self.assertNotIn('nested_users', created_models) + + @use_profile('snowflake') + def test__snowflake__skip_intermediate(self): + self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") + results = self.run_dbt(['run', '--models', '@users']) + # base_users, emails, users_rollup, users_rollup_dependency + self.assertEqual(len(results), 4) + + # now re-run, skipping users_rollup + results = self.run_dbt(['run', '--models', '@users', '--exclude', 'users_rollup']) + self.assertEqual(len(results), 3) + + # make sure that users_rollup_dependency and users don't interleave + users = [r for r in results if r.node.name == 'users'][0] + dep = [r for r in results if r.node.name == 'users_rollup_dependency'][0] + user_last_end = users.timing[1]['completed_at'] + dep_first_start = dep.timing[0]['started_at'] + 
self.assertTrue( + user_last_end < dep_first_start, + 'dependency started before its transitive parent ({} > {})'.format(user_last_end, dep_first_start) + ) diff --git a/test/integration/007_graph_selection_tests/test_schema_test_graph_selection.py b/test/integration/007_graph_selection_tests/test_schema_test_graph_selection.py index 3068f855f2f..bc5971bd6de 100644 --- a/test/integration/007_graph_selection_tests/test_schema_test_graph_selection.py +++ b/test/integration/007_graph_selection_tests/test_schema_test_graph_selection.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest, FakeArgs +from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile from dbt.task.test import TestTask @@ -26,7 +25,7 @@ def run_schema_and_assert(self, include, exclude, expected_tests): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") self.run_dbt(["deps"]) results = self.run_dbt(['run', '--exclude', 'never_selected']) - self.assertEqual(len(results), 8) + self.assertEqual(len(results), 9) args = FakeArgs() args.models = include @@ -40,7 +39,7 @@ def run_schema_and_assert(self, include, exclude, expected_tests): self.assertEqual(ran_tests, expected_sorted) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_no_specifiers(self): self.run_schema_and_assert( None, @@ -51,7 +50,7 @@ def test__postgres__schema_tests_no_specifiers(self): 'unique_users_rollup_gender'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_specify_model(self): self.run_schema_and_assert( ['users'], @@ -59,7 +58,7 @@ def test__postgres__schema_tests_specify_model(self): ['unique_users_id'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_specify_tag(self): self.run_schema_and_assert( ['tag:bi'], @@ -68,7 +67,7 @@ def test__postgres__schema_tests_specify_tag(self): 'unique_users_rollup_gender'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_specify_model_and_children(self): self.run_schema_and_assert( ['users+'], @@ -76,7 +75,7 @@ def test__postgres__schema_tests_specify_model_and_children(self): ['unique_users_id', 'unique_users_rollup_gender'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_specify_tag_and_children(self): self.run_schema_and_assert( ['tag:base+'], @@ -86,7 +85,7 @@ def test__postgres__schema_tests_specify_tag_and_children(self): 'unique_users_rollup_gender'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_specify_model_and_parents(self): self.run_schema_and_assert( ['+users_rollup'], @@ -94,7 +93,7 @@ def test__postgres__schema_tests_specify_model_and_parents(self): ['unique_users_id', 'unique_users_rollup_gender'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_specify_model_and_parents_with_exclude(self): self.run_schema_and_assert( ['+users_rollup'], @@ -102,7 +101,7 @@ def test__postgres__schema_tests_specify_model_and_parents_with_exclude(self): ['unique_users_id'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_specify_exclude_only(self): self.run_schema_and_assert( None, @@ -110,7 +109,7 @@ def test__postgres__schema_tests_specify_exclude_only(self): ['unique_emails_email', 'unique_table_model_id', 'unique_users_id'] ) - @attr(type='postgres') + @use_profile('postgres') def 
test__postgres__schema_tests_specify_model_in_pkg(self): self.run_schema_and_assert( ['test.users_rollup'], @@ -120,7 +119,7 @@ def test__postgres__schema_tests_specify_model_in_pkg(self): ['unique_users_rollup_gender'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_with_glob(self): self.run_schema_and_assert( ['*'], @@ -128,7 +127,7 @@ def test__postgres__schema_tests_with_glob(self): ['unique_emails_email', 'unique_table_model_id', 'unique_users_rollup_gender'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_dep_package_only(self): self.run_schema_and_assert( ['dbt_integration_project'], @@ -136,7 +135,7 @@ def test__postgres__schema_tests_dep_package_only(self): ['unique_table_model_id'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_model_in_dep_pkg(self): self.run_schema_and_assert( ['dbt_integration_project.table_model'], @@ -144,7 +143,7 @@ def test__postgres__schema_tests_model_in_dep_pkg(self): ['unique_table_model_id'] ) - @attr(type='postgres') + @use_profile('postgres') def test__postgres__schema_tests_exclude_pkg(self): self.run_schema_and_assert( None, diff --git a/test/integration/007_graph_selection_tests/test_tag_selection.py b/test/integration/007_graph_selection_tests/test_tag_selection.py index 1eccf7252ed..bd15dff6340 100644 --- a/test/integration/007_graph_selection_tests/test_tag_selection.py +++ b/test/integration/007_graph_selection_tests/test_tag_selection.py @@ -42,7 +42,7 @@ def test__postgres__select_tag_and_children(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', '+tag:specified_in_project+']) - self.assertEqual(len(results), 2) + self.assertEqual(len(results), 3) models_run = [r.node['name'] for r in results] self.assertTrue('users' in models_run) @@ -69,8 +69,10 @@ def test__postgres__select_tag_in_model_with_project_Config(self): self.run_sql_file("test/integration/007_graph_selection_tests/seed.sql") results = self.run_dbt(['run', '--models', '@tag:users']) - self.assertEqual(len(results), 3) + self.assertEqual(len(results), 4) models_run = set(r.node['name'] for r in results) - self.assertEqual({'users', 'users_rollup', 'emails_alt'}, models_run) - + self.assertEqual( + {'users', 'users_rollup', 'emails_alt', 'users_rollup_dependency'}, + models_run + ) diff --git a/test/integration/008_schema_tests_test/ephemeral/ephemeral.sql b/test/integration/008_schema_tests_test/ephemeral/ephemeral.sql new file mode 100644 index 00000000000..c8e21355594 --- /dev/null +++ b/test/integration/008_schema_tests_test/ephemeral/ephemeral.sql @@ -0,0 +1,4 @@ + +{{ config(materialized='ephemeral') }} + +select 1 as id diff --git a/test/integration/008_schema_tests_test/ephemeral/schema.yml b/test/integration/008_schema_tests_test/ephemeral/schema.yml new file mode 100644 index 00000000000..b394a95c221 --- /dev/null +++ b/test/integration/008_schema_tests_test/ephemeral/schema.yml @@ -0,0 +1,8 @@ + +version: 2 +models: + - name: ephemeral + columns: + - name: id + tests: + - unique diff --git a/test/integration/008_schema_tests_test/test_schema_v2_tests.py b/test/integration/008_schema_tests_test/test_schema_v2_tests.py index 9df0bf2b995..69836e42c44 100644 --- a/test/integration/008_schema_tests_test/test_schema_v2_tests.py +++ b/test/integration/008_schema_tests_test/test_schema_v2_tests.py @@ -1,4 +1,3 @@ -from nose.plugins.attrib import attr from test.integration.base import 
DBTIntegrationTest, FakeArgs, use_profile import os @@ -27,7 +26,7 @@ def run_schema_validations(self): test_task = TestTask(args, self.config) return test_task.run() - @attr(type='postgres') + @use_profile('postgres') def test_schema_tests(self): results = self.run_dbt() self.assertEqual(len(results), 5) @@ -77,7 +76,7 @@ def run_schema_validations(self): test_task = TestTask(args, self.config) return test_task.run() - @attr(type='postgres') + @use_profile('postgres') def test_malformed_schema_test_wont_brick_run(self): # dbt run should work (Despite broken schema test) results = self.run_dbt(strict=False) @@ -88,12 +87,44 @@ def test_malformed_schema_test_wont_brick_run(self): self.assertEqual(len(ran_tests), 5) self.assertEqual(sum(x.status for x in ran_tests), 0) - @attr(type='postgres') + @use_profile('postgres') def test_malformed_schema_strict_will_break_run(self): with self.assertRaises(CompilationException): self.run_dbt(strict=True) +class TestHooksInTests(DBTIntegrationTest): + + @property + def schema(self): + return "schema_tests_008" + + @property + def models(self): + # test ephemeral models so we don't need to do a run (which would fail) + return "test/integration/008_schema_tests_test/ephemeral" + + @property + def project_config(self): + return { + "on-run-start": ["{{ exceptions.raise_compiler_error('hooks called in tests -- error') if execute }}"], + "on-run-end": ["{{ exceptions.raise_compiler_error('hooks called in tests -- error') if execute }}"], + } + + @use_profile('postgres') + def test_hooks_dont_run_for_tests(self): + # This would fail if the hooks ran + results = self.run_dbt(['test', '--model', 'ephemeral']) + self.assertEqual(len(results), 1) + for result in results: + self.assertIsNone(result.error) + self.assertFalse(result.skipped) + # status = # of failing rows + self.assertEqual( + result.status, 0, + 'test {} failed'.format(result.node.get('name')) + ) + class TestCustomSchemaTests(DBTIntegrationTest): def setUp(self): @@ -137,7 +168,7 @@ def run_schema_validations(self): test_task = TestTask(args, self.config) return test_task.run() - @attr(type='postgres') + @use_profile('postgres') def test_schema_tests(self): self.run_dbt(["deps"]) results = self.run_dbt() diff --git a/test/integration/009_data_tests_test/test_data_tests.py b/test/integration/009_data_tests_test/test_data_tests.py index 7cc5ba6f63c..b7b934a5ba4 100644 --- a/test/integration/009_data_tests_test/test_data_tests.py +++ b/test/integration/009_data_tests_test/test_data_tests.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest, FakeArgs +from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile from dbt.task.test import TestTask import os @@ -30,7 +29,7 @@ def run_data_validations(self): test_task = TestTask(args, self.config) return test_task.run() - @attr(type='postgres') + @use_profile('postgres') def test_postgres_data_tests(self): self.use_profile('postgres') @@ -59,7 +58,7 @@ def test_postgres_data_tests(self): self.assertNotEqual(len(test_results), 0) self.assertEqual(len(test_results), len(defined_tests)) - @attr(type='snowflake') + @use_profile('snowflake') def test_snowflake_data_tests(self): self.use_profile('snowflake') diff --git a/test/integration/010_permission_tests/test_permissions.py b/test/integration/010_permission_tests/test_permissions.py index 0602a8c40fb..d348569ca14 100644 --- a/test/integration/010_permission_tests/test_permissions.py +++ 
b/test/integration/010_permission_tests/test_permissions.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestPermissions(DBTIntegrationTest): @@ -15,7 +14,7 @@ def schema(self): def models(self): return "test/integration/010_permission_tests/models" - @attr(type='postgres') + @use_profile('postgres') def test_no_create_schema_permissions(self): # the noaccess user does not have permissions to create a schema -- this should fail failed = False @@ -27,7 +26,7 @@ def test_no_create_schema_permissions(self): self.assertTrue(failed) - @attr(type='postgres') + @use_profile('postgres') def test_create_schema_permissions(self): # now it should work! self.run_sql('grant create on database {} to noaccess'.format(self.default_database)) diff --git a/test/integration/011_invalid_model_tests/test_invalid_models.py b/test/integration/011_invalid_model_tests/test_invalid_models.py index ae2ee46c0eb..bbb9850d2df 100644 --- a/test/integration/011_invalid_model_tests/test_invalid_models.py +++ b/test/integration/011_invalid_model_tests/test_invalid_models.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile from dbt.exceptions import ValidationException @@ -19,7 +18,7 @@ def schema(self): def models(self): return "test/integration/011_invalid_model_tests/models-2" - @attr(type='postgres') + @use_profile('postgres') def test_view_with_incremental_attributes(self): try: @@ -45,7 +44,7 @@ def schema(self): def models(self): return "test/integration/011_invalid_model_tests/models-3" - @attr(type='postgres') + @use_profile('postgres') def test_view_with_incremental_attributes(self): try: diff --git a/test/integration/012_deprecation_tests/models/sql_where.sql b/test/integration/012_deprecation_tests/models/sql_where.sql deleted file mode 100644 index 34ca3c36464..00000000000 --- a/test/integration/012_deprecation_tests/models/sql_where.sql +++ /dev/null @@ -1,3 +0,0 @@ -{{ config(sql_where='id > (select max(id) from {{this}})')}} - -select 1 as id diff --git a/test/integration/012_deprecation_tests/test_deprecations.py b/test/integration/012_deprecation_tests/test_deprecations.py index 2642d9acefb..9f9e3544019 100644 --- a/test/integration/012_deprecation_tests/test_deprecations.py +++ b/test/integration/012_deprecation_tests/test_deprecations.py @@ -29,6 +29,6 @@ def test_postgres_deprecations_fail(self): @use_profile('postgres') def test_postgres_deprecations(self): self.assertEqual(deprecations.active_deprecations, set()) - results = self.run_dbt(strict=False) - self.assertEqual({'adapter:already_exists', 'sql_where'}, + self.run_dbt(strict=False) + self.assertEqual({'adapter:already_exists'}, deprecations.active_deprecations) diff --git a/test/integration/013_context_var_tests/test_context_vars.py b/test/integration/013_context_var_tests/test_context_vars.py index 7c93df337ed..4fd789c198d 100644 --- a/test/integration/013_context_var_tests/test_context_vars.py +++ b/test/integration/013_context_var_tests/test_context_vars.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile import os @@ -50,7 +49,7 @@ def profile_config(self): 'dev': { 'type': 'postgres', 'threads': 1, - 'host': 'database', + 'host': self.database_host, 'port': 
5432, 'user': "root", 'pass': "password", @@ -60,7 +59,7 @@ def profile_config(self): 'prod': { 'type': 'postgres', 'threads': 1, - 'host': 'database', + 'host': self.database_host, 'port': 5432, # root/password 'user': "{{ env_var('DBT_TEST_013_USER') }}", @@ -84,7 +83,7 @@ def get_ctx_vars(self): return ctx - @attr(type='postgres') + @use_profile('postgres') def test_env_vars_dev(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 1) @@ -99,7 +98,7 @@ def test_env_vars_dev(self): self.assertEqual(ctx['this.table'], 'context') self.assertEqual(ctx['target.dbname'], 'dbt') - self.assertEqual(ctx['target.host'], 'database') + self.assertEqual(ctx['target.host'], self.database_host) self.assertEqual(ctx['target.name'], 'dev') self.assertEqual(ctx['target.port'], 5432) self.assertEqual(ctx['target.schema'], self.unique_schema()) @@ -110,7 +109,7 @@ def test_env_vars_dev(self): self.assertEqual(ctx['env_var'], '1') - @attr(type='postgres') + @use_profile('postgres') def test_env_vars_prod(self): results = self.run_dbt(['run', '--target', 'prod']) self.assertEqual(len(results), 1) @@ -125,7 +124,7 @@ def test_env_vars_prod(self): self.assertEqual(ctx['this.table'], 'context') self.assertEqual(ctx['target.dbname'], 'dbt') - self.assertEqual(ctx['target.host'], 'database') + self.assertEqual(ctx['target.host'], self.database_host) self.assertEqual(ctx['target.name'], 'prod') self.assertEqual(ctx['target.port'], 5432) self.assertEqual(ctx['target.schema'], self.unique_schema()) diff --git a/test/integration/014_hook_tests/test_model_hooks.py b/test/integration/014_hook_tests/test_model_hooks.py index 0233b6155fb..c9f01756269 100644 --- a/test/integration/014_hook_tests/test_model_hooks.py +++ b/test/integration/014_hook_tests/test_model_hooks.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile from dbt.exceptions import CompilationException @@ -103,7 +102,7 @@ def check_hooks(self, state, count=1): for ctx in ctxs: self.assertEqual(ctx['state'], state) self.assertEqual(ctx['target.dbname'], 'dbt') - self.assertEqual(ctx['target.host'], 'database') + self.assertEqual(ctx['target.host'], self.database_host) self.assertEqual(ctx['target.name'], 'default2') self.assertEqual(ctx['target.port'], 5432) self.assertEqual(ctx['target.schema'], self.unique_schema()) @@ -146,7 +145,7 @@ def project_config(self): def models(self): return "test/integration/014_hook_tests/models" - @attr(type='postgres') + @use_profile('postgres') def test_postgres_pre_and_post_model_hooks(self): self.run_dbt(['run']) @@ -176,7 +175,7 @@ def project_config(self): } } - @attr(type='postgres') + @use_profile('postgres') def test_postgres_hooks_on_seeds(self): res = self.run_dbt(['seed']) self.assertEqual(len(res), 1, 'Expected exactly one item') @@ -195,14 +194,14 @@ def project_config(self): def models(self): return "test/integration/014_hook_tests/configured-models" - @attr(type='postgres') + @use_profile('postgres') def test_postgres_pre_and_post_model_hooks_model(self): self.run_dbt(['run']) self.check_hooks('start') self.check_hooks('end') - @attr(type='postgres') + @use_profile('postgres') def test_postgres_pre_and_post_model_hooks_model_and_project(self): self.use_default_project({ 'models': { @@ -247,10 +246,10 @@ def schema(self): def models(self): return "test/integration/014_hook_tests/error-models" - @attr(type='postgres') + @use_profile('postgres') def 
test_postgres_run_duplicate_hook_defs(self): with self.assertRaises(CompilationException) as exc: self.run_dbt(['run']) - self.assertIn('pre_hook', str(exc.exception)) - self.assertIn('pre-hook', str(exc.exception)) + self.assertIn('pre_hook', str(exc.exception)) + self.assertIn('pre-hook', str(exc.exception)) diff --git a/test/integration/014_hook_tests/test_model_hooks_bq.py b/test/integration/014_hook_tests/test_model_hooks_bq.py index d9c81b212f5..95c55d9cd6d 100644 --- a/test/integration/014_hook_tests/test_model_hooks_bq.py +++ b/test/integration/014_hook_tests/test_model_hooks_bq.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile MODEL_PRE_HOOK = """ insert into {{this.schema}}.on_model_hook ( @@ -106,7 +105,7 @@ def check_hooks(self, state): self.assertTrue(ctx['run_started_at'] is not None and len(ctx['run_started_at']) > 0, 'run_started_at was not set') self.assertTrue(ctx['invocation_id'] is not None and len(ctx['invocation_id']) > 0, 'invocation_id was not set') - @attr(type='bigquery') + @use_profile('bigquery') def test_pre_and_post_model_hooks_bigquery(self): self.run_dbt(['run']) @@ -135,7 +134,7 @@ def project_config(self): } } - @attr(type='bigquery') + @use_profile('bigquery') def test_hooks_on_seeds_bigquery(self): res = self.run_dbt(['seed']) self.assertEqual(len(res), 1, 'Expected exactly one item') diff --git a/test/integration/014_hook_tests/test_run_hooks.py b/test/integration/014_hook_tests/test_run_hooks.py index a92790c8473..e3af9babdd8 100644 --- a/test/integration/014_hook_tests/test_run_hooks.py +++ b/test/integration/014_hook_tests/test_run_hooks.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestPrePostRunHooks(DBTIntegrationTest): @@ -76,7 +75,7 @@ def check_hooks(self, state): self.assertEqual(ctx['state'], state) self.assertEqual(ctx['target.dbname'], 'dbt') - self.assertEqual(ctx['target.host'], 'database') + self.assertEqual(ctx['target.host'], self.database_host) self.assertEqual(ctx['target.name'], 'default2') self.assertEqual(ctx['target.port'], 5432) self.assertEqual(ctx['target.schema'], self.unique_schema()) @@ -88,7 +87,7 @@ def check_hooks(self, state): self.assertTrue(ctx['run_started_at'] is not None and len(ctx['run_started_at']) > 0, 'run_started_at was not set') self.assertTrue(ctx['invocation_id'] is not None and len(ctx['invocation_id']) > 0, 'invocation_id was not set') - @attr(type='postgres') + @use_profile('postgres') def test__postgres__pre_and_post_run_hooks(self): self.run_dbt(['run']) @@ -99,7 +98,7 @@ def test__postgres__pre_and_post_run_hooks(self): self.assertTableDoesNotExist("end_hook_order_test") self.assert_used_schemas() - @attr(type='postgres') + @use_profile('postgres') def test__postgres__pre_and_post_seed_hooks(self): self.run_dbt(['seed']) diff --git a/test/integration/014_hook_tests/test_run_hooks_bq.py b/test/integration/014_hook_tests/test_run_hooks_bq.py index 5dc05e57faa..46d5829d1f2 100644 --- a/test/integration/014_hook_tests/test_run_hooks_bq.py +++ b/test/integration/014_hook_tests/test_run_hooks_bq.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestBigqueryPrePostRunHooks(DBTIntegrationTest): @@ -78,7 +77,7 @@ def 
check_hooks(self, state): self.assertTrue(ctx['run_started_at'] is not None and len(ctx['run_started_at']) > 0, 'run_started_at was not set') self.assertTrue(ctx['invocation_id'] is not None and len(ctx['invocation_id']) > 0, 'invocation_id was not set') - @attr(type='bigquery') + @use_profile('bigquery') def test_bigquery_pre_and_post_run_hooks(self): self.run_dbt(['run']) @@ -88,7 +87,7 @@ def test_bigquery_pre_and_post_run_hooks(self): self.assertTableDoesNotExist("start_hook_order_test") self.assertTableDoesNotExist("end_hook_order_test") - @attr(type='bigquery') + @use_profile('bigquery') def test_bigquery_pre_and_post_seed_hooks(self): self.run_dbt(['seed']) diff --git a/test/integration/015_cli_invocation_tests/test_cli_invocation.py b/test/integration/015_cli_invocation_tests/test_cli_invocation.py index 8a43eba8794..f670a298be7 100644 --- a/test/integration/015_cli_invocation_tests/test_cli_invocation.py +++ b/test/integration/015_cli_invocation_tests/test_cli_invocation.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest, DBT_PROFILES +from test.integration.base import DBTIntegrationTest, DBT_PROFILES, use_profile import os, shutil, yaml class TestCLIInvocation(DBTIntegrationTest): @@ -17,13 +16,13 @@ def schema(self): def models(self): return "test/integration/015_cli_invocation_tests/models" - @attr(type='postgres') + @use_profile('postgres') def test_toplevel_dbt_run(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 1) self.assertTablesEqual("seed", "model") - @attr(type='postgres') + @use_profile('postgres') def test_subdir_dbt_run(self): os.chdir(os.path.join(self.models, "subdir1")) @@ -62,7 +61,7 @@ def custom_profile_config(self): 'default': { 'type': 'postgres', 'threads': 1, - 'host': 'database', + 'host': self.database_host, 'port': 5432, 'user': 'root', 'pass': 'password', @@ -86,7 +85,7 @@ def custom_schema(self): def models(self): return "test/integration/015_cli_invocation_tests/models" - @attr(type='postgres') + @use_profile('postgres') def test_toplevel_dbt_run_with_profile_dir_arg(self): results = self.run_dbt(['run', '--profiles-dir', 'dbt-profile']) self.assertEqual(len(results), 1) diff --git a/test/integration/016_macro_tests/test_macros.py b/test/integration/016_macro_tests/test_macros.py index a2164c9c62f..e0cf958c1f9 100644 --- a/test/integration/016_macro_tests/test_macros.py +++ b/test/integration/016_macro_tests/test_macros.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestMacros(DBTIntegrationTest): @@ -35,7 +34,7 @@ def project_config(self): "macro-paths": ["test/integration/016_macro_tests/macros"], } - @attr(type='postgres') + @use_profile('postgres') def test_working_macros(self): self.run_dbt(["deps"]) results = self.run_dbt(["run"]) @@ -64,7 +63,7 @@ def project_config(self): "macro-paths": ["test/integration/016_macro_tests/bad-macros"] } - @attr(type='postgres') + @use_profile('postgres') def test_invalid_macro(self): try: @@ -107,7 +106,7 @@ def project_config(self): # fails, it does not raise a runtime exception. change this test to verify # that the model finished with ERROR state. 
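A note on the pattern repeated throughout these test files: the nose-style @attr(type='postgres') marker is replaced everywhere by the suite's own @use_profile('postgres') decorator imported from test.integration.base. That module is not part of this diff, so the sketch below is only a rough guess at what such a decorator has to do (remember the target adapter and switch to the matching profile before the test body runs); it is not the project's actual implementation.

import functools

def use_profile(profile_name):
    # Hypothetical stand-in for test.integration.base.use_profile; the real
    # definition is not shown anywhere in this patch series.
    def outer(test_func):
        @functools.wraps(test_func)
        def wrapper(self, *args, **kwargs):
            # Assumes the DBTIntegrationTest base class exposes use_profile()
            # and use_default_project(), which several tests in this diff also
            # call directly.
            self.use_profile(profile_name)
            self.use_default_project()
            return test_func(self, *args, **kwargs)
        # Tag the test so a runner can select tests for a single adapter,
        # much like nose's attrib plugin filtered on type='postgres'.
        wrapper.profile = profile_name
        return wrapper
    return outer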
# - # @attr(type='postgres') + # @use_profile('postgres') # def test_working_macros(self): # self.run_dbt(["deps"]) diff --git a/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py b/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py index 42dfc64332e..977911d2d4c 100644 --- a/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py +++ b/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestRuntimeMaterialization(DBTIntegrationTest): @@ -19,7 +18,7 @@ def schema(self): def models(self): return "test/integration/017_runtime_materialization_tests/models" - @attr(type='postgres') + @use_profile('postgres') def test_postgres_full_refresh(self): # initial full-refresh should have no effect results = self.run_dbt(['run', '--full-refresh']) @@ -44,7 +43,7 @@ def test_postgres_full_refresh(self): self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - @attr(type='postgres') + @use_profile('postgres') def test_postgres_non_destructive(self): results = self.run_dbt(['run', '--non-destructive']) self.assertEqual(len(results), 3) @@ -64,7 +63,7 @@ def test_postgres_non_destructive(self): self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - @attr(type='postgres') + @use_profile('postgres') def test_postgres_full_refresh_and_non_destructive(self): results = self.run_dbt(['run', '--full-refresh', '--non-destructive']) self.assertEqual(len(results), 3) @@ -85,7 +84,7 @@ def test_postgres_full_refresh_and_non_destructive(self): self.assertTablesEqual("seed","incremental") self.assertTablesEqual("seed","materialized") - @attr(type='postgres') + @use_profile('postgres') def test_postgres_delete__dbt_tmp_relation(self): # This creates a __dbt_tmp view - make sure it doesn't interfere with the dbt run self.run_sql_file("test/integration/017_runtime_materialization_tests/create_view__dbt_tmp.sql") @@ -96,7 +95,7 @@ def test_postgres_delete__dbt_tmp_relation(self): self.assertTablesEqual("seed","view") - @attr(type='snowflake') + @use_profile('snowflake') def test_snowflake_backup_different_type(self): self.run_sql_file( 'test/integration/017_runtime_materialization_tests/create_backup_and_original.sql' diff --git a/test/integration/018_adapter_ddl_tests/test_adapter_ddl.py b/test/integration/018_adapter_ddl_tests/test_adapter_ddl.py index 274bc761d13..caebb436a55 100644 --- a/test/integration/018_adapter_ddl_tests/test_adapter_ddl.py +++ b/test/integration/018_adapter_ddl_tests/test_adapter_ddl.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestAdapterDDL(DBTIntegrationTest): @@ -16,7 +15,7 @@ def schema(self): def models(self): return "test/integration/018_adapter_ddl_tests/models" - @attr(type='postgres') + @use_profile('postgres') def test_sort_and_dist_keys_are_nops_on_postgres(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 1) diff --git a/test/integration/019_analysis_tests/test_analyses.py b/test/integration/019_analysis_tests/test_analyses.py index 5a69d7aace5..53948d9656e 100644 --- a/test/integration/019_analysis_tests/test_analyses.py +++ 
b/test/integration/019_analysis_tests/test_analyses.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile import os @@ -26,7 +25,7 @@ def assert_contents_equal(self, path, expected): with open(path) as fp: self.assertEqual(fp.read().strip(), expected) - @attr(type='postgres') + @use_profile('postgres') def test_analyses(self): compiled_analysis_path = os.path.normpath('target/compiled/test/analysis') path_1 = os.path.join(compiled_analysis_path, 'analysis.sql') diff --git a/test/integration/020_ephemeral_test/test_ephemeral.py b/test/integration/020_ephemeral_test/test_ephemeral.py index 4d97097bde2..bb749ed3424 100644 --- a/test/integration/020_ephemeral_test/test_ephemeral.py +++ b/test/integration/020_ephemeral_test/test_ephemeral.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestEphemeral(DBTIntegrationTest): @@ -11,7 +10,7 @@ def schema(self): def models(self): return "test/integration/020_ephemeral_test/models" - @attr(type='postgres') + @use_profile('postgres') def test__postgres(self): self.run_sql_file("test/integration/020_ephemeral_test/seed.sql") @@ -22,7 +21,7 @@ def test__postgres(self): self.assertTablesEqual("seed", "double_dependent") self.assertTablesEqual("seed", "super_dependent") - @attr(type='snowflake') + @use_profile('snowflake') def test__snowflake(self): self.run_sql_file("test/integration/020_ephemeral_test/seed.sql") @@ -42,7 +41,7 @@ def schema(self): def models(self): return "test/integration/020_ephemeral_test/ephemeral-errors" - @attr(type='postgres') + @use_profile('postgres') def test__postgres_upstream_error(self): self.run_sql_file("test/integration/020_ephemeral_test/seed.sql") diff --git a/test/integration/021_concurrency_test/test_concurrency.py b/test/integration/021_concurrency_test/test_concurrency.py index a2e5d497007..75758237140 100644 --- a/test/integration/021_concurrency_test/test_concurrency.py +++ b/test/integration/021_concurrency_test/test_concurrency.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestConcurrency(DBTIntegrationTest): @@ -11,7 +10,7 @@ def schema(self): def models(self): return "test/integration/021_concurrency_test/models" - @attr(type='postgres') + @use_profile('postgres') def test__postgres__concurrency(self): self.run_sql_file("test/integration/021_concurrency_test/seed.sql") @@ -37,7 +36,7 @@ def test__postgres__concurrency(self): self.assertTableDoesNotExist("invalid") self.assertTableDoesNotExist("skip") - @attr(type='snowflake') + @use_profile('snowflake') def test__snowflake__concurrency(self): self.run_sql_file("test/integration/021_concurrency_test/seed.sql") diff --git a/test/integration/022_bigquery_test/test_bigquery_adapter_functions.py b/test/integration/022_bigquery_test/test_bigquery_adapter_functions.py index 0c76fa8e6c7..c0576b7b346 100644 --- a/test/integration/022_bigquery_test/test_bigquery_adapter_functions.py +++ b/test/integration/022_bigquery_test/test_bigquery_adapter_functions.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest, FakeArgs +from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile class 
TestBigqueryAdapterFunctions(DBTIntegrationTest): @@ -16,7 +15,7 @@ def models(self): def profile_config(self): return self.bigquery_profile() - @attr(type='bigquery') + @use_profile('bigquery') def test__bigquery_adapter_functions(self): results = self.run_dbt() self.assertEqual(len(results), 3) diff --git a/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py b/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py index cd72a6713cf..f040cc87960 100644 --- a/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py +++ b/test/integration/022_bigquery_test/test_bigquery_date_partitioning.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest, FakeArgs +from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile class TestBigqueryDatePartitioning(DBTIntegrationTest): @@ -16,7 +15,7 @@ def models(self): def profile_config(self): return self.bigquery_profile() - @attr(type='bigquery') + @use_profile('bigquery') def test__bigquery_date_partitioning(self): results = self.run_dbt() self.assertEqual(len(results), 6) diff --git a/test/integration/023_exit_codes_test/test_exit_codes.py b/test/integration/023_exit_codes_test/test_exit_codes.py index bda5a5dee9f..bc54e01aa57 100644 --- a/test/integration/023_exit_codes_test/test_exit_codes.py +++ b/test/integration/023_exit_codes_test/test_exit_codes.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest, FakeArgs +from test.integration.base import DBTIntegrationTest, FakeArgs, use_profile import dbt.exceptions @@ -33,21 +32,21 @@ def project_config(self): ] } - @attr(type='postgres') + @use_profile('postgres') def test_exit_code_run_succeed(self): results, success = self.run_dbt_and_check(['run', '--model', 'good']) self.assertEqual(len(results), 1) self.assertTrue(success) self.assertTableDoesExist('good') - @attr(type='postgres') + @use_profile('postgres') def test__exit_code_run_fail(self): results, success = self.run_dbt_and_check(['run', '--model', 'bad']) self.assertEqual(len(results), 1) self.assertFalse(success) self.assertTableDoesNotExist('bad') - @attr(type='postgres') + @use_profile('postgres') def test___schema_test_pass(self): results, success = self.run_dbt_and_check(['run', '--model', 'good']) self.assertEqual(len(results), 1) @@ -56,7 +55,7 @@ def test___schema_test_pass(self): self.assertEqual(len(results), 1) self.assertTrue(success) - @attr(type='postgres') + @use_profile('postgres') def test___schema_test_fail(self): results, success = self.run_dbt_and_check(['run', '--model', 'dupe']) self.assertEqual(len(results), 1) @@ -65,13 +64,13 @@ def test___schema_test_fail(self): self.assertEqual(len(results), 1) self.assertFalse(success) - @attr(type='postgres') + @use_profile('postgres') def test___compile(self): results, success = self.run_dbt_and_check(['compile']) self.assertEqual(len(results), 7) self.assertTrue(success) - @attr(type='postgres') + @use_profile('postgres') def test___archive_pass(self): self.run_dbt_and_check(['run', '--model', 'good']) results, success = self.run_dbt_and_check(['archive']) @@ -108,7 +107,7 @@ def project_config(self): ] } - @attr(type='postgres') + @use_profile('postgres') def test___archive_fail(self): results, success = self.run_dbt_and_check(['run', '--model', 'good']) self.assertTrue(success) @@ -137,7 +136,7 @@ def packages_config(self): ] } - @attr(type='postgres') + @use_profile('postgres') def test_deps(self): _, 
success = self.run_dbt_and_check(['deps']) self.assertTrue(success) @@ -163,7 +162,7 @@ def packages_config(self): ] } - @attr(type='postgres') + @use_profile('postgres') def test_deps(self): # this should fail try: @@ -187,7 +186,7 @@ def project_config(self): "data-paths": ['test/integration/023_exit_codes_test/data-good'] } - @attr(type='postgres') + @use_profile('postgres') def test_seed(self): results, success = self.run_dbt_and_check(['seed']) self.assertEqual(len(results), 1) @@ -208,7 +207,7 @@ def project_config(self): "data-paths": ['test/integration/023_exit_codes_test/data-bad'] } - @attr(type='postgres') + @use_profile('postgres') def test_seed(self): try: _, success = self.run_dbt_and_check(['seed']) diff --git a/test/integration/024_custom_schema_test/models/view_3.sql b/test/integration/024_custom_schema_test/models/view_3.sql index c208e5d32df..33931704248 100644 --- a/test/integration/024_custom_schema_test/models/view_3.sql +++ b/test/integration/024_custom_schema_test/models/view_3.sql @@ -1,5 +1,5 @@ -{{ config(schema='test') }} +{{ config(schema='test', materialized='table') }} with v1 as ( diff --git a/test/integration/024_custom_schema_test/test_custom_schema.py b/test/integration/024_custom_schema_test/test_custom_schema.py index 4a64d7e419f..b7b3ed752ea 100644 --- a/test/integration/024_custom_schema_test/test_custom_schema.py +++ b/test/integration/024_custom_schema_test/test_custom_schema.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestCustomSchema(DBTIntegrationTest): @@ -12,7 +11,7 @@ def schema(self): def models(self): return "test/integration/024_custom_schema_test/models" - @attr(type='postgres') + @use_profile('postgres') def test__postgres__custom_schema_no_prefix(self): self.use_default_project() self.run_sql_file("test/integration/024_custom_schema_test/seed.sql") @@ -47,7 +46,7 @@ def profile_config(self): 'my-target': { 'type': 'postgres', 'threads': 1, - 'host': 'database', + 'host': self.database_host, 'port': 5432, 'user': 'root', 'pass': 'password', @@ -67,7 +66,7 @@ def project_config(self): } } - @attr(type='postgres') + @use_profile('postgres') def test__postgres__custom_schema_with_prefix(self): self.use_default_project() self.run_sql_file("test/integration/024_custom_schema_test/seed.sql") @@ -85,6 +84,42 @@ def test__postgres__custom_schema_with_prefix(self): self.assertTablesEqual("agg","view_3", schema, xf_schema) +class TestCustomProjectSchemaWithPrefixSnowflake(DBTIntegrationTest): + + @property + def schema(self): + return "custom_schema_024" + + @property + def models(self): + return "test/integration/024_custom_schema_test/models" + + @property + def project_config(self): + return { + "models": { + "schema": "dbt_test" + } + } + + @use_profile('snowflake') + def test__snowflake__custom_schema_with_prefix(self): + self.use_default_project() + self.run_sql_file("test/integration/024_custom_schema_test/seed.sql") + + results = self.run_dbt() + self.assertEqual(len(results), 3) + + schema = self.unique_schema().upper() + v1_schema = "{}_DBT_TEST".format(schema) + v2_schema = "{}_CUSTOM".format(schema) + xf_schema = "{}_TEST".format(schema) + + self.assertTablesEqual("SEED","VIEW_1", schema, v1_schema) + self.assertTablesEqual("SEED","VIEW_2", schema, v2_schema) + self.assertTablesEqual("AGG","VIEW_3", schema, xf_schema) + + class TestCustomSchemaWithCustomMacro(DBTIntegrationTest): @property @@ -103,7 
+138,7 @@ def profile_config(self): 'prod': { 'type': 'postgres', 'threads': 1, - 'host': 'database', + 'host': self.database_host, 'port': 5432, 'user': 'root', 'pass': 'password', @@ -124,7 +159,7 @@ def project_config(self): } } - @attr(type='postgres') + @use_profile('postgres') def test__postgres__custom_schema_from_macro(self): self.use_default_project() self.run_sql_file("test/integration/024_custom_schema_test/seed.sql") diff --git a/test/integration/025_duplicate_model_test/test_duplicate_model.py b/test/integration/025_duplicate_model_test/test_duplicate_model.py index 1aa52693293..5d438640ac1 100644 --- a/test/integration/025_duplicate_model_test/test_duplicate_model.py +++ b/test/integration/025_duplicate_model_test/test_duplicate_model.py @@ -1,7 +1,5 @@ -from nose.plugins.attrib import attr - from dbt.exceptions import CompilationException -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestDuplicateModelEnabled(DBTIntegrationTest): @@ -22,7 +20,7 @@ def profile_config(self): "dev": { "type": "postgres", "threads": 1, - "host": "database", + "host": self.database_host, "port": 5432, "user": "root", "pass": "password", @@ -34,7 +32,7 @@ def profile_config(self): } } - @attr(type="postgres") + @use_profile("postgres") def test_duplicate_model_enabled(self): message = "dbt found two resources with the name" try: @@ -62,7 +60,7 @@ def profile_config(self): "dev": { "type": "postgres", "threads": 1, - "host": "database", + "host": self.database_host, "port": 5432, "user": "root", "pass": "password", @@ -74,7 +72,7 @@ def profile_config(self): } } - @attr(type="postgres") + @use_profile("postgres") def test_duplicate_model_disabled(self): try: results = self.run_dbt(["run"]) @@ -109,7 +107,7 @@ def packages_config(self): ], } - @attr(type="postgres") + @use_profile("postgres") def test_duplicate_model_enabled_across_packages(self): self.run_dbt(["deps"]) message = "dbt found two resources with the name" @@ -145,7 +143,7 @@ def packages_config(self): ], } - @attr(type="postgres") + @use_profile("postgres") def test_duplicate_model_disabled_across_packages(self): self.run_dbt(["deps"]) try: diff --git a/test/integration/025_timezones_test/test_timezones.py b/test/integration/025_timezones_test/test_timezones.py index ed1ce6a9023..188a145ac59 100644 --- a/test/integration/025_timezones_test/test_timezones.py +++ b/test/integration/025_timezones_test/test_timezones.py @@ -1,6 +1,5 @@ from freezegun import freeze_time -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestTimezones(DBTIntegrationTest): @@ -21,7 +20,7 @@ def profile_config(self): 'dev': { 'type': 'postgres', 'threads': 1, - 'host': 'database', + 'host': self.database_host, 'port': 5432, 'user': "root", 'pass': "password", @@ -43,7 +42,7 @@ def query(self): """.format(schema=self.unique_schema()) @freeze_time("2017-01-01 03:00:00", tz_offset=0) - @attr(type='postgres') + @use_profile('postgres') def test_run_started_at(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 1) diff --git a/test/integration/026_aliases_test/test_aliases.py b/test/integration/026_aliases_test/test_aliases.py index 418b799cc63..b53d3680434 100644 --- a/test/integration/026_aliases_test/test_aliases.py +++ b/test/integration/026_aliases_test/test_aliases.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base 
import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestAliases(DBTIntegrationTest): @@ -27,19 +26,19 @@ def project_config(self): } } - @attr(type='postgres') + @use_profile('postgres') def test__alias_model_name(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 4) self.run_dbt(['test']) - @attr(type='bigquery') + @use_profile('bigquery') def test__alias_model_name_bigquery(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 4) self.run_dbt(['test']) - @attr(type='snowflake') + @use_profile('snowflake') def test__alias_model_name_snowflake(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 4) @@ -60,7 +59,7 @@ def project_config(self): "macro-paths": ['test/integration/026_aliases_test/macros'], } - @attr(type='postgres') + @use_profile('postgres') def test__alias_dupe_throws_exception(self): message = ".*identical database representation.*" with self.assertRaisesRegexp(Exception, message): @@ -81,7 +80,7 @@ def project_config(self): "macro-paths": ['test/integration/026_aliases_test/macros'], } - @attr(type='postgres') + @use_profile('postgres') def test__same_alias_succeeds_in_different_schemas(self): results = self.run_dbt(['run']) self.assertEqual(len(results), 3) diff --git a/test/integration/027_cycle_test/test_cycles.py b/test/integration/027_cycle_test/test_cycles.py index 91fd22705cb..10a3ddb82fc 100644 --- a/test/integration/027_cycle_test/test_cycles.py +++ b/test/integration/027_cycle_test/test_cycles.py @@ -1,6 +1,5 @@ from freezegun import freeze_time -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class TestSimpleCycle(DBTIntegrationTest): @@ -14,7 +13,7 @@ def models(self): return "test/integration/027_cycle_test/simple_cycle_models" @property - @attr(type='postgres') + @use_profile('postgres') def test_simple_cycle(self): message = "Found a cycle.*" with self.assertRaisesRegexp(Exception, message): @@ -31,7 +30,7 @@ def models(self): return "test/integration/027_cycle_test/complex_cycle_models" @property - @attr(type='postgres') + @use_profile('postgres') def test_simple_cycle(self): message = "Found a cycle.*" with self.assertRaisesRegexp(Exception, message): diff --git a/test/integration/028_cli_vars/test_cli_var_override.py b/test/integration/028_cli_vars/test_cli_var_override.py index 0a2451118d0..158cd9830fe 100644 --- a/test/integration/028_cli_vars/test_cli_var_override.py +++ b/test/integration/028_cli_vars/test_cli_var_override.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile import yaml @@ -22,7 +21,7 @@ def project_config(self): } } - @attr(type='postgres') + @use_profile('postgres') def test__overriden_vars_global(self): self.use_default_project() self.use_profile('postgres') @@ -53,7 +52,7 @@ def project_config(self): } } - @attr(type='postgres') + @use_profile('postgres') def test__overriden_vars_project_level(self): # This should be "override" diff --git a/test/integration/028_cli_vars/test_cli_vars.py b/test/integration/028_cli_vars/test_cli_vars.py index 1184c7a9788..86c28cca43f 100644 --- a/test/integration/028_cli_vars/test_cli_vars.py +++ b/test/integration/028_cli_vars/test_cli_vars.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from 
test.integration.base import DBTIntegrationTest, use_profile import yaml @@ -12,7 +11,7 @@ def schema(self): def models(self): return "test/integration/028_cli_vars/models_complex" - @attr(type='postgres') + @use_profile('postgres') def test__cli_vars_longform(self): self.use_profile('postgres') self.use_default_project() @@ -39,7 +38,7 @@ def schema(self): def models(self): return "test/integration/028_cli_vars/models_simple" - @attr(type='postgres') + @use_profile('postgres') def test__cli_vars_shorthand(self): self.use_profile('postgres') self.use_default_project() @@ -49,7 +48,7 @@ def test__cli_vars_shorthand(self): results = self.run_dbt(["test", "--vars", "simple: abc"]) self.assertEqual(len(results), 1) - @attr(type='postgres') + @use_profile('postgres') def test__cli_vars_longer(self): self.use_profile('postgres') self.use_default_project() diff --git a/test/integration/029_docs_generate_tests/ref_models/schema.yml b/test/integration/029_docs_generate_tests/ref_models/schema.yml index 087efc30108..0ebd5e3af3f 100644 --- a/test/integration/029_docs_generate_tests/ref_models/schema.yml +++ b/test/integration/029_docs_generate_tests/ref_models/schema.yml @@ -17,10 +17,15 @@ sources: description: "{{ doc('source_info') }}" loader: a_loader schema: "{{ var('test_schema') }}" + quoting: + database: False + identifier: False tables: - name: my_table description: "{{ doc('table_info') }}" identifier: seed + quoting: + identifier: True columns: - name: id description: "An ID field" diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py index 8e63acaec8b..26339897ad5 100644 --- a/test/integration/029_docs_generate_tests/test_docs_generate.py +++ b/test/integration/029_docs_generate_tests/test_docs_generate.py @@ -11,8 +11,28 @@ def _read_file(path): + with open(path, 'r') as fp: + return fp.read().replace('\r', '').replace('\\r', '') + + +class LineIndifferent(object): + def __init__(self, expected): + self.expected = expected.replace('\r', '') + + def __eq__(self, other): + return self.expected == other.replace('\r', '') + + def __repr__(self): + return 'LineIndifferent("{}")'.format(self.expected) + + def __str__(self): + return self.__repr__() + + +def _read_json(path): + # read json generated by dbt. 
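The helpers added above deserve a short gloss: LineIndifferent lets the raw_sql comparisons in the expected manifests ignore carriage returns, so the same assertions pass on Windows and Unix checkouts, and _read_json centralizes loading dbt's generated artifacts. A minimal usage example, relying only on the class exactly as defined in this hunk:

expected = LineIndifferent('select 1 as id\n')
assert expected == 'select 1 as id\n'      # identical text matches
assert expected == 'select 1 as id\r\n'    # '\r' is stripped from both sides before comparing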
with open(path) as fp: - return fp.read() + return json.load(fp) def _normalize(path): @@ -28,8 +48,15 @@ def _normalize(path): return os.path.normcase(os.path.normpath(path)) +def walk_files(path): + for root, dirs, files in os.walk(path): + for basename in files: + yield os.path.join(root, basename) + + class TestDocsGenerate(DBTIntegrationTest): setup_alternate_db = True + def setUp(self): super(TestDocsGenerate, self).setUp() self.maxDiff = None @@ -119,7 +146,7 @@ def _redshift_stats(self): "diststyle": { "id": "diststyle", "label": "Dist Style", - "value": "EVEN", + "value": AnyStringWith(None), "description": "Distribution style or distribution key column, if key distribution is defined.", "include": True }, @@ -471,7 +498,6 @@ def expected_presto_catalog(self): model_database=self.default_database ) - @staticmethod def _clustered_bigquery_columns(update_type): return { @@ -725,8 +751,7 @@ def expected_redshift_incremental_catalog(self): def verify_catalog(self, expected): self.assertTrue(os.path.exists('./target/catalog.json')) - with open('./target/catalog.json') as fp: - catalog = json.load(fp) + catalog = _read_json('./target/catalog.json') self.assertIn('generated_at', catalog) self.assertBetween( @@ -803,7 +828,7 @@ def expected_seeded_manifest(self, model_database=None): 'path': 'model.sql', 'original_file_path': model_sql_path, 'package_name': 'test', - 'raw_sql': _read_file(model_sql_path).rstrip('\n'), + 'raw_sql': LineIndifferent(_read_file(model_sql_path).rstrip('\r\n')), 'refs': [['seed']], 'sources': [], 'depends_on': {'nodes': ['seed.test.seed'], 'macros': []}, @@ -1004,7 +1029,7 @@ def expected_postgres_references_manifest(self, model_database=None): config_vars = {'alternate_db': model_database} my_schema_name = self.unique_schema() docs_path = self.dir('ref_models/docs.md') - docs_file = _read_file(docs_path).lstrip() + docs_file = LineIndifferent(_read_file(docs_path).lstrip()) return { 'nodes': { 'model.test.ephemeral_copy': { @@ -1032,7 +1057,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'original_file_path': self.dir('ref_models/ephemeral_copy.sql'), 'package_name': 'test', 'path': 'ephemeral_copy.sql', - 'raw_sql': ( + 'raw_sql': LineIndifferent( '{{\n config(\n materialized = "ephemeral"\n )\n}}' '\n\nselect * from {{ source("my_source", "my_table") }}' ), @@ -1089,14 +1114,13 @@ def expected_postgres_references_manifest(self, model_database=None): } ], 'empty': False, - 'fqn': ['test', - 'ephemeral_summary'], + 'fqn': ['test', 'ephemeral_summary'], 'name': 'ephemeral_summary', 'original_file_path': self.dir('ref_models/ephemeral_summary.sql'), 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'ephemeral_summary.sql', - 'raw_sql': ( + 'raw_sql': LineIndifferent( '{{\n config(\n materialized = "table"\n )\n}}\n\n' 'select first_name, count(*) as ct from ' "{{ref('ephemeral_copy')}}\ngroup by first_name\n" @@ -1160,7 +1184,7 @@ def expected_postgres_references_manifest(self, model_database=None): 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'view_summary.sql', - 'raw_sql': ( + 'raw_sql': LineIndifferent( '{{\n config(\n materialized = "view"\n )\n}}\n\n' 'select first_name, ct from ' "{{ref('ephemeral_summary')}}\norder by ct asc" @@ -1211,32 +1235,36 @@ def expected_postgres_references_manifest(self, model_database=None): 'name': 'id' } }, - 'database': self.default_database, - 'description': 'My table', - 'docrefs': [ - { - 'documentation_name': 'table_info', - 
'documentation_package': '' - }, - { - 'documentation_name': 'source_info', - 'documentation_package': '' - } - ], - 'freshness': {}, - 'identifier': 'seed', - 'loaded_at_field': None, - 'loader': 'a_loader', - 'name': 'my_table', - 'original_file_path': self.dir('ref_models/schema.yml'), - 'package_name': 'test', - 'path': self.dir('ref_models/schema.yml'), - 'resource_type': 'source', - 'root_path': os.getcwd(), - 'schema': my_schema_name, - 'source_description': "{{ doc('source_info') }}", - 'source_name': 'my_source', - 'unique_id': 'source.test.my_source.my_table' + 'quoting': { + 'database': False, + 'identifier': True, + }, + 'database': self.default_database, + 'description': 'My table', + 'docrefs': [ + { + 'documentation_name': 'table_info', + 'documentation_package': '' + }, + { + 'documentation_name': 'source_info', + 'documentation_package': '' + } + ], + 'freshness': {}, + 'identifier': 'seed', + 'loaded_at_field': None, + 'loader': 'a_loader', + 'name': 'my_table', + 'original_file_path': self.dir('ref_models/schema.yml'), + 'package_name': 'test', + 'path': self.dir('ref_models/schema.yml'), + 'resource_type': 'source', + 'root_path': os.getcwd(), + 'schema': my_schema_name, + 'source_description': "{{ doc('source_info') }}", + 'source_name': 'my_source', + 'unique_id': 'source.test.my_source.my_table' } }, 'docs': { @@ -1366,7 +1394,7 @@ def expected_bigquery_complex_manifest(self): 'original_file_path': clustered_sql_path, 'package_name': 'test', 'path': 'clustered.sql', - 'raw_sql': _read_file(clustered_sql_path).rstrip('\n'), + 'raw_sql': LineIndifferent(_read_file(clustered_sql_path).rstrip('\r\n')), 'refs': [['seed']], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -1422,7 +1450,7 @@ def expected_bigquery_complex_manifest(self): 'original_file_path': multi_clustered_sql_path, 'package_name': 'test', 'path': 'multi_clustered.sql', - 'raw_sql': _read_file(multi_clustered_sql_path).rstrip('\n'), + 'raw_sql': LineIndifferent(_read_file(multi_clustered_sql_path).rstrip('\r\n')), 'refs': [['seed']], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -1479,7 +1507,7 @@ def expected_bigquery_complex_manifest(self): 'original_file_path': nested_view_sql_path, 'package_name': 'test', 'path': 'nested_view.sql', - 'raw_sql': _read_file(nested_view_sql_path).rstrip('\n'), + 'raw_sql': LineIndifferent(_read_file(nested_view_sql_path).rstrip('\r\n')), 'refs': [['nested_table']], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -1536,7 +1564,7 @@ def expected_bigquery_complex_manifest(self): 'original_file_path': nested_table_sql_path, 'package_name': 'test', 'path': 'nested_table.sql', - 'raw_sql': _read_file(nested_table_sql_path).rstrip('\n'), + 'raw_sql': LineIndifferent(_read_file(nested_table_sql_path).rstrip('\r\n')), 'refs': [], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -1620,7 +1648,7 @@ def expected_redshift_incremental_view_manifest(self): "path": "model.sql", "original_file_path": model_sql_path, "package_name": "test", - "raw_sql": _read_file(model_sql_path).rstrip('\n'), + "raw_sql": LineIndifferent(_read_file(model_sql_path).rstrip('\r\n')), "refs": [["seed"]], "sources": [], "depends_on": { @@ -1728,8 +1756,7 @@ def expected_redshift_incremental_view_manifest(self): def verify_manifest(self, expected_manifest): self.assertTrue(os.path.exists('./target/manifest.json')) - with open('./target/manifest.json') as fp: - manifest = json.load(fp) + manifest = _read_json('./target/manifest.json') self.assertEqual( set(manifest), @@ -1781,7 
+1808,9 @@ def expected_run_results(self, quote_schema=True, quote_model=False, schema = self.unique_schema() # we are selecting from the seed, which is always in the default db - compiled_database = self._quote(self.default_database) + compiled_database = self.default_database + if self.adapter_type != 'snowflake': + compiled_database = self._quote(compiled_database) compiled_schema = self._quote(schema) if quote_schema else schema compiled_seed = self._quote('seed') if quote_model else 'seed' @@ -1832,7 +1861,7 @@ def expected_run_results(self, quote_schema=True, quote_model=False, 'package_name': 'test', 'patch_path': schema_yml_path, 'path': 'model.sql', - 'raw_sql': _read_file(model_sql_path).rstrip('\n'), + 'raw_sql': LineIndifferent(_read_file(model_sql_path).rstrip('\r\n')), 'refs': [['seed']], 'resource_type': 'model', 'root_path': os.getcwd(), @@ -2051,7 +2080,7 @@ def expected_postgres_references_run_results(self): ) cte_sql = ( - ' __dbt__CTE__ephemeral_copy as (\n\n\nselect * from "{}"."{}"."seed"\n)' + ' __dbt__CTE__ephemeral_copy as (\n\n\nselect * from {}."{}"."seed"\n)' ).format(self.default_database, my_schema_name) ephemeral_injected_sql = ( @@ -2133,7 +2162,7 @@ def expected_postgres_references_run_results(self): 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'ephemeral_summary.sql', - 'raw_sql': ( + 'raw_sql': LineIndifferent( '{{\n config(\n materialized = "table"\n )\n}}\n' '\nselect first_name, count(*) as ct from ' "{{ref('ephemeral_copy')}}\ngroup by first_name\n" @@ -2220,7 +2249,7 @@ def expected_postgres_references_run_results(self): 'package_name': 'test', 'patch_path': self.dir('ref_models/schema.yml'), 'path': 'view_summary.sql', - 'raw_sql': ( + 'raw_sql': LineIndifferent( '{{\n config(\n materialized = "view"\n )\n}}\n\n' 'select first_name, ct from ' "{{ref('ephemeral_summary')}}\norder by ct asc" @@ -2291,8 +2320,7 @@ def expected_postgres_references_run_results(self): ] def verify_run_results(self, expected_run_results): - with open('./target/run_results.json') as fp: - run_result = json.load(fp) + run_result = _read_json('./target/run_results.json') self.assertIn('generated_at', run_result) self.assertIn('elapsed_time', run_result) diff --git a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py index cfb9876a4c7..1b47ded1d10 100644 --- a/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py +++ b/test/integration/032_concurrent_transaction_test/test_concurrent_transaction.py @@ -1,6 +1,6 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile import threading +from dbt.adapters.factory import get_adapter class BaseTestConcurrentTransaction(DBTIntegrationTest): @@ -10,6 +10,10 @@ def reset(self): 'model_1': 'wait', } + def setUp(self): + super(BaseTestConcurrentTransaction, self).setUp() + self.reset() + @property def schema(self): return "concurrent_transaction_032" @@ -26,7 +30,8 @@ def project_config(self): def run_select_and_check(self, rel, sql): connection_name = '__test_{}'.format(id(threading.current_thread())) try: - res = self.run_sql(sql, fetch='one', connection_name=connection_name) + with get_adapter(self.config).connection_named(connection_name) as conn: + res = self.run_sql_common(self.transform_sql(sql), 'one', conn) # The result is the output of f_sleep(), which is 
True if res[0] == True: @@ -54,7 +59,7 @@ def async_select(self, rel, sleep=10): sleep=sleep, rel=rel) - thread = threading.Thread(target=lambda: self.run_select_and_check(rel, query)) + thread = threading.Thread(target=self.run_select_and_check, args=(rel, query)) thread.start() return thread @@ -88,7 +93,7 @@ class TableTestConcurrentTransaction(BaseTestConcurrentTransaction): def models(self): return "test/integration/032_concurrent_transaction_test/models-table" - @attr(type="redshift") + @use_profile("redshift") def test__redshift__concurrent_transaction_table(self): self.reset() self.run_test() @@ -98,7 +103,7 @@ class ViewTestConcurrentTransaction(BaseTestConcurrentTransaction): def models(self): return "test/integration/032_concurrent_transaction_test/models-view" - @attr(type="redshift") + @use_profile("redshift") def test__redshift__concurrent_transaction_view(self): self.reset() self.run_test() @@ -108,7 +113,7 @@ class IncrementalTestConcurrentTransaction(BaseTestConcurrentTransaction): def models(self): return "test/integration/032_concurrent_transaction_test/models-incremental" - @attr(type="redshift") + @use_profile("redshift") def test__redshift__concurrent_transaction_incremental(self): self.reset() self.run_test() diff --git a/test/integration/033_event_tracking_test/test_events.py b/test/integration/033_event_tracking_test/test_events.py index 83d94335f85..91121872623 100644 --- a/test/integration/033_event_tracking_test/test_events.py +++ b/test/integration/033_event_tracking_test/test_events.py @@ -1,7 +1,7 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile import mock import hashlib +import os from mock import call, ANY @@ -188,7 +188,7 @@ def project_config(self): "test-paths": [self.dir("test")], } - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_compile(self): expected_calls = [ call( @@ -216,7 +216,7 @@ def test__event_tracking_compile(self): expected_contexts ) - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_deps(self): package_context = [ { @@ -259,7 +259,7 @@ def test__event_tracking_deps(self): self.run_event_test(["deps"], expected_calls, expected_contexts) - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_seed(self): def seed_context(project_id, user_id, invocation_id, version): return [{ @@ -313,7 +313,7 @@ def seed_context(project_id, user_id, invocation_id, version): self.run_event_test(["seed"], expected_calls, expected_contexts) - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_models(self): expected_calls = [ call( @@ -342,6 +342,12 @@ def test__event_tracking_models(self): ), ] + hashed = '20ff78afb16c8b3b8f83861b1d3b99bd' + # this hashed contents field changes on azure postgres tests, I believe + # due to newlines again + if os.name == 'nt': + hashed = '52cf9d1db8f0a18ca64ef64681399746' + expected_contexts = [ self.build_context('run', 'start'), self.run_context( @@ -353,7 +359,7 @@ def test__event_tracking_models(self): materialization='view' ), self.run_context( - hashed_contents='20ff78afb16c8b3b8f83861b1d3b99bd', + hashed_contents=hashed, model_id='57994a805249953b31b738b1af7a1eeb', index=2, total=2, @@ -369,7 +375,7 @@ def test__event_tracking_models(self): expected_contexts ) - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_model_error(self): # cmd = ["run", "--model", "model_error"] # 
self.run_event_test(cmd, event_run_model_error, expect_pass=False) @@ -415,7 +421,7 @@ def test__event_tracking_model_error(self): expect_pass=False ) - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_tests(self): # TODO: dbt does not track events for tests, but it should! self.run_dbt(["run", "--model", "example", "example_2"]) @@ -455,7 +461,7 @@ def project_config(self): "source-paths": [self.dir("model-compilation-error")], } - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_with_compilation_error(self): expected_calls = [ call( @@ -499,7 +505,7 @@ def profile_config(self): 'default2': { 'type': 'postgres', 'threads': 4, - 'host': 'database', + 'host': self.database_host, 'port': 5432, 'user': 'root', 'pass': 'password', @@ -509,7 +515,7 @@ def profile_config(self): 'noaccess': { 'type': 'postgres', 'threads': 4, - 'host': 'database', + 'host': self.database_host, 'port': 5432, 'user': 'BAD', 'pass': 'bad_password', @@ -521,7 +527,7 @@ def profile_config(self): } } - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_unable_to_connect(self): expected_calls = [ call( @@ -571,7 +577,7 @@ def project_config(self): ] } - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_archive(self): self.run_dbt(["run", "--models", "archivable"]) @@ -596,10 +602,11 @@ def test__event_tracking_archive(self): ), ] + # the model here has a raw_sql that contains the schema, which changes expected_contexts = [ self.build_context('archive', 'start'), self.run_context( - hashed_contents='f785c4490e73e5b52fed5627f5709bfa', + hashed_contents=ANY, model_id='3cdcd0fef985948fd33af308468da3b9', index=1, total=1, @@ -617,7 +624,7 @@ def test__event_tracking_archive(self): class TestEventTrackingCatalogGenerate(TestEventTracking): - @attr(type="postgres") + @use_profile("postgres") def test__event_tracking_catalog_generate(self): # create a model for the catalog self.run_dbt(["run", "--models", "example"]) diff --git a/test/integration/034_redshift_test/test_late_binding_view.py b/test/integration/034_redshift_test/test_late_binding_view.py index a55318443bf..37350ef7d57 100644 --- a/test/integration/034_redshift_test/test_late_binding_view.py +++ b/test/integration/034_redshift_test/test_late_binding_view.py @@ -1,7 +1,6 @@ import json import os -from nose.plugins.attrib import attr from test.integration.base import DBTIntegrationTest, use_profile diff --git a/test/integration/037_external_reference_test/test_external_reference.py b/test/integration/037_external_reference_test/test_external_reference.py index bd754ae169c..ba6bf73bdb6 100644 --- a/test/integration/037_external_reference_test/test_external_reference.py +++ b/test/integration/037_external_reference_test/test_external_reference.py @@ -29,8 +29,8 @@ def tearDown(self): # This has to happen before we drop the external schema, because # otherwise postgres hangs forever. 
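Another change repeated across the profile fixtures in this series is swapping the hard-coded 'host': 'database' for self.database_host. The property itself lives in test/integration/base.py and is not shown here, so the following is only an assumed sketch of the kind of helper these hunks imply (read the host from the environment so the docker-compose hostname can be overridden on other CI setups); the real definition may differ.

import os

class DBTIntegrationTest(object):
    # Illustrative fragment only; the actual base class is much larger and is
    # not part of this diff.
    @property
    def database_host(self):
        # Assumption: default to the hostname used previously ('database') and
        # let an environment variable override it. The variable name here is
        # made up for the example.
        return os.getenv('DBT_TEST_POSTGRES_HOST', 'database')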
self._drop_schemas() - self.adapter.drop_schema(self.default_database, self.external_schema, - model_name='__test') + with self.test_connection(): + self.adapter.drop_schema(self.default_database, self.external_schema) super(TestExternalReference, self).tearDown() @use_profile('postgres') @@ -39,6 +39,7 @@ def test__postgres__external_reference(self): # running it again should succeed self.assertEquals(len(self.run_dbt()), 1) + # The opposite of the test above -- check that external relations that # depend on a dbt model do not create issues with caching class TestExternalDependency(DBTIntegrationTest): @@ -54,8 +55,8 @@ def tearDown(self): # This has to happen before we drop the external schema, because # otherwise postgres hangs forever. self._drop_schemas() - self.adapter.drop_schema(self.default_database, self.external_schema, - model_name='__test') + with self.test_connection(): + self.adapter.drop_schema(self.default_database, self.external_schema) super(TestExternalDependency, self).tearDown() @use_profile('postgres') diff --git a/test/integration/040_override_database_test/test_override_database.py b/test/integration/040_override_database_test/test_override_database.py index a3319441d6b..1071eb2bd31 100644 --- a/test/integration/040_override_database_test/test_override_database.py +++ b/test/integration/040_override_database_test/test_override_database.py @@ -1,5 +1,4 @@ -from nose.plugins.attrib import attr -from test.integration.base import DBTIntegrationTest +from test.integration.base import DBTIntegrationTest, use_profile class BaseOverrideDatabase(DBTIntegrationTest): @@ -42,11 +41,11 @@ def run_database_override(self): (func('view_4'), self.unique_schema(), self.alternative_database), ]) - @attr(type='bigquery') + @use_profile('bigquery') def test_bigquery_database_override(self): self.run_database_override() - @attr(type='snowflake') + @use_profile('snowflake') def test_snowflake_database_override(self): self.run_database_override() @@ -82,11 +81,11 @@ def run_database_override(self): (func('view_4'), self.unique_schema(), self.alternative_database), ]) - @attr(type='bigquery') + @use_profile('bigquery') def test_bigquery_database_override(self): self.run_database_override() - @attr(type='snowflake') + @use_profile('snowflake') def test_snowflake_database_override(self): self.run_database_override() @@ -112,10 +111,10 @@ def run_database_override(self): (func('view_4'), self.unique_schema(), self.alternative_database), ]) - @attr(type='bigquery') + @use_profile('bigquery') def test_bigquery_database_override(self): self.run_database_override() - @attr(type='snowflake') + @use_profile('snowflake') def test_snowflake_database_override(self): self.run_database_override() diff --git a/test/integration/042_sources_test/macros/macro.sql b/test/integration/042_sources_test/macros/macro.sql new file mode 100644 index 00000000000..a400a94f625 --- /dev/null +++ b/test/integration/042_sources_test/macros/macro.sql @@ -0,0 +1,7 @@ +{% macro override_me() -%} + {{ exceptions.raise_compiler_error('this is a bad macro') }} +{%- endmacro %} + +{% macro happy_little_macro() -%} + {{ override_me() }} +{%- endmacro %} diff --git a/test/integration/042_sources_test/models/ephemeral_model.sql b/test/integration/042_sources_test/models/ephemeral_model.sql new file mode 100644 index 00000000000..8de35cd3e21 --- /dev/null +++ b/test/integration/042_sources_test/models/ephemeral_model.sql @@ -0,0 +1,3 @@ +{{ config(materialized='ephemeral') }} + +select 1 as id diff --git 
a/test/integration/042_sources_test/models/schema.yml b/test/integration/042_sources_test/models/schema.yml index 894dbfc03f4..00b32bc6dcd 100644 --- a/test/integration/042_sources_test/models/schema.yml +++ b/test/integration/042_sources_test/models/schema.yml @@ -14,6 +14,8 @@ sources: warn_after: {count: 10, period: hour} error_after: {count: 1, period: day} schema: "{{ var(env_var('DBT_TEST_SCHEMA_NAME_VARIABLE')) }}" + quoting: + identifier: True tables: - name: test_table identifier: source @@ -46,6 +48,8 @@ sources: identifier: other_table - name: other_source schema: "{{ var('test_run_schema') }}" + quoting: + identifier: True tables: - name: test_table identifier: other_source_table diff --git a/test/integration/042_sources_test/test_sources.py b/test/integration/042_sources_test/test_sources.py index 0c4aa17538a..ab56d077443 100644 --- a/test/integration/042_sources_test/test_sources.py +++ b/test/integration/042_sources_test/test_sources.py @@ -1,11 +1,19 @@ -from nose.plugins.attrib import attr -from datetime import datetime, timedelta import json +import multiprocessing import os +import socket +import sys +import time +import unittest +from base64 import standard_b64encode as b64 +from datetime import datetime, timedelta + +import requests from dbt.exceptions import CompilationException from test.integration.base import DBTIntegrationTest, use_profile, AnyFloat, \ AnyStringWith +from dbt.main import handle_and_check class BaseSourcesTest(DBTIntegrationTest): @@ -113,13 +121,14 @@ def test_source_childrens_parents(self): ) self.assertTableDoesNotExist('nonsource_descendant') + class TestSourceFreshness(BaseSourcesTest): def setUp(self): super(TestSourceFreshness, self).setUp() self.maxDiff = None self._id = 100 # this is the db initial value - self.last_inserted_time = "2016-09-19T14:45:51+00:00Z" + self.last_inserted_time = "2016-09-19T14:45:51+00:00" # test_source.test_table should have a loaded_at field of `updated_at` # and a freshness of warn_after: 10 hours, error_after: 18 hours @@ -144,7 +153,7 @@ def _set_updated_at_to(self, delta): 'source': self.adapter.quote('source'), } ) - self.last_inserted_time = insert_time.strftime("%Y-%m-%dT%H:%M:%S+00:00Z") + self.last_inserted_time = insert_time.strftime("%Y-%m-%dT%H:%M:%S+00:00") def _assert_freshness_results(self, path, state): self.assertTrue(os.path.exists(path)) @@ -160,7 +169,7 @@ def _assert_freshness_results(self, path, state): last_inserted_time = self.last_inserted_time if last_inserted_time is None: - last_inserted_time = "2016-09-19T14:45:51+00:00Z" + last_inserted_time = "2016-09-19T14:45:51+00:00" self.assertEqual(data['sources'], { 'source.test.test_source.test_table': { @@ -256,3 +265,594 @@ def test_postgres_malformed_schema_nonstrict_will_not_break_run(self): def test_postgres_malformed_schema_strict_will_break_run(self): with self.assertRaises(CompilationException): self.run_dbt_with_vars(['run'], strict=True) + + +class ServerProcess(multiprocessing.Process): + def __init__(self, cli_vars=None): + self.port = 22991 + handle_and_check_args = [ + '--strict', 'rpc', '--log-cache-events', + '--port', str(self.port), + ] + if cli_vars: + handle_and_check_args.extend(['--vars', cli_vars]) + super(ServerProcess, self).__init__( + target=handle_and_check, + args=(handle_and_check_args,), + name='ServerProcess') + + def is_up(self): + sock = socket.socket() + try: + sock.connect(('localhost', self.port)) + except socket.error: + return False + sock.close() + return True + + def start(self): + 
super(ServerProcess, self).start() + for _ in range(10): + if self.is_up(): + break + time.sleep(0.5) + if not self.is_up(): + self.terminate() + raise Exception('server never appeared!') + + +def query_url(url, query): + headers = {'content-type': 'application/json'} + return requests.post(url, headers=headers, data=json.dumps(query)) + + +class BackgroundQueryProcess(multiprocessing.Process): + def __init__(self, query, url, group=None, name=None): + parent, child = multiprocessing.Pipe() + self.parent_pipe = parent + self.child_pipe = child + self.query = query + self.url = url + super(BackgroundQueryProcess, self).__init__(group=group, name=name) + + def run(self): + try: + result = query_url(self.url, self.query).json() + except Exception as exc: + self.child_pipe.send(('error', str(exc))) + else: + self.child_pipe.send(('result', result)) + + def wait_result(self): + result_type, result = self.parent_pipe.recv() + self.join() + if result_type == 'error': + raise Exception(result) + else: + return result + +_select_from_ephemeral = '''with __dbt__CTE__ephemeral_model as ( + + +select 1 as id +)select * from __dbt__CTE__ephemeral_model''' + + +@unittest.skipIf(os.name == 'nt', 'Windows not supported for now') +class TestRPCServer(BaseSourcesTest): + def setUp(self): + super(TestRPCServer, self).setUp() + self._server = ServerProcess( + cli_vars='{{test_run_schema: {}}}'.format(self.unique_schema()) + ) + self._server.start() + + def tearDown(self): + self._server.terminate() + super(TestRPCServer, self).tearDown() + + @property + def project_config(self): + return { + 'data-paths': ['test/integration/042_sources_test/data'], + 'quoting': {'database': True, 'schema': True, 'identifier': True}, + 'macro-paths': ['test/integration/042_sources_test/macros'], + } + + def build_query(self, method, kwargs, sql=None, test_request_id=1, + macros=None): + body_data = '' + if sql is not None: + body_data += sql + + if macros is not None: + body_data += macros + + if sql is not None or macros is not None: + kwargs['sql'] = b64(body_data.encode('utf-8')).decode('utf-8') + + return { + 'jsonrpc': '2.0', + 'method': method, + 'params': kwargs, + 'id': test_request_id + } + + @property + def url(self): + return 'http://localhost:{}/jsonrpc'.format(self._server.port) + + def query(self, _method, _sql=None, _test_request_id=1, macros=None, **kwargs): + built = self.build_query(_method, kwargs, _sql, _test_request_id, macros) + return query_url(self.url, built) + + def handle_result(self, bg_query, pipe): + result_type, result = pipe.recv() + bg_query.join() + if result_type == 'error': + raise result + else: + return result + + def background_query(self, _method, _sql=None, _test_request_id=1, + _block=False, macros=None, **kwargs): + built = self.build_query(_method, kwargs, _sql, _test_request_id, + macros) + + url = 'http://localhost:{}/jsonrpc'.format(self._server.port) + name = _method + if 'name' in kwargs: + name += ' ' + kwargs['name'] + bg_query = BackgroundQueryProcess(built, url, name=name) + bg_query.start() + return bg_query + + def assertResultHasTimings(self, result, *names): + self.assertIn('timing', result) + timings = result['timing'] + self.assertEqual(len(timings), len(names)) + for expected_name, timing in zip(names, timings): + self.assertIn('name', timing) + self.assertEqual(timing['name'], expected_name) + self.assertIn('started_at', timing) + self.assertIn('completed_at', timing) + datetime.strptime(timing['started_at'], '%Y-%m-%dT%H:%M:%S.%fZ') + 
datetime.strptime(timing['completed_at'], '%Y-%m-%dT%H:%M:%S.%fZ') + + def assertIsResult(self, data): + self.assertEqual(data['id'], 1) + self.assertEqual(data['jsonrpc'], '2.0') + self.assertIn('result', data) + self.assertNotIn('error', data) + return data['result'] + + def assertIsError(self, data, id_=1): + self.assertEqual(data['id'], id_) + self.assertEqual(data['jsonrpc'], '2.0') + self.assertIn('error', data) + self.assertNotIn('result', data) + return data['error'] + + def assertIsErrorWithCode(self, data, code, id_=1): + error = self.assertIsError(data, id_) + self.assertIn('code', error) + self.assertIn('message', error) + self.assertEqual(error['code'], code) + return error + + def assertIsErrorWith(self, data, code, message, error_data): + error = self.assertIsErrorWithCode(data, code) + if message is not None: + self.assertEqual(error['message'], message) + + if error_data is not None: + return self.assertHasErrorData(error, error_data) + else: + return error.get('data') + + def assertResultHasSql(self, data, raw_sql, compiled_sql=None): + if compiled_sql is None: + compiled_sql = raw_sql + result = self.assertIsResult(data) + self.assertIn('logs', result) + self.assertTrue(len(result['logs']) > 0) + self.assertIn('raw_sql', result) + self.assertIn('compiled_sql', result) + self.assertEqual(result['raw_sql'], raw_sql) + self.assertEqual(result['compiled_sql'], compiled_sql) + return result + + def assertSuccessfulCompilationResult(self, data, raw_sql, compiled_sql=None): + result = self.assertResultHasSql(data, raw_sql, compiled_sql) + self.assertNotIn('table', result) + # compile results still have an 'execute' timing, it just represents + # the time to construct a result object. + self.assertResultHasTimings(result, 'compile', 'execute') + + def assertSuccessfulRunResult(self, data, raw_sql, compiled_sql=None, table=None): + result = self.assertResultHasSql(data, raw_sql, compiled_sql) + self.assertIn('table', result) + if table is not None: + self.assertEqual(result['table'], table) + self.assertResultHasTimings(result, 'compile', 'execute') + + @use_profile('postgres') + def test_compile_postgres(self): + trivial = self.query( + 'compile', + 'select 1 as id', + name='foo' + ).json() + self.assertSuccessfulCompilationResult( + trivial, 'select 1 as id' + ) + + ref = self.query( + 'compile', + 'select * from {{ ref("descendant_model") }}', + name='foo' + ).json() + self.assertSuccessfulCompilationResult( + ref, + 'select * from {{ ref("descendant_model") }}', + compiled_sql='select * from "{}"."{}"."descendant_model"'.format( + self.default_database, + self.unique_schema()) + ) + + source = self.query( + 'compile', + 'select * from {{ source("test_source", "test_table") }}', + name='foo' + ).json() + self.assertSuccessfulCompilationResult( + source, + 'select * from {{ source("test_source", "test_table") }}', + compiled_sql='select * from "{}"."{}"."source"'.format( + self.default_database, + self.unique_schema()) + ) + + macro = self.query( + 'compile', + 'select {{ my_macro() }}', + name='foo', + macros='{% macro my_macro() %}1 as id{% endmacro %}' + ).json() + self.assertSuccessfulCompilationResult( + macro, + 'select {{ my_macro() }}', + compiled_sql='select 1 as id' + ) + + macro_override = self.query( + 'compile', + 'select {{ happy_little_macro() }}', + name='foo', + macros='{% macro override_me() %}2 as id{% endmacro %}' + ).json() + self.assertSuccessfulCompilationResult( + macro_override, + 'select {{ happy_little_macro() }}', + compiled_sql='select 2 as id' + ) 
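The compile and run calls in these tests all travel through the JSON-RPC 2.0 envelope assembled by build_query() and posted by query_url(). As a rough sketch only (illustrative values, reusing the b64 alias for standard_b64encode imported at the top of this test module, and not a canonical statement of the server's wire format), the macro-override compile request above corresponds to a POST body along these lines:

    import json
    from base64 import standard_b64encode as b64

    # Illustrative sketch of the request that build_query() produces for a
    # 'compile' call: the raw SQL and any extra macros are concatenated and
    # base64-encoded into params['sql'].
    sql = 'select {{ happy_little_macro() }}'
    macros = '{% macro override_me() %}2 as id{% endmacro %}'
    request = {
        'jsonrpc': '2.0',
        'method': 'compile',
        'params': {
            'name': 'foo',
            'sql': b64((sql + macros).encode('utf-8')).decode('utf-8'),
        },
        'id': 1,
    }
    # POSTed to http://localhost:<port>/jsonrpc with a JSON content type,
    # as query_url() does.
    body = json.dumps(request)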
+ + macro_override_with_if_statement = self.query( + 'compile', + '{% if True %}select {{ happy_little_macro() }}{% endif %}', + name='foo', + macros='{% macro override_me() %}2 as id{% endmacro %}' + ).json() + self.assertSuccessfulCompilationResult( + macro_override_with_if_statement, + '{% if True %}select {{ happy_little_macro() }}{% endif %}', + compiled_sql='select 2 as id' + ) + + ephemeral = self.query( + 'compile', + 'select * from {{ ref("ephemeral_model") }}', + name='foo' + ).json() + self.assertSuccessfulCompilationResult( + ephemeral, + 'select * from {{ ref("ephemeral_model") }}', + compiled_sql=_select_from_ephemeral + ) + + @use_profile('postgres') + def test_run_postgres(self): + # seed + run dbt to make models before using them! + self.run_dbt_with_vars(['seed']) + self.run_dbt_with_vars(['run']) + data = self.query( + 'run', + 'select 1 as id', + name='foo' + ).json() + self.assertSuccessfulRunResult( + data, 'select 1 as id', table={'column_names': ['id'], 'rows': [[1.0]]} + ) + + ref = self.query( + 'run', + 'select * from {{ ref("descendant_model") }} order by updated_at limit 1', + name='foo' + ).json() + self.assertSuccessfulRunResult( + ref, + 'select * from {{ ref("descendant_model") }} order by updated_at limit 1', + compiled_sql='select * from "{}"."{}"."descendant_model" order by updated_at limit 1'.format( + self.default_database, + self.unique_schema()), + table={ + 'column_names': ['favorite_color', 'id', 'first_name', 'email', 'ip_address', 'updated_at'], + 'rows': [['blue', 38.0, 'Gary', 'gray11@statcounter.com', "'40.193.124.56'", '1970-01-27T10:04:51']], + } + ) + + source = self.query( + 'run', + 'select * from {{ source("test_source", "test_table") }} order by updated_at limit 1', + name='foo' + ).json() + self.assertSuccessfulRunResult( + source, + 'select * from {{ source("test_source", "test_table") }} order by updated_at limit 1', + compiled_sql='select * from "{}"."{}"."source" order by updated_at limit 1'.format( + self.default_database, + self.unique_schema()), + table={ + 'column_names': ['favorite_color', 'id', 'first_name', 'email', 'ip_address', 'updated_at'], + 'rows': [['blue', 38.0, 'Gary', 'gray11@statcounter.com', "'40.193.124.56'", '1970-01-27T10:04:51']], + } + ) + + macro = self.query( + 'run', + 'select {{ my_macro() }}', + name='foo', + macros='{% macro my_macro() %}1 as id{% endmacro %}' + ).json() + self.assertSuccessfulRunResult( + macro, + raw_sql='select {{ my_macro() }}', + compiled_sql='select 1 as id', + table={'column_names': ['id'], 'rows': [[1.0]]} + ) + + macro_override = self.query( + 'run', + 'select {{ happy_little_macro() }}', + name='foo', + macros='{% macro override_me() %}2 as id{% endmacro %}' + ).json() + self.assertSuccessfulRunResult( + macro_override, + raw_sql='select {{ happy_little_macro() }}', + compiled_sql='select 2 as id', + table={'column_names': ['id'], 'rows': [[2.0]]} + ) + + macro_override_with_if_statement = self.query( + 'run', + '{% if True %}select {{ happy_little_macro() }}{% endif %}', + name='foo', + macros='{% macro override_me() %}2 as id{% endmacro %}' + ).json() + self.assertSuccessfulRunResult( + macro_override_with_if_statement, + '{% if True %}select {{ happy_little_macro() }}{% endif %}', + compiled_sql='select 2 as id', + table={'column_names': ['id'], 'rows': [[2.0]]} + ) + + macro_with_raw_statement = self.query( + 'run', + '{% raw %}select 1 as{% endraw %}{{ test_macros() }}{% macro test_macros() %} id{% endmacro %}', + name='foo' + ).json() + self.assertSuccessfulRunResult( + 
macro_with_raw_statement, + '{% raw %}select 1 as{% endraw %}{{ test_macros() }}', + compiled_sql='select 1 as id', + table={'column_names': ['id'], 'rows': [[1.0]]} + ) + + macro_with_comment = self.query( + 'run', + '{% raw %}select 1 {% endraw %}{{ test_macros() }} {# my comment #}{% macro test_macros() -%} as{% endmacro %} id{# another comment #}', + name='foo' + ).json() + self.assertSuccessfulRunResult( + macro_with_comment, + '{% raw %}select 1 {% endraw %}{{ test_macros() }} {# my comment #} id{# another comment #}', + compiled_sql='select 1 as id', + table={'column_names': ['id'], 'rows': [[1.0]]} + ) + + ephemeral = self.query( + 'run', + 'select * from {{ ref("ephemeral_model") }}', + name='foo' + ).json() + self.assertSuccessfulRunResult( + ephemeral, + raw_sql='select * from {{ ref("ephemeral_model") }}', + compiled_sql=_select_from_ephemeral, + table={'column_names': ['id'], 'rows': [[1.0]]} + ) + + @use_profile('postgres') + def test_ps_kill_postgres(self): + done_query = self.query('compile', 'select 1 as id', name='done').json() + self.assertIsResult(done_query) + pg_sleeper, sleep_task_id, request_id = self._get_sleep_query() + + empty_ps_result = self.query('ps', completed=False, active=False).json() + result = self.assertIsResult(empty_ps_result) + self.assertEqual(len(result['rows']), 0) + + sleeper_ps_result = self.query('ps', completed=False, active=True).json() + result = self.assertIsResult(sleeper_ps_result) + self.assertEqual(len(result['rows']), 1) + rowdict = result['rows'] + self.assertEqual(rowdict[0]['request_id'], request_id) + self.assertEqual(rowdict[0]['method'], 'run') + self.assertEqual(rowdict[0]['state'], 'running') + self.assertEqual(rowdict[0]['timeout'], None) + + complete_ps_result = self.query('ps', completed=True, active=False).json() + result = self.assertIsResult(complete_ps_result) + self.assertEqual(len(result['rows']), 1) + rowdict = result['rows'] + self.assertEqual(rowdict[0]['request_id'], 1) + self.assertEqual(rowdict[0]['method'], 'compile') + self.assertEqual(rowdict[0]['state'], 'finished') + self.assertEqual(rowdict[0]['timeout'], None) + + all_ps_result = self.query('ps', completed=True, active=True).json() + result = self.assertIsResult(all_ps_result) + self.assertEqual(len(result['rows']), 2) + rowdict = result['rows'] + rowdict.sort(key=lambda r: r['start']) + self.assertEqual(rowdict[0]['request_id'], 1) + self.assertEqual(rowdict[0]['method'], 'compile') + self.assertEqual(rowdict[0]['state'], 'finished') + self.assertEqual(rowdict[0]['timeout'], None) + self.assertEqual(rowdict[1]['request_id'], request_id) + self.assertEqual(rowdict[1]['method'], 'run') + self.assertEqual(rowdict[1]['state'], 'running') + self.assertEqual(rowdict[1]['timeout'], None) + + self.kill_and_assert(pg_sleeper, sleep_task_id, request_id) + + def kill_and_assert(self, pg_sleeper, task_id, request_id): + kill_result = self.query('kill', task_id=task_id).json() + kill_time = time.time() + result = self.assertIsResult(kill_result) + self.assertTrue(result['killed']) + + sleeper_result = pg_sleeper.wait_result() + result_time = time.time() + error = self.assertIsErrorWithCode(sleeper_result, 10009, request_id) + self.assertEqual(error['message'], 'RPC process killed') + self.assertIn('data', error) + error_data = error['data'] + self.assertEqual(error_data['signum'], 2) + self.assertEqual(error_data['message'], 'RPC process killed by signal 2') + self.assertIn('logs', error_data) + # it should take less than 5s to kill the process if things are working 
+ # properly. On python 2.x, things do not work properly. + if sys.version_info.major > 2: + self.assertLess(result_time, kill_time + 5) + return error_data + + def _get_sleep_query(self): + request_id = 90890 + pg_sleeper = self.background_query( + 'run', + 'select pg_sleep(15)', + _test_request_id=request_id, + name='sleeper', + ) + + for _ in range(20): + time.sleep(0.2) + sleeper_ps_result = self.query('ps', completed=False, active=True).json() + result = self.assertIsResult(sleeper_ps_result) + rows = result['rows'] + for row in rows: + if row['request_id'] == request_id and row['state'] == 'running': + return pg_sleeper, row['task_id'], request_id + + self.assertTrue(False, 'request ID never found running!') + + @use_profile('postgres') + def test_ps_kill_longwait_postgres(self): + pg_sleeper, sleep_task_id, request_id = self._get_sleep_query() + + # the test above frequently kills the process during parsing of the + # requested node. That's also a useful test, but we should test that + # we cancel the in-progress sleep query. + time.sleep(3) + + error_data = self.kill_and_assert(pg_sleeper, sleep_task_id, request_id) + # we should have logs if we did anything + self.assertTrue(len(error_data['logs']) > 0) + + @use_profile('postgres') + def test_invalid_requests_postgres(self): + data = self.query( + 'xxxxxnotamethodxxxxx', + 'hi this is not sql' + ).json() + self.assertIsErrorWith(data, -32601, 'Method not found', None) + + data = self.query( + 'compile', + 'select * from {{ reff("nonsource_descendant") }}', + name='mymodel' + ).json() + error_data = self.assertIsErrorWith(data, 10004, 'Compilation Error', { + 'type': 'CompilationException', + 'message': "Compilation Error in rpc mymodel (from remote system)\n 'reff' is undefined", + 'compiled_sql': None, + 'raw_sql': 'select * from {{ reff("nonsource_descendant") }}', + }) + self.assertIn('logs', error_data) + self.assertTrue(len(error_data['logs']) > 0) + + data = self.query( + 'run', + 'hi this is not sql', + name='foo' + ).json() + error_data = self.assertIsErrorWith(data, 10003, 'Database Error', { + 'type': 'DatabaseException', + 'message': 'Database Error in rpc foo (from remote system)\n syntax error at or near "hi"\n LINE 1: hi this is not sql\n ^', + 'compiled_sql': 'hi this is not sql', + 'raw_sql': 'hi this is not sql', + }) + self.assertIn('logs', error_data) + self.assertTrue(len(error_data['logs']) > 0) + + macro_no_override = self.query( + 'run', + 'select {{ happy_little_macro() }}', + name='foo', + ).json() + error_data = self.assertIsErrorWith(macro_no_override, 10004, 'Compilation Error', { + 'type': 'CompilationException', + 'raw_sql': 'select {{ happy_little_macro() }}', + 'compiled_sql': None + }) + self.assertIn('logs', error_data) + self.assertTrue(len(error_data['logs']) > 0) + + def assertHasErrorData(self, error, expected_error_data): + self.assertIn('data', error) + error_data = error['data'] + for key, value in expected_error_data.items(): + self.assertIn(key, error_data) + self.assertEqual(error_data[key], value) + return error_data + + @use_profile('postgres') + def test_timeout_postgres(self): + data = self.query( + 'run', + 'select from pg_sleep(5)', + name='foo', + timeout=1 + ).json() + error = self.assertIsErrorWithCode(data, 10008) + self.assertEqual(error['message'], 'RPC timeout error') + self.assertIn('data', error) + error_data = error['data'] + self.assertIn('timeout', error_data) + self.assertEqual(error_data['timeout'], 1) + self.assertIn('message', error_data) + 
self.assertEqual(error_data['message'], 'RPC timed out after 1s') + self.assertIn('logs', error_data) + self.assertTrue(len(error_data['logs']) > 0) diff --git a/test/integration/043_custom_aliases_test/macros/macros.sql b/test/integration/043_custom_aliases_test/macros/macros.sql new file mode 100644 index 00000000000..4bc6a3990e0 --- /dev/null +++ b/test/integration/043_custom_aliases_test/macros/macros.sql @@ -0,0 +1,21 @@ + +{% macro generate_alias_name(node, custom_alias_name=none) -%} + {%- if custom_alias_name is none -%} + {{ node.name }} + {%- else -%} + custom_{{ custom_alias_name | trim }} + {%- endif -%} +{%- endmacro %} + + +{% macro string_literal(s) -%} + {{ adapter_macro('test.string_literal', s) }} +{%- endmacro %} + +{% macro default__string_literal(s) %} + '{{ s }}'::text +{% endmacro %} + +{% macro bigquery__string_literal(s) %} + cast('{{ s }}' as string) +{% endmacro %} diff --git a/test/integration/043_custom_aliases_test/models/model1.sql b/test/integration/043_custom_aliases_test/models/model1.sql new file mode 100644 index 00000000000..000ce2ed6c5 --- /dev/null +++ b/test/integration/043_custom_aliases_test/models/model1.sql @@ -0,0 +1,3 @@ +{{ config(materialized='table', alias='alias') }} + +select {{ string_literal(this.name) }} as model_name diff --git a/test/integration/043_custom_aliases_test/models/model2.sql b/test/integration/043_custom_aliases_test/models/model2.sql new file mode 100644 index 00000000000..a2de8f099ea --- /dev/null +++ b/test/integration/043_custom_aliases_test/models/model2.sql @@ -0,0 +1,3 @@ +{{ config(materialized='table') }} + +select {{ string_literal(this.name) }} as model_name diff --git a/test/integration/043_custom_aliases_test/models/schema.yml b/test/integration/043_custom_aliases_test/models/schema.yml new file mode 100644 index 00000000000..4d43836e482 --- /dev/null +++ b/test/integration/043_custom_aliases_test/models/schema.yml @@ -0,0 +1,15 @@ +version: 2 + +models: + - name: model1 + columns: + - name: model_name + tests: + - accepted_values: + values: ['custom_alias'] + - name: model2 + columns: + - name: model_name + tests: + - accepted_values: + values: ['model2'] diff --git a/test/integration/043_custom_aliases_test/test_custom_aliases.py b/test/integration/043_custom_aliases_test/test_custom_aliases.py new file mode 100644 index 00000000000..131941f5e74 --- /dev/null +++ b/test/integration/043_custom_aliases_test/test_custom_aliases.py @@ -0,0 +1,23 @@ +from test.integration.base import DBTIntegrationTest, use_profile + + +class TestAliases(DBTIntegrationTest): + @property + def schema(self): + return "custom_aliases_043" + + @property + def models(self): + return "test/integration/043_custom_aliases_test/models" + + @property + def project_config(self): + return { + "macro-paths": ['test/integration/043_custom_aliases_test/macros'], + } + + @use_profile('postgres') + def test_postgres_customer_alias_name(self): + results = self.run_dbt(['run']) + self.assertEqual(len(results), 2) + self.run_dbt(['test']) diff --git a/test/integration/044_run_operations_test/macros/happy_macros.sql b/test/integration/044_run_operations_test/macros/happy_macros.sql new file mode 100644 index 00000000000..6170ebc7657 --- /dev/null +++ b/test/integration/044_run_operations_test/macros/happy_macros.sql @@ -0,0 +1,24 @@ +{% macro no_args() %} + {% if execute %} + {% call statement(auto_begin=True) %} + create table "{{ schema }}"."no_args" (id int); + commit; + {% endcall %} + {% endif %} +{% endmacro %} + + +{% macro 
table_name_args(table_name) %} + {% if execute %} + {% call statement(auto_begin=True) %} + create table "{{ schema }}"."{{ table_name }}" (id int); + commit; + {% endcall %} + {% endif %} +{% endmacro %} + +{% macro vacuum(table_name) %} + {% call statement(auto_begin=false) %} + vacuum "{{ schema }}"."{{ table_name }}" + {% endcall %} +{% endmacro %} diff --git a/test/integration/044_run_operations_test/macros/sad_macros.sql b/test/integration/044_run_operations_test/macros/sad_macros.sql new file mode 100644 index 00000000000..4f2c80bc40f --- /dev/null +++ b/test/integration/044_run_operations_test/macros/sad_macros.sql @@ -0,0 +1,7 @@ +{% macro syntax_error() %} + {% if execute %} + {% call statement() %} + select NOPE NOT A VALID QUERY + {% endcall %} + {% endif %} +{% endmacro %} diff --git a/test/integration/044_run_operations_test/models/model.sql b/test/integration/044_run_operations_test/models/model.sql new file mode 100644 index 00000000000..43258a71464 --- /dev/null +++ b/test/integration/044_run_operations_test/models/model.sql @@ -0,0 +1 @@ +select 1 as id diff --git a/test/integration/044_run_operations_test/test_run_operations.py b/test/integration/044_run_operations_test/test_run_operations.py new file mode 100644 index 00000000000..c66de6d8af5 --- /dev/null +++ b/test/integration/044_run_operations_test/test_run_operations.py @@ -0,0 +1,58 @@ +from test.integration.base import DBTIntegrationTest, use_profile +import yaml + + +class TestOperations(DBTIntegrationTest): + @property + def schema(self): + return "run_operations_044" + + @property + def models(self): + return "test/integration/044_run_operations_test/models" + + @property + def project_config(self): + return { + "macro-paths": ['test/integration/044_run_operations_test/macros'], + } + + def run_operation(self, macro, expect_pass=True, extra_args=None, **kwargs): + args = ['run-operation'] + if macro: + args.extend(('--macro', macro)) + if kwargs: + args.extend(('--args', yaml.safe_dump(kwargs))) + if extra_args: + args.extend(extra_args) + return self.run_dbt(args, expect_pass=expect_pass) + + @use_profile('postgres') + def test__postgres_macro_noargs(self): + self.run_operation('no_args') + self.assertTableDoesExist('no_args') + + @use_profile('postgres') + def test__postgres_macro_args(self): + self.run_operation('table_name_args', table_name='my_fancy_table') + self.assertTableDoesExist('my_fancy_table') + + @use_profile('postgres') + def test__postgres_macro_exception(self): + self.run_operation('syntax_error', False) + + @use_profile('postgres') + def test__postgres_macro_missing(self): + self.run_operation('this_macro_does_not_exist', False) + + @use_profile('postgres') + def test__postgres_cannot_connect(self): + self.run_operation('no_args', + extra_args=['--target', 'noaccess'], + expect_pass=False) + + @use_profile('postgres') + def test__postgres_vacuum(self): + self.run_dbt(['run']) + # this should succeed + self.run_operation('vacuum', table_name='model') diff --git a/test/integration/base.py b/test/integration/base.py index 8f0c7838eae..e1c77954b37 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -1,6 +1,7 @@ import unittest import dbt.main as dbt -import os, shutil +import os +import shutil import yaml import random import time @@ -8,7 +9,8 @@ from datetime import datetime from functools import wraps -from nose.plugins.attrib import attr +import pytest +from mock import patch import dbt.flags as flags @@ -16,6 +18,9 @@ from dbt.clients.jinja import template_cache from 
dbt.config import RuntimeConfig from dbt.compat import basestring +from dbt.context import common + +from contextlib import contextmanager from dbt.logger import GLOBAL_LOGGER as logger import logging @@ -71,6 +76,12 @@ class DBTIntegrationTest(unittest.TestCase): prefix = "test{}{:04}".format(int(time.time()), random.randint(0, 9999)) setup_alternate_db = False + @property + def database_host(self): + if os.name == 'nt': + return 'localhost' + return 'database' + def postgres_profile(self): return { 'config': { @@ -81,7 +92,7 @@ def postgres_profile(self): 'default2': { 'type': 'postgres', 'threads': 4, - 'host': 'database', + 'host': self.database_host, 'port': 5432, 'user': 'root', 'pass': 'password', @@ -91,7 +102,7 @@ def postgres_profile(self): 'noaccess': { 'type': 'postgres', 'threads': 4, - 'host': 'database', + 'host': self.database_host, 'port': 5432, 'user': 'noaccess', 'pass': 'password', @@ -366,7 +377,7 @@ def _get_schema_fqn(self, database, schema): def _create_schema_named(self, database, schema): if self.adapter_type == 'bigquery': - self.adapter.create_schema(database, schema, '__test') + self.adapter.create_schema(database, schema) else: schema_fqn = self._get_schema_fqn(database, schema) self.run_sql(self.CREATE_SCHEMA_STATEMENT.format(schema_fqn)) @@ -375,7 +386,7 @@ def _create_schema_named(self, database, schema): def _drop_schema_named(self, database, schema): if self.adapter_type == 'bigquery' or self.adapter_type == 'presto': self.adapter.drop_schema( - database, schema, '__test' + database, schema ) else: schema_fqn = self._get_schema_fqn(database, schema) @@ -383,9 +394,10 @@ def _drop_schema_named(self, database, schema): def _create_schemas(self): schema = self.unique_schema() - self._create_schema_named(self.default_database, schema) - if self.setup_alternate_db and self.adapter_type == 'snowflake': - self._create_schema_named(self.alternative_database, schema) + with self.adapter.connection_named('__test'): + self._create_schema_named(self.default_database, schema) + if self.setup_alternate_db and self.adapter_type == 'snowflake': + self._create_schema_named(self.alternative_database, schema) def _drop_schemas_adapter(self): schema = self.unique_schema() @@ -415,10 +427,11 @@ def _drop_schemas_sql(self): self._created_schemas.clear() def _drop_schemas(self): - if self.adapter_type == 'bigquery' or self.adapter_type == 'presto': - self._drop_schemas_adapter() - else: - self._drop_schemas_sql() + with self.adapter.connection_named('__test'): + if self.adapter_type == 'bigquery' or self.adapter_type == 'presto': + self._drop_schemas_adapter() + else: + self._drop_schemas_sql() @property def project_config(self): @@ -428,16 +441,25 @@ def project_config(self): def profile_config(self): return {} - def run_dbt(self, args=None, expect_pass=True, strict=True): + def run_dbt(self, args=None, expect_pass=True, strict=True, parser=True): if args is None: args = ["run"] + final_args = [] + if strict: - args = ["--strict"] + args - args.append('--log-cache-events') - logger.info("Invoking dbt with {}".format(args)) + final_args.append('--strict') + if parser: + final_args.append('--test-new-parser') + if os.getenv('DBT_TEST_SINGLE_THREADED') in ('y', 'Y', '1'): + final_args.append('--single-threaded') + + final_args.extend(args) + final_args.append('--log-cache-events') - res, success = dbt.handle_and_check(args) + logger.info("Invoking dbt with {}".format(final_args)) + + res, success = dbt.handle_and_check(final_args) self.assertEqual( success, expect_pass, "dbt exit 
state did not match expected") @@ -491,8 +513,7 @@ def run_sql_bigquery(self, sql, fetch): else: return list(res) - def run_sql_presto(self, sql, fetch, connection_name=None): - conn = self.adapter.acquire_connection(connection_name) + def run_sql_presto(self, sql, fetch, conn): cursor = conn.handle.cursor() try: cursor.execute(sql) @@ -513,6 +534,24 @@ def run_sql_presto(self, sql, fetch, connection_name=None): conn.handle.commit() conn.transaction_open = False + def run_sql_common(self, sql, fetch, conn): + with conn.handle.cursor() as cursor: + try: + cursor.execute(sql) + conn.handle.commit() + if fetch == 'one': + return cursor.fetchone() + elif fetch == 'all': + return cursor.fetchall() + else: + return + except BaseException as e: + conn.handle.rollback() + print(sql) + print(e) + raise e + finally: + conn.transaction_open = False def run_sql(self, query, fetch='None', kwargs=None, connection_name=None): if connection_name is None: @@ -522,30 +561,15 @@ def run_sql(self, query, fetch='None', kwargs=None, connection_name=None): return sql = self.transform_sql(query, kwargs=kwargs) - if self.adapter_type == 'bigquery': - return self.run_sql_bigquery(sql, fetch) - elif self.adapter_type == 'presto': - return self.run_sql_presto(sql, fetch, connection_name) - - conn = self.adapter.acquire_connection(connection_name) - with conn.handle.cursor() as cursor: - logger.debug('test connection "{}" executing: {}'.format(connection_name, sql)) - try: - cursor.execute(sql) - conn.handle.commit() - if fetch == 'one': - return cursor.fetchone() - elif fetch == 'all': - return cursor.fetchall() - else: - return - except BaseException as e: - conn.handle.rollback() - print(query) - print(e) - raise e - finally: - conn.transaction_open = False + + with self.test_connection(connection_name) as conn: + logger.debug('test connection "{}" executing: {}'.format(conn.name, sql)) + if self.adapter_type == 'bigquery': + return self.run_sql_bigquery(sql, fetch) + elif self.adapter_type == 'presto': + return self.run_sql_presto(sql, fetch, conn) + else: + return self.run_sql_common(sql, fetch, conn) def _ilike(self, target, value): # presto has this regex substitution monstrosity instead of 'ilike' @@ -606,11 +630,23 @@ def filter_many_columns(self, column): char_size = 16777216 return (table_name, column_name, data_type, char_size) + @contextmanager + def test_connection(self, name=None): + """Create a test connection context where all executed macros, etc will + get self.adapter as the adapter. 
+ + This allows tests to run normal adapter macros as if reset_adapters() + were not called by handle_and_check (for asserts, etc) + """ + if name is None: + name = '__test' + with patch.object(common, 'get_adapter', return_value=self.adapter): + with self.adapter.connection_named(name) as conn: + yield conn + def get_relation_columns(self, relation): - columns = self.adapter.get_columns_in_relation( - relation, - model_name='__test' - ) + with self.test_connection(): + columns = self.adapter.get_columns_in_relation(relation) return sorted(((c.name, c.dtype, c.char_size) for c in columns), key=lambda x: x[0]) @@ -775,7 +811,8 @@ def assertManyRelationsEqual(self, relations, default_schema=None, default_datab specs.append(relation) - column_specs = self.get_many_relation_columns(specs) + with self.test_connection(): + column_specs = self.get_many_relation_columns(specs) # make sure everyone has equal column definitions first_columns = None @@ -961,7 +998,7 @@ def test_snowflake_thing(self): self.assertEqual(self.adapter_type, 'snowflake') """ def outer(wrapped): - @attr(type=profile_name) + @getattr(pytest.mark, 'profile_'+profile_name) @wraps(wrapped) def func(self, *args, **kwargs): return wrapped(self, *args, **kwargs) diff --git a/test/unit/test_schema.py b/test/unit/test_base_column.py similarity index 87% rename from test/unit/test_schema.py rename to test/unit/test_base_column.py index ec411568331..e48917af839 100644 --- a/test/unit/test_schema.py +++ b/test/unit/test_base_column.py @@ -1,12 +1,13 @@ import unittest import decimal -import dbt.schema + +from dbt.adapters.base import Column class TestStringType(unittest.TestCase): def test__character_type(self): - col = dbt.schema.Column( + col = Column( 'fieldname', 'character', char_size=10 @@ -18,7 +19,7 @@ def test__character_type(self): class TestNumericType(unittest.TestCase): def test__numeric_type(self): - col = dbt.schema.Column( + col = Column( 'fieldname', 'numeric', numeric_precision=decimal.Decimal('12'), @@ -28,7 +29,7 @@ def test__numeric_type(self): def test__numeric_type_with_no_precision(self): # PostgreSQL, at least, will allow empty numeric precision - col = dbt.schema.Column( + col = Column( 'fieldname', 'numeric', numeric_precision=None) diff --git a/test/unit/test_bigquery_adapter.py b/test/unit/test_bigquery_adapter.py index b667cf38a6f..f982d40833f 100644 --- a/test/unit/test_bigquery_adapter.py +++ b/test/unit/test_bigquery_adapter.py @@ -69,7 +69,7 @@ def get_adapter(self, target): profile=profile, ) adapter = BigQueryAdapter(config) - inject_adapter('bigquery', adapter) + inject_adapter(adapter) return adapter @@ -109,14 +109,14 @@ def test_cancel_open_connections_empty(self): def test_cancel_open_connections_master(self): adapter = self.get_adapter('oauth') - adapter.connections.in_use['master'] = object() + adapter.connections.thread_connections[0] = object() self.assertEqual(adapter.cancel_open_connections(), None) def test_cancel_open_connections_single(self): adapter = self.get_adapter('oauth') - adapter.connections.in_use.update({ - 'master': object(), - 'model': object(), + adapter.connections.thread_connections.update({ + 0: object(), + 1: object(), }) # actually does nothing self.assertEqual(adapter.cancel_open_connections(), None) diff --git a/test/unit/test_config.py b/test/unit/test_config.py index 1163468a535..ce3971a89c4 100644 --- a/test/unit/test_config.py +++ b/test/unit/test_config.py @@ -42,7 +42,6 @@ def temp_cd(path): 'sort': 'timestamp', 'materialized': 'incremental', 'dist': 'user_id', 
- 'sql_where': 'created_at > (select max(created_at) from {{ this }})', 'unique_key': 'id' }, 'base': { @@ -357,7 +356,6 @@ def from_args(self, project_profile_name='default', **kwargs): kw = { 'args': self.args, 'project_profile_name': project_profile_name, - 'cli_vars': {}, } kw.update(kwargs) return dbt.config.Profile.from_args(**kw) @@ -484,7 +482,7 @@ def test_cli_and_env_vars(self): self.args.target = 'cli-and-env-vars' self.args.vars = '{"cli_value_host": "cli-postgres-host"}' with mock.patch.dict(os.environ, self.env_override): - profile = self.from_args(cli_vars=None) + profile = self.from_args() from_raw = self.from_raw_profile_info( target_override='cli-and-env-vars', cli_vars={'cli_value_host': 'cli-postgres-host'}, @@ -756,7 +754,7 @@ def test_no_project(self): def test_invalid_version(self): self.default_project_data['require-dbt-version'] = 'hello!' - with self.assertRaises(dbt.exceptions.DbtProjectError) as exc: + with self.assertRaises(dbt.exceptions.DbtProjectError): dbt.config.Project.from_project_config(self.default_project_data) def test_unsupported_version(self): @@ -800,16 +798,18 @@ def test__get_unused_resource_config_paths_empty(self): ))}, []) self.assertEqual(len(unused), 0) - @mock.patch.object(dbt.config.project, 'logger') - def test__warn_for_unused_resource_config_paths_empty(self, mock_logger): + def test__warn_for_unused_resource_config_paths_empty(self): project = dbt.config.Project.from_project_config( self.default_project_data ) - unused = project.warn_for_unused_resource_config_paths({'models': frozenset(( - ('my_test_project', 'foo', 'bar'), - ('my_test_project', 'foo', 'baz'), - ))}, []) - mock_logger.info.assert_not_called() + dbt.flags.WARN_ERROR = True + try: + unused = project.warn_for_unused_resource_config_paths({'models': frozenset(( + ('my_test_project', 'foo', 'bar'), + ('my_test_project', 'foo', 'baz'), + ))}, []) + finally: + dbt.flags.WARN_ERROR = False def test_none_values(self): self.default_project_data.update({ @@ -892,8 +892,7 @@ def test__warn_for_unused_resource_config_paths(self, warn_or_error): unused = project.warn_for_unused_resource_config_paths(self.used, []) warn_or_error.assert_called_once() - @mock.patch.object(dbt.config.project, 'logger') - def test__warn_for_unused_resource_config_paths_disabled(self, mock_logger): + def test__warn_for_unused_resource_config_paths_disabled(self): project = dbt.config.Project.from_project_config( self.default_project_data ) @@ -923,7 +922,7 @@ def test_from_project_root(self): def test_with_invalid_package(self): self.write_packages({'invalid': ['not a package of any kind']}) - with self.assertRaises(dbt.exceptions.DbtProjectError) as exc: + with self.assertRaises(dbt.exceptions.DbtProjectError): dbt.config.Project.from_project_root(self.project_dir, {}) @@ -971,8 +970,7 @@ def from_parts(self, exc=None): if exc is None: return dbt.config.RuntimeConfig.from_parts(project, profile, self.args) - with self.assertRaises(exc) as raised: - err = raised + with self.assertRaises(exc) as err: dbt.config.RuntimeConfig.from_parts(project, profile, self.args) return err diff --git a/test/unit/test_deps.py b/test/unit/test_deps.py index 1e18856b7f0..06eb163e38d 100644 --- a/test/unit/test_deps.py +++ b/test/unit/test_deps.py @@ -151,8 +151,7 @@ def test_resolve_missing_package(self): package='fishtown-analytics-test/a', version='0.1.2' ) - with self.assertRaises(dbt.exceptions.DependencyException) as e: - exc = e + with self.assertRaises(dbt.exceptions.DependencyException) as exc: a.resolve_version() 
msg = 'Package fishtown-analytics-test/a was not found in the package index' @@ -169,8 +168,7 @@ def test_resolve_missing_version(self): package='fishtown-analytics-test/a', version='0.1.2' ) - with self.assertRaises(dbt.exceptions.DependencyException) as e: - exc = e + with self.assertRaises(dbt.exceptions.DependencyException) as exc: a.resolve_version() msg = ( "Could not find a matching version for package " @@ -195,8 +193,7 @@ def test_resolve_conflict(self): version='0.1.3' ) c = a.incorporate(b) - with self.assertRaises(dbt.exceptions.DependencyException) as e: - exc = e + with self.assertRaises(dbt.exceptions.DependencyException) as exc: c.resolve_version() msg = ( "Version error for package fishtown-analytics-test/a: Could not " diff --git a/test/unit/test_docs_blocks.py b/test/unit/test_docs_blocks.py index 17fd0e9fd38..104ae251af4 100644 --- a/test/unit/test_docs_blocks.py +++ b/test/unit/test_docs_blocks.py @@ -1,3 +1,4 @@ +import os import mock import unittest @@ -54,6 +55,15 @@ class DocumentationParserTest(unittest.TestCase): def setUp(self): + if os.name == 'nt': + self.root_path = 'C:\\test_root' + self.subdir_path = 'C:\\test_root\\test_subdir' + self.testfile_path = 'C:\\test_root\\test_subdir\\test_file.md' + else: + self.root_path = '/test_root' + self.subdir_path = '/test_root/test_subdir' + self.testfile_path = '/test_root/test_subdir/test_file.md' + profile_data = { 'outputs': { 'test': { @@ -72,14 +82,14 @@ def setUp(self): 'name': 'root', 'version': '0.1', 'profile': 'test', - 'project-root': '/test_root', + 'project-root': self.root_path, } subdir_project = { 'name': 'some_package', 'version': '0.1', 'profile': 'test', - 'project-root': '/test_root/test_subdir', + 'project-root': self.subdir_path, 'quoting': {}, } self.root_project_config = config_from_parts_or_dicts( @@ -88,35 +98,33 @@ def setUp(self): self.subdir_project_config = config_from_parts_or_dicts( project=subdir_project, profile=profile_data ) - - @mock.patch('dbt.clients.system') def test_load_file(self, system): system.load_file_contents.return_value = TEST_DOCUMENTATION_FILE system.find_matching.return_value = [{ 'relative_path': 'test_file.md', - 'absolute_path': '/test_root/test_subdir/test_file.md', - 'searched_path': '/test_root/test_subdir', + 'absolute_path': self.testfile_path, + 'searched_path': self.subdir_path, }] results = list(docs.DocumentationParser.load_file( - 'some_package', '/test_root', ['test_subdir']) + 'some_package', self.root_path, ['test_subdir']) ) self.assertEqual(len(results), 1) result = results[0] self.assertEqual(result.package_name, 'some_package') self.assertEqual(result.file_contents, TEST_DOCUMENTATION_FILE) self.assertEqual(result.original_file_path, - '/test_root/test_subdir/test_file.md') - self.assertEqual(result.root_path, '/test_root') + self.testfile_path) + self.assertEqual(result.root_path, self.root_path) self.assertEqual(result.resource_type, NodeType.Documentation) self.assertEqual(result.path, 'test_file.md') def test_parse(self): docfile = UnparsedDocumentationFile( - root_path='/test_root', + root_path=self.root_path, resource_type=NodeType.Documentation, path='test_file.md', - original_file_path='/test_root/test_subdir/test_file.md', + original_file_path=self.testfile_path, package_name='some_package', file_contents=TEST_DOCUMENTATION_FILE ) diff --git a/test/unit/test_graph.py b/test/unit/test_graph.py index 88f3f15e694..5d34c3d97d5 100644 --- a/test/unit/test_graph.py +++ b/test/unit/test_graph.py @@ -32,6 +32,7 @@ def tearDown(self): 
self.load_projects_patcher.stop() self.find_matching_patcher.stop() self.load_file_contents_patcher.stop() + self.get_adapter_patcher.stop() def setUp(self): dbt.flags.STRICT_MODE = True @@ -41,6 +42,8 @@ def setUp(self): self.load_projects_patcher = patch('dbt.loader._load_projects') self.find_matching_patcher = patch('dbt.clients.system.find_matching') self.load_file_contents_patcher = patch('dbt.clients.system.load_file_contents') + self.get_adapter_patcher = patch('dbt.context.parser.get_adapter') + self.factory = self.get_adapter_patcher.start() def mock_write_gpickle(graph, outfile): self.graph_result = graph @@ -52,7 +55,7 @@ def mock_write_gpickle(graph, outfile): 'test': { 'type': 'postgres', 'threads': 4, - 'host': 'database', + 'host': 'thishostshouldnotexist', 'port': 5432, 'user': 'root', 'pass': 'password', @@ -205,7 +208,6 @@ def test__model_incremental(self): "test_models_compile": { "model_one": { "materialized": "incremental", - "sql_where": "created_at", "unique_key": "id" }, } diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py index 21abd573b5b..5bcab016666 100644 --- a/test/unit/test_jinja.py +++ b/test/unit/test_jinja.py @@ -1,6 +1,9 @@ import unittest from dbt.clients.jinja import get_template +from dbt.clients.jinja import extract_toplevel_blocks +from dbt.exceptions import CompilationException + class TestJinja(unittest.TestCase): def test_do(self): @@ -9,3 +12,316 @@ def test_do(self): template = get_template(s, {}) mod = template.make_module() self.assertEqual(mod.my_dict, {'a': 1}) + + +class TestBlockLexer(unittest.TestCase): + def test_basic(self): + body = '{{ config(foo="bar") }}\r\nselect * from this.that\r\n' + block_data = ' \n\r\t{%- mytype foo %}'+body+'{%endmytype -%}' + blocks = extract_toplevel_blocks(block_data) + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].block_type_name, 'mytype') + self.assertEqual(blocks[0].block_name, 'foo') + self.assertEqual(blocks[0].contents, body) + self.assertEqual(blocks[0].full_block, block_data) + + def test_multiple(self): + body_one = '{{ config(foo="bar") }}\r\nselect * from this.that\r\n' + body_two = ( + '{{ config(bar=1)}}\r\nselect * from {% if foo %} thing ' + '{% else %} other_thing {% endif %}' + ) + + block_data = ( + ' {% mytype foo %}' + body_one + '{% endmytype %}' + + '\r\n{% othertype bar %}' + body_two + '{% endothertype %}' + ) + all_blocks = extract_toplevel_blocks(block_data) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + + def test_comments(self): + body = '{{ config(foo="bar") }}\r\nselect * from this.that\r\n' + comment = '{# my comment #}' + block_data = ' \n\r\t{%- mytype foo %}'+body+'{%endmytype -%}' + all_blocks = extract_toplevel_blocks(comment+block_data) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].block_type_name, 'mytype') + self.assertEqual(blocks[0].block_name, 'foo') + self.assertEqual(blocks[0].contents, body) + self.assertEqual(blocks[0].full_block, block_data) + + def test_evil_comments(self): + body = '{{ config(foo="bar") }}\r\nselect * from this.that\r\n' + comment = '{# external comment {% othertype bar %} select * from thing.other_thing{% endothertype %} #}' + block_data = ' \n\r\t{%- mytype foo %}'+body+'{%endmytype -%}' + all_blocks = extract_toplevel_blocks(comment+block_data) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + 
self.assertEqual(blocks[0].block_type_name, 'mytype') + self.assertEqual(blocks[0].block_name, 'foo') + self.assertEqual(blocks[0].contents, body) + self.assertEqual(blocks[0].full_block, block_data) + + def test_nested_comments(self): + body = '{# my comment #} {{ config(foo="bar") }}\r\nselect * from {# my other comment embedding {% endmytype %} #} this.that\r\n' + block_data = ' \n\r\t{%- mytype foo %}'+body+'{% endmytype -%}' + comment = '{# external comment {% othertype bar %} select * from thing.other_thing{% endothertype %} #}' + all_blocks = extract_toplevel_blocks(comment+block_data) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].block_type_name, 'mytype') + self.assertEqual(blocks[0].block_name, 'foo') + self.assertEqual(blocks[0].contents, body) + self.assertEqual(blocks[0].full_block, block_data) + + def test_complex_file(self): + all_blocks = extract_toplevel_blocks(complex_archive_file) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 3) + self.assertEqual(blocks[0].block_type_name, 'mytype') + self.assertEqual(blocks[0].block_name, 'foo') + self.assertEqual(blocks[0].full_block, '{% mytype foo %} some stuff {% endmytype %}') + self.assertEqual(blocks[0].contents, ' some stuff ') + self.assertEqual(blocks[1].block_type_name, 'mytype') + self.assertEqual(blocks[1].block_name, 'bar') + self.assertEqual(blocks[1].full_block, bar_block) + self.assertEqual(blocks[1].contents, bar_block[16:-15].rstrip()) + self.assertEqual(blocks[2].block_type_name, 'myothertype') + self.assertEqual(blocks[2].block_name, 'x') + self.assertEqual(blocks[2].full_block, x_block.strip()) + self.assertEqual(blocks[2].contents, x_block[len('\n{% myothertype x %}'):-len('{% endmyothertype %}\n')]) + + def test_peaceful_macro_coexistence(self): + body = '{# my macro #} {% macro foo(a, b) %} do a thing {%- endmacro %} {# my model #} {% a b %} {% enda %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].block_type_name, 'macro') + self.assertEqual(blocks[0].block_name, 'foo') + self.assertEqual(blocks[0].contents, ' do a thing') + self.assertEqual(blocks[1].block_type_name, 'a') + self.assertEqual(blocks[1].block_name, 'b') + self.assertEqual(blocks[1].contents, ' ') + + def test_macro_with_crazy_args(self): + body = '''{% macro foo(a, b=asdf("cool this is 'embedded'" * 3) + external_var, c)%}cool{# block comment with {% endmacro %} in it #} stuff here {% endmacro %}''' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].block_type_name, 'macro') + self.assertEqual(blocks[0].block_name, 'foo') + self.assertEqual(blocks[0].contents, 'cool{# block comment with {% endmacro %} in it #} stuff here ') + + def test_materialization_parse(self): + body = '{% materialization xxx, default %} ... {% endmaterialization %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].block_type_name, 'materialization') + self.assertEqual(blocks[0].block_name, 'xxx') + self.assertEqual(blocks[0].full_block, body) + + body = '{% materialization xxx, adapter="other" %} ... 
{% endmaterialization %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].block_type_name, 'materialization') + self.assertEqual(blocks[0].block_name, 'xxx') + self.assertEqual(blocks[0].full_block, body) + + def test_nested_failure(self): + # we don't allow nesting same blocks + # ideally we would not allow nesting any, but that's much harder + body = '{% myblock a %} {% myblock b %} {% endmyblock %} {% endmyblock %}' + with self.assertRaises(CompilationException): + extract_toplevel_blocks(body) + + def test_incomplete_block_failure(self): + fullbody = '{% myblock foo %} {% endblock %}' + for length in range(1, len(fullbody)-1): + body = fullbody[:length] + with self.assertRaises(CompilationException): + extract_toplevel_blocks(body) + + def test_wrong_end_failure(self): + body = '{% myblock foo %} {% endotherblock %}' + with self.assertRaises(CompilationException): + extract_toplevel_blocks(body) + + def test_comment_no_end_failure(self): + body = '{# ' + with self.assertRaises(CompilationException): + extract_toplevel_blocks(body) + + def test_comment_only(self): + body = '{# myblock #}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 0) + + def test_comment_block_self_closing(self): + # test the case where a comment start looks a lot like it closes itself + # (but it doesn't in jinja!) + body = '{#} {% myblock foo %} {#}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 0) + + def test_embedded_self_closing_comment_block(self): + body = '{% myblock foo %} {#}{% endmyblock %} {#}{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].full_block, body) + self.assertEqual(blocks[0].contents, ' {#}{% endmyblock %} {#}') + + def test_set_statement(self): + body = '{% set x = 1 %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].full_block, '{% set x = 1 %}') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + + def test_set_block(self): + body = '{% set x %}1{% endset %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].contents, '1') + self.assertEqual(blocks[0].block_type_name, 'set') + self.assertEqual(blocks[0].block_name, 'x') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + + def test_crazy_set_statement(self): + body = '{% set x = (thing("{% myblock foo %}")) %}{% otherblock bar %}x{% endotherblock %}{% set y = otherthing("{% myblock foo %}") %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 3) + self.assertEqual(blocks[0].full_block, '{% set x = (thing("{% myblock foo %}")) %}') + self.assertEqual(blocks[0].block_type_name, 'set') + self.assertEqual(blocks[1].full_block, '{% otherblock bar %}x{% endotherblock %}') + 
self.assertEqual(blocks[1].block_type_name, 'otherblock') + self.assertEqual(blocks[2].full_block, '{% set y = otherthing("{% myblock foo %}") %}') + self.assertEqual(blocks[2].block_type_name, 'set') + + def test_do_statement(self): + body = '{% do thing.update() %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].full_block, '{% do thing.update() %}') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + + def test_deceptive_do_statement(self): + body = '{% do thing %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].full_block, '{% do thing %}') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + + def test_do_block(self): + body = '{% do %}thing.update(){% enddo %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].contents, 'thing.update()') + self.assertEqual(blocks[0].block_type_name, 'do') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + + def test_crazy_do_statement(self): + body = '{% do (thing("{% myblock foo %}")) %}{% otherblock bar %}x{% endotherblock %}{% do otherthing("{% myblock foo %}") %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 3) + self.assertEqual(blocks[0].full_block, '{% do (thing("{% myblock foo %}")) %}') + self.assertEqual(blocks[0].block_type_name, 'do') + self.assertEqual(blocks[1].full_block, '{% otherblock bar %}x{% endotherblock %}') + self.assertEqual(blocks[1].block_type_name, 'otherblock') + self.assertEqual(blocks[2].full_block, '{% do otherthing("{% myblock foo %}") %}') + self.assertEqual(blocks[2].block_type_name, 'do') + + def test_awful_jinja(self): + all_blocks = extract_toplevel_blocks(if_you_do_this_you_are_awful) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 4) + self.assertEqual(blocks[0].block_type_name, 'do') + self.assertEqual(blocks[0].full_block, '''{% do\n set('foo="bar"')\n%}''') + self.assertEqual(blocks[1].block_type_name, 'set') + self.assertEqual(blocks[1].full_block, '''{% set x = ("100" + "hello'" + '%}') %}''') + self.assertEqual(blocks[2].block_type_name, 'archive') + self.assertEqual(blocks[2].contents, '\n '.join([ + '''{% set x = ("{% endarchive %}" + (40 * '%})')) %}''', + '{# {% endarchive %} #}', + '{% embedded %}', + ' some block data right here', + '{% endembedded %}' + ])) + self.assertEqual(blocks[3].block_type_name, 'materialization') + self.assertEqual(blocks[3].contents, '\nhi\n') + + def test_quoted_endblock_within_block(self): + body = '{% myblock something -%} {% set x = ("{% endmyblock %}") %} {% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].block_type_name, 'myblock') + self.assertEqual(blocks[0].contents, '{% set x = ("{% endmyblock %}") %} ') + +bar_block = '''{% mytype bar %} +{# a comment + that inside it has 
+ {% mytype baz %} +{% endmyothertype %} +{% endmytype %} +{% endmytype %} + {# +{% endmytype %}#} + +some other stuff + +{%- endmytype%}''' + +x_block = ''' +{% myothertype x %} +before +{##} +and after +{% endmyothertype %} +''' + +complex_archive_file = ''' +{#some stuff {% mytype foo %} #} +{% mytype foo %} some stuff {% endmytype %} + +'''+bar_block+x_block + + +if_you_do_this_you_are_awful = ''' +{#} here is a comment with a block inside {% block x %} asdf {% endblock %} {#} +{% do + set('foo="bar"') +%} +{% set x = ("100" + "hello'" + '%}') %} +{% archive something -%} + {% set x = ("{% endarchive %}" + (40 * '%})')) %} + {# {% endarchive %} #} + {% embedded %} + some block data right here + {% endembedded %} +{%- endarchive %} + +{% raw %} + {% set x = SYNTAX ERROR} +{% endraw %} + + +{% materialization whatever, adapter='thing' %} +hi +{% endmaterialization %} +''' + + diff --git a/test/unit/test_parser.py b/test/unit/test_parser.py index 7cce0ca75e4..854d607dd4e 100644 --- a/test/unit/test_parser.py +++ b/test/unit/test_parser.py @@ -54,7 +54,6 @@ def setUp(self): 'project-root': os.path.abspath('.'), } - self.root_project_config = config_from_parts_or_dicts( project=root_project, profile=profile_data, @@ -76,8 +75,11 @@ def setUp(self): 'root': self.root_project_config, 'snowplow': self.snowplow_project_config } + self.patcher = mock.patch('dbt.context.parser.get_adapter') + self.factory = self.patcher.start() - + def tearDown(self): + self.patcher.stop() class SourceConfigTest(BaseParserTest): def test__source_config_single_call(self): @@ -206,7 +208,11 @@ def setUp(self): database='test', schema='foo', identifier='bar', - resource_type='source' + resource_type='source', + quoting={ + 'schema': True, + 'identifier': False, + } ) self._expected_source_tests = [ @@ -468,6 +474,9 @@ def test__source_schema(self): - name: my_source loader: some_loader description: my source description + quoting: + schema: True + identifier: True freshness: warn_after: count: 10 @@ -476,7 +485,7 @@ def test__source_schema(self): count: 20 period: hour loaded_at_field: something - schema: foo + schema: '{{ var("test_schema_name") }}' tables: - name: my_table description: "my table description" @@ -485,6 +494,8 @@ def test__source_schema(self): warn_after: count: 7 period: hour + quoting: + identifier: False columns: - name: id description: user ID @@ -586,6 +597,8 @@ def test__model_schema(self): def test__mixed_schema(self): test_yml = yaml.safe_load(''' version: 2 + quoting: + database: True models: - name: model_one description: blah blah @@ -609,6 +622,9 @@ def test__mixed_schema(self): - name: my_source loader: some_loader description: my source description + quoting: + schema: True + identifier: True freshness: warn_after: count: 10 @@ -626,6 +642,8 @@ def test__mixed_schema(self): warn_after: count: 7 period: hour + quoting: + identifier: False columns: - name: id description: user ID @@ -681,6 +699,9 @@ def test__source_schema_invalid_test_strict(self): - name: my_source loader: some_loader description: my source description + quoting: + schema: True + identifier: True freshness: warn_after: count: 10 @@ -698,6 +719,8 @@ def test__source_schema_invalid_test_strict(self): warn_after: count: 7 period: hour + quoting: + identifier: False columns: - name: id description: user ID @@ -738,6 +761,9 @@ def test__source_schema_invalid_test_not_strict(self): - name: my_source loader: some_loader description: my source description + quoting: + schema: True + identifier: True freshness: warn_after: 
count: 10 @@ -755,6 +781,8 @@ def test__source_schema_invalid_test_not_strict(self): warn_after: count: 7 period: hour + quoting: + identifier: False columns: - name: id description: user ID @@ -823,8 +851,8 @@ def test__schema_v2_as_v1(self, mock_logger, find_schema_yml): parser.load_and_parse( 'test', root_dir, relative_dirs ) - self.assertIn('https://docs.getdbt.com/v0.11/docs/schemayml-files', - str(cm.exception)) + self.assertIn('https://docs.getdbt.com/docs/schemayml-files', + str(cm.exception)) @mock.patch.object(SchemaParser, 'find_schema_yml') @mock.patch.object(dbt.parser.schemas, 'logger') @@ -846,8 +874,8 @@ def test__schema_v1_version_model(self, mock_logger, find_schema_yml): parser.load_and_parse( 'test', root_dir, relative_dirs ) - self.assertIn('https://docs.getdbt.com/v0.11/docs/schemayml-files', - str(cm.exception)) + self.assertIn('https://docs.getdbt.com/docs/schemayml-files', + str(cm.exception)) @mock.patch.object(SchemaParser, 'find_schema_yml') @mock.patch.object(dbt.parser.schemas, 'logger') @@ -869,11 +897,15 @@ def test__schema_v1_version_1(self, mock_logger, find_schema_yml): parser.load_and_parse( 'test', root_dir, relative_dirs ) - self.assertIn('https://docs.getdbt.com/v0.11/docs/schemayml-files', - str(cm.exception)) + self.assertIn('https://docs.getdbt.com/docs/schemayml-files', + str(cm.exception)) class ParserTest(BaseParserTest): + def _assert_parsed_sql_nodes(self, parse_result, parsed, disabled): + self.assertEqual(parse_result.parsed, parsed) + self.assertEqual(parse_result.disabled, disabled) + def find_input_by_name(self, models, name): return next( @@ -925,9 +957,9 @@ def test__single_model(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -954,7 +986,8 @@ def test__single_model(self): description='', columns={} ) - }, []) + }, + [] ) def test__single_model__nested_configuration(self): @@ -989,9 +1022,9 @@ def test__single_model__nested_configuration(self): self.all_projects, self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -1018,7 +1051,8 @@ def test__single_model__nested_configuration(self): description='', columns={} ) - }, []) + }, + [] ) def test__empty_model(self): @@ -1039,9 +1073,9 @@ def test__empty_model(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -1068,7 +1102,8 @@ def test__empty_model(self): description='', columns={} ) - }, []) + }, + [] ) def test__simple_dependency(self): @@ -1096,9 +1131,9 @@ def test__simple_dependency(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.root.base': ParsedNode( alias='base', name='base', @@ -1152,7 +1187,8 @@ def test__simple_dependency(self): description='', columns={} ) - }, []) + }, + [] ) def test__multiple_dependencies(self): @@ -1208,9 +1244,9 @@ def test__multiple_dependencies(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.root.events': ParsedNode( alias='events', name='events', @@ -1341,7 +1377,8 @@ def test__multiple_dependencies(self): description='', columns={} ), - }, []) + }, + [] ) def 
test__multiple_dependencies__packages(self): @@ -1399,9 +1436,9 @@ def test__multiple_dependencies__packages(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.snowplow.events': ParsedNode( alias='events', name='events', @@ -1517,7 +1554,7 @@ def test__multiple_dependencies__packages(self): empty=False, package_name='root', refs=[['snowplow', 'sessions_tx'], - ['snowplow', 'events_tx']], + ['snowplow', 'events_tx']], sources=[], depends_on={ 'nodes': [], @@ -1533,7 +1570,8 @@ def test__multiple_dependencies__packages(self): description='', columns={} ), - }, []) + }, + [] ) def test__process_refs__packages(self): @@ -1730,9 +1768,9 @@ def test__in_model_config(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -1759,7 +1797,8 @@ def test__in_model_config(self): description='', columns={} ) - }, []) + }, + [] ) def test__root_project_config(self): @@ -1819,9 +1858,9 @@ def test__root_project_config(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.root.table': ParsedNode( alias='table', name='table', @@ -1900,7 +1939,8 @@ def test__root_project_config(self): description='', columns={} ), - }, []) + }, + [] ) def test__other_project_config(self): @@ -2027,9 +2067,9 @@ def test__other_project_config(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + parsed={ 'model.root.table': ParsedNode( alias='table', name='table', @@ -2135,7 +2175,7 @@ def test__other_project_config(self): columns={} ), }, - [ + disabled=[ ParsedNode( name='disabled', resource_type='model', @@ -2184,7 +2224,7 @@ def test__other_project_config(self): fqn=['snowplow', 'views', 'package'], columns={} ) - ]) + ] ) def test__simple_data_test(self): @@ -2204,9 +2244,9 @@ def test__simple_data_test(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(tests), - ({ + { 'test.root.no_events': ParsedNode( alias='no_events', name='no_events', @@ -2233,7 +2273,8 @@ def test__simple_data_test(self): description='', columns={} ) - }, []) + }, + [] ) def test__simple_macro(self): @@ -2325,9 +2366,9 @@ def test__simple_macro_used_in_model(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -2354,7 +2395,8 @@ def test__simple_macro_used_in_model(self): description='', columns={} ) - }, []) + }, + [] ) def test__macro_no_explicit_project_used_in_model(self): @@ -2374,9 +2416,9 @@ def test__macro_no_explicit_project_used_in_model(self): self.macro_manifest ) - self.assertEqual( + self._assert_parsed_sql_nodes( parser.parse_sql_nodes(models), - ({ + { 'model.root.model_one': ParsedNode( alias='model_one', name='model_one', @@ -2403,5 +2445,6 @@ def test__macro_no_explicit_project_used_in_model(self): description='', columns={} ) - }, []) + }, + [] ) diff --git a/test/unit/test_postgres_adapter.py b/test/unit/test_postgres_adapter.py index 1677d9e2140..1998ec59a71 100644 --- a/test/unit/test_postgres_adapter.py +++ b/test/unit/test_postgres_adapter.py @@ -8,9 +8,10 @@ from dbt.exceptions import ValidationException from dbt.logger import GLOBAL_LOGGER as logger # noqa from 
psycopg2 import extensions as psycopg2_extensions +from psycopg2 import DatabaseError, Error import agate -from .utils import config_from_parts_or_dicts, inject_adapter +from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection class TestPostgresAdapter(unittest.TestCase): @@ -29,7 +30,7 @@ def setUp(self): 'type': 'postgres', 'dbname': 'postgres', 'user': 'root', - 'host': 'database', + 'host': 'thishostshouldnotexist', 'pass': 'password', 'port': 5432, 'schema': 'public' @@ -45,40 +46,45 @@ def setUp(self): def adapter(self): if self._adapter is None: self._adapter = PostgresAdapter(self.config) - inject_adapter('postgres', self._adapter) + inject_adapter(self._adapter) return self._adapter - def test_acquire_connection_validations(self): + @mock.patch('dbt.adapters.postgres.connections.psycopg2') + def test_acquire_connection_validations(self, psycopg2): try: connection = self.adapter.acquire_connection('dummy') - self.assertEquals(connection.type, 'postgres') except ValidationException as e: self.fail('got ValidationException: {}'.format(str(e))) except BaseException as e: - self.fail('validation failed with unknown exception: {}' + self.fail('acquiring connection failed with unknown exception: {}' .format(str(e))) + self.assertEquals(connection.type, 'postgres') + psycopg2.connect.assert_called_once() - def test_acquire_connection(self): + @mock.patch('dbt.adapters.postgres.connections.psycopg2') + def test_acquire_connection(self, psycopg2): connection = self.adapter.acquire_connection('dummy') self.assertEquals(connection.state, 'open') self.assertNotEquals(connection.handle, None) + psycopg2.connect.assert_called_once() def test_cancel_open_connections_empty(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_master(self): - self.adapter.connections.in_use['master'] = mock.MagicMock() + key = self.adapter.connections.get_thread_identifier() + self.adapter.connections.thread_connections[key] = mock_connection('master') self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_single(self): - master = mock.MagicMock() - model = mock.MagicMock() + master = mock_connection('master') + model = mock_connection('model') + key = self.adapter.connections.get_thread_identifier() model.handle.get_backend_pid.return_value = 42 - - self.adapter.connections.in_use.update({ - 'master': master, - 'model': model, + self.adapter.connections.thread_connections.update({ + key: master, + 1: model, }) with mock.patch.object(self.adapter.connections, 'add_query') as add_query: query_result = mock.MagicMock() @@ -86,7 +92,7 @@ def test_cancel_open_connections_single(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 1) - add_query.assert_called_once_with('select pg_terminate_backend(42)', 'master') + add_query.assert_called_once_with('select pg_terminate_backend(42)') master.handle.get_backend_pid.assert_not_called() @@ -98,7 +104,7 @@ def test_default_keepalive(self, psycopg2): psycopg2.connect.assert_called_once_with( dbname='postgres', user='root', - host='database', + host='thishostshouldnotexist', password='password', port=5432, connect_timeout=10) @@ -113,7 +119,7 @@ def test_changed_keepalive(self, psycopg2): psycopg2.connect.assert_called_once_with( dbname='postgres', user='root', - host='database', + host='thishostshouldnotexist', password='password', port=5432, connect_timeout=10, @@ -129,7 +135,7 @@ def test_set_zero_keepalive(self, psycopg2): 
psycopg2.connect.assert_called_once_with( dbname='postgres', user='root', - host='database', + host='thishostshouldnotexist', password='password', port=5432, connect_timeout=10) @@ -168,7 +174,7 @@ def setUp(self): 'type': 'postgres', 'dbname': 'postgres', 'user': 'root', - 'host': 'database', + 'host': 'thishostshouldnotexist', 'pass': 'password', 'port': 5432, 'schema': 'public' @@ -194,10 +200,14 @@ def setUp(self): self.mock_execute = self.cursor.execute self.patcher = mock.patch('dbt.adapters.postgres.connections.psycopg2') self.psycopg2 = self.patcher.start() + # there must be a better way to do this... + self.psycopg2.DatabaseError = DatabaseError + self.psycopg2.Error = Error self.psycopg2.connect.return_value = self.handle self.adapter = PostgresAdapter(self.config) - inject_adapter('postgres', self.adapter) + self.adapter.acquire_connection() + inject_adapter(self.adapter) def tearDown(self): # we want a unique self.handle every time. diff --git a/test/unit/test_redshift_adapter.py b/test/unit/test_redshift_adapter.py index 5611a2a6efc..63d9dec822b 100644 --- a/test/unit/test_redshift_adapter.py +++ b/test/unit/test_redshift_adapter.py @@ -9,7 +9,7 @@ from dbt.exceptions import ValidationException, FailedToConnectException from dbt.logger import GLOBAL_LOGGER as logger # noqa -from .utils import config_from_parts_or_dicts +from .utils import config_from_parts_or_dicts, mock_connection @classmethod @@ -30,7 +30,7 @@ def setUp(self): 'type': 'redshift', 'dbname': 'redshift', 'user': 'root', - 'host': 'database', + 'host': 'thishostshouldnotexist', 'pass': 'password', 'port': 5439, 'schema': 'public' @@ -106,17 +106,19 @@ def test_cancel_open_connections_empty(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_master(self): - self.adapter.connections.in_use['master'] = mock.MagicMock() + key = self.adapter.connections.get_thread_identifier() + self.adapter.connections.thread_connections[key] = mock_connection('master') self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_single(self): - master = mock.MagicMock() - model = mock.MagicMock() + master = mock_connection('master') + model = mock_connection('model') model.handle.get_backend_pid.return_value = 42 - self.adapter.connections.in_use.update({ - 'master': master, - 'model': model, + key = self.adapter.connections.get_thread_identifier() + self.adapter.connections.thread_connections.update({ + key: master, + 1: model, }) with mock.patch.object(self.adapter.connections, 'add_query') as add_query: query_result = mock.MagicMock() @@ -124,7 +126,7 @@ def test_cancel_open_connections_single(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 1) - add_query.assert_called_once_with('select pg_terminate_backend(42)', 'master') + add_query.assert_called_once_with('select pg_terminate_backend(42)') master.handle.get_backend_pid.assert_not_called() @@ -135,7 +137,7 @@ def test_default_keepalive(self, psycopg2): psycopg2.connect.assert_called_once_with( dbname='redshift', user='root', - host='database', + host='thishostshouldnotexist', password='password', port=5439, connect_timeout=10, @@ -152,7 +154,7 @@ def test_changed_keepalive(self, psycopg2): psycopg2.connect.assert_called_once_with( dbname='redshift', user='root', - host='database', + host='thishostshouldnotexist', password='password', port=5439, connect_timeout=10, @@ -168,7 +170,7 @@ def test_set_zero_keepalive(self, psycopg2): 
psycopg2.connect.assert_called_once_with( dbname='redshift', user='root', - host='database', + host='thishostshouldnotexist', password='password', port=5439, connect_timeout=10) diff --git a/test/unit/test_snowflake_adapter.py b/test/unit/test_snowflake_adapter.py index 0ee65d05759..caba79ea2e2 100644 --- a/test/unit/test_snowflake_adapter.py +++ b/test/unit/test_snowflake_adapter.py @@ -11,7 +11,7 @@ from dbt.logger import GLOBAL_LOGGER as logger # noqa from snowflake import connector as snowflake_connector -from .utils import config_from_parts_or_dicts, inject_adapter +from .utils import config_from_parts_or_dicts, inject_adapter, mock_connection class TestSnowflakeAdapter(unittest.TestCase): @@ -54,8 +54,8 @@ def setUp(self): self.snowflake.return_value = self.handle self.adapter = SnowflakeAdapter(self.config) - # patch our new adapter into the factory so macros behave - inject_adapter('snowflake', self.adapter) + self.adapter.acquire_connection() + inject_adapter(self.adapter) def tearDown(self): # we want a unique self.handle every time. @@ -69,7 +69,7 @@ def test_quoting_on_drop_schema(self): ) self.mock_execute.assert_has_calls([ - mock.call('drop schema if exists "test_database"."test_schema" cascade', None) + mock.call('drop schema if exists test_database."test_schema" cascade', None) ]) def test_quoting_on_drop(self): @@ -84,7 +84,7 @@ def test_quoting_on_drop(self): self.mock_execute.assert_has_calls([ mock.call( - 'drop table if exists "test_database"."test_schema".test_table cascade', + 'drop table if exists test_database."test_schema".test_table cascade', None ) ]) @@ -100,7 +100,7 @@ def test_quoting_on_truncate(self): self.adapter.truncate_relation(relation) self.mock_execute.assert_has_calls([ - mock.call('truncate table "test_database"."test_schema".test_table', None) + mock.call('truncate table test_database."test_schema".test_table', None) ]) def test_quoting_on_rename(self): @@ -125,7 +125,7 @@ def test_quoting_on_rename(self): ) self.mock_execute.assert_has_calls([ mock.call( - 'alter table "test_database"."test_schema".table_a rename to table_b', + 'alter table test_database."test_schema".table_a rename to test_database."test_schema".table_b', None ) ]) @@ -134,17 +134,19 @@ def test_cancel_open_connections_empty(self): self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_master(self): - self.adapter.connections.in_use['master'] = mock.MagicMock() + key = self.adapter.connections.get_thread_identifier() + self.adapter.connections.thread_connections[key] = mock_connection('master') self.assertEqual(len(list(self.adapter.cancel_open_connections())), 0) def test_cancel_open_connections_single(self): - master = mock.MagicMock() - model = mock.MagicMock() + master = mock_connection('master') + model = mock_connection('model') model.handle.session_id = 42 - self.adapter.connections.in_use.update({ - 'master': master, - 'model': model, + key = self.adapter.connections.get_thread_identifier() + self.adapter.connections.thread_connections.update({ + key: master, + 1: model, }) with mock.patch.object(self.adapter.connections, 'add_query') as add_query: query_result = mock.MagicMock() @@ -153,11 +155,10 @@ def test_cancel_open_connections_single(self): self.assertEqual( len(list(self.adapter.cancel_open_connections())), 1) - add_query.assert_called_once_with( - 'select system$abort_session(42)', 'master') + add_query.assert_called_once_with('select system$abort_session(42)') def 
test_client_session_keep_alive_false_by_default(self): - self.adapter.connections.get(name='new_connection_with_new_config') + self.adapter.connections.set_connection_name(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( account='test_account', autocommit=False, @@ -170,7 +171,7 @@ def test_client_session_keep_alive_true(self): self.config.credentials = self.config.credentials.incorporate( client_session_keep_alive=True) self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.get(name='new_connection_with_new_config') + self.adapter.connections.set_connection_name(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( @@ -184,7 +185,7 @@ def test_user_pass_authentication(self): self.config.credentials = self.config.credentials.incorporate( password='test_password') self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.get(name='new_connection_with_new_config') + self.adapter.connections.set_connection_name(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( @@ -198,7 +199,7 @@ def test_authenticator_user_pass_authentication(self): self.config.credentials = self.config.credentials.incorporate( password='test_password', authenticator='test_sso_url') self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.get(name='new_connection_with_new_config') + self.adapter.connections.set_connection_name(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( @@ -213,7 +214,7 @@ def test_authenticator_externalbrowser_authentication(self): self.config.credentials = self.config.credentials.incorporate( authenticator='externalbrowser') self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.get(name='new_connection_with_new_config') + self.adapter.connections.set_connection_name(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( @@ -231,7 +232,7 @@ def test_authenticator_private_key_authentication(self, mock_get_private_key): private_key_passphrase='p@ssphr@se') self.adapter = SnowflakeAdapter(self.config) - self.adapter.connections.get(name='new_connection_with_new_config') + self.adapter.connections.set_connection_name(name='new_connection_with_new_config') self.snowflake.assert_has_calls([ mock.call( diff --git a/test/unit/test_system_client.py b/test/unit/test_system_client.py index 282b957df63..0fc8e3dcfb7 100644 --- a/test/unit/test_system_client.py +++ b/test/unit/test_system_client.py @@ -92,7 +92,12 @@ def test__not_exe(self): dbt.clients.system.run_cmd(self.run_dir, [self.empty_file]) msg = str(exc.exception).lower() - self.assertIn('permissions', msg) + if os.name == 'nt': + # on windows, this means it's not an executable at all! + self.assertIn('not executable', msg) + else: + # on linux, this means you don't have executable permissions on it + self.assertIn('permissions', msg) self.assertIn(self.empty_file.lower(), msg) def test__cwd_does_not_exist(self): diff --git a/test/unit/utils.py b/test/unit/utils.py index fafb89484e7..48a753c2ee1 100644 --- a/test/unit/utils.py +++ b/test/unit/utils.py @@ -3,11 +3,19 @@ Note that all imports should be inside the functions to avoid import/mocking issues. 
""" +import mock + class Obj(object): which = 'blah' +def mock_connection(name): + conn = mock.MagicMock() + conn.name = name + return conn + + def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): from dbt.config import Project, Profile, RuntimeConfig from dbt.utils import parse_cli_vars @@ -29,10 +37,12 @@ def config_from_parts_or_dicts(project, profile, packages=None, cli_vars='{}'): ) -def inject_adapter(key, value): +def inject_adapter(value): """Inject the given adapter into the adapter factory, so your hand-crafted artisanal adapter will be available from get_adapter() as if dbt loaded it. """ from dbt.adapters import factory + from dbt.adapters.base.connections import BaseConnectionManager + key = value.type() factory._ADAPTERS[key] = value factory.ADAPTER_TYPES[key] = type(value) diff --git a/tox.ini b/tox.ini index ab55c312b26..2134d39fb3c 100644 --- a/tox.ini +++ b/tox.ini @@ -1,23 +1,23 @@ [tox] skipsdist = True -envlist = unit-py27, unit-py36, integration-postgres-py27, integration-postgres-py36, integration-redshift-py27, integration-redshift-py36, integration-snowflake-py27, integration-snowflake-py36, pep8, integration-bigquery-py27, integration-bigquery-py36 +envlist = unit-py27, unit-py36, integration-postgres-py27, integration-postgres-py36, integration-redshift-py27, integration-redshift-py36, integration-snowflake-py27, integration-snowflake-py36, flake8, integration-bigquery-py27, integration-bigquery-py36 -[testenv:pep8] +[testenv:flake8] basepython = python3.6 -commands = /bin/bash -c '$(which pep8) core/dbt plugins/*/dbt' +commands = /bin/bash -c '$(which flake8) --select=E,W,F --ignore=W504 core/dbt plugins/*/dbt' deps = -r{toxinidir}/dev_requirements.txt [testenv:unit-py27] basepython = python2.7 -commands = /bin/bash -c '$(which nosetests) -v {posargs} test/unit' +commands = /bin/bash -c '{envpython} -m pytest -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt [testenv:unit-py36] basepython = python3.6 -commands = /bin/bash -c '{envpython} $(which nosetests) -v {posargs} test/unit' +commands = /bin/bash -c '{envpython} -m pytest -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -27,7 +27,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=postgres {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_postgres {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -38,7 +38,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=snowflake {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/snowflake @@ -49,7 +49,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=bigquery {posargs} --with-coverage --cover-branches --cover-html 
--cover-html-dir=htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/bigquery @@ -60,7 +60,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=redshift {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -72,7 +72,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=presto {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_presto {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/presto @@ -83,7 +83,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=postgres --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov {posargs} test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_postgres --cov=dbt --cov-branch --cov-report html:htmlcov {posargs} test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -94,7 +94,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=snowflake {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/snowflake @@ -105,7 +105,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=bigquery {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/bigquery @@ -116,7 +116,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=redshift {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e {toxinidir}/plugins/postgres @@ -128,7 +128,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v -a type=presto {posargs} --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration/*' +commands = /bin/bash -c '{envpython} -m pytest -v -m profile_presto {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration/*' deps = -e {toxinidir}/core -e 
{toxinidir}/plugins/presto @@ -139,7 +139,7 @@ basepython = python2.7 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v {posargs}' +commands = /bin/bash -c '{envpython} -m pytest -v {posargs}' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -149,7 +149,7 @@ basepython = python3.6 passenv = * setenv = HOME=/home/dbt_test_user -commands = /bin/bash -c '{envpython} $(which nosetests) -v {posargs}' +commands = /bin/bash -c '{envpython} -m pytest -v {posargs}' deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt @@ -160,7 +160,71 @@ passenv = * setenv = DBT_CONFIG_DIR = ~/.dbt DBT_INVOCATION_ENV = ci-appveyor -commands = nosetests -v -a type=postgres -a type=snowflake -a type=bigquery --with-coverage --cover-branches --cover-html --cover-html-dir=htmlcov test/integration test/unit +commands = pytest -v -m 'profile_postgres or profile_snowflake or profile_bigquery or profile_redshift' --cov=dbt --cov-branch --cov-report html:htmlcov test/integration test/unit deps = -r{toxinidir}/requirements.txt -r{toxinidir}/dev_requirements.txt + +[testenv:pywin-unit] +basepython = python.exe +passenv = * +setenv = + DBT_CONFIG_DIR = ~/.dbt + DBT_INVOCATION_ENV = ci-appveyor +commands = python -m pytest -v {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/unit +deps = + -r{toxinidir}/requirements.txt + -r{toxinidir}/dev_requirements.txt + + +[testenv:pywin-postgres] +basepython = python.exe +passenv = * +setenv = + DBT_CONFIG_DIR = ~/.dbt + DBT_INVOCATION_ENV = ci-appveyor +commands = python -m pytest -v -m profile_postgres {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration +deps = + -e {toxinidir}/core + -e {toxinidir}/plugins/postgres + -r{toxinidir}/dev_requirements.txt + + +[testenv:pywin-snowflake] +basepython = python.exe +passenv = * +setenv = + DBT_CONFIG_DIR = ~/.dbt + DBT_INVOCATION_ENV = ci-appveyor +commands = python -m pytest -v -m profile_snowflake {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration +deps = + -e {toxinidir}/core + -e {toxinidir}/plugins/snowflake + -r{toxinidir}/dev_requirements.txt + + +[testenv:pywin-bigquery] +basepython = python.exe +passenv = * +setenv = + DBT_CONFIG_DIR = ~/.dbt + DBT_INVOCATION_ENV = ci-appveyor +commands = python -m pytest -v -m profile_bigquery {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration +deps = + -e {toxinidir}/core + -e {toxinidir}/plugins/bigquery + -r{toxinidir}/dev_requirements.txt + + +[testenv:pywin-redshift] +basepython = python.exe +passenv = * +setenv = + DBT_CONFIG_DIR = ~/.dbt + DBT_INVOCATION_ENV = ci-appveyor +commands = python -m pytest -v -m profile_redshift {posargs} --cov=dbt --cov-branch --cov-report html:htmlcov test/integration +deps = + -e {toxinidir}/core + -e {toxinidir}/plugins/postgres + -e {toxinidir}/plugins/redshift + -r{toxinidir}/dev_requirements.txt From afe236d9ac9c5ceea53f5f2cafde25f16f6bbb0c Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 27 Apr 2019 10:25:26 -0700 Subject: [PATCH 23/29] cleaning up some commented out stuff --- .../global_project/macros/adapters/common.sql | 12 ------------ .../dbt/include/snowflake/macros/adapters.sql | 17 ----------------- .../macros/materializations/incremental.sql | 2 -- 3 files changed, 31 deletions(-) diff --git a/core/dbt/include/global_project/macros/adapters/common.sql b/core/dbt/include/global_project/macros/adapters/common.sql index a24e707f850..b0e96d7c3b9 100644 --- 
a/core/dbt/include/global_project/macros/adapters/common.sql +++ b/core/dbt/include/global_project/macros/adapters/common.sql @@ -60,18 +60,6 @@ ); {% endmacro %} -{# {% macro create_or_replace_table_as(relation, sql) -%} - {{ adapter_macro('create_or_replace_table_as', relation, sql) }} -{%- endmacro %} - -{% macro default__create_or_replace_table_as(relation, sql) -%} - create or replace table - {{ relation.include(database=(not temporary), schema=(not temporary)) }} - as ( - {{ sql }} - ); -{% endmacro %} #} - {% macro create_view_as(relation, sql) -%} {{ adapter_macro('create_view_as', relation, sql) }} {%- endmacro %} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index a339bb54b12..f5cbd47a728 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -15,23 +15,6 @@ ); {% endmacro %} -{# {% macro snowflake__create_or_replace_table_as(relation, sql) -%} - {% if temporary %} - use schema {{ adapter.quote_as_configured(schema, 'schema') }}; - {% endif %} - - {%- set transient = config.get('transient', default=true) -%} - - create or replace {% if temporary -%} - temporary - {%- elif transient -%} - transient - {%- endif %} table {{ relation.include(database=(not temporary), schema=(not temporary)) }} - as ( - {{ sql }} - ); -{% endmacro %} #} - {% macro snowflake__create_view_as(relation, sql) -%} create or replace view {{ relation }} as ( {{ sql }} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 6a1cebaebaf..2d30564f945 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -10,9 +10,7 @@ {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(database=database, identifier=identifier, schema=schema, type='table') -%} {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} - {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} - {%- set force_create = full_refresh_mode -%} -- setup From 8af79841f77de8b620d82d7e437ec8d71aafa608 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sun, 28 Apr 2019 07:31:14 -0700 Subject: [PATCH 24/29] remove non-destructive logic --- .../macros/materializations/incremental.sql | 2 -- .../macros/materializations/table.sql | 23 +------------------ 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 2d30564f945..9ea58a5bdbe 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -2,8 +2,6 @@ {% materialization incremental, adapter='snowflake' -%} {%- set unique_key = config.get('unique_key') -%} - {%- set sql_where = config.get('sql_where') -%} - {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set full_refresh_mode = (flags.FULL_REFRESH == True) -%} {%- set identifier = model['alias'] -%} diff --git 
a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index 939eb855e20..d27d0f902c4 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -1,6 +1,5 @@ {% materialization table, adapter='snowflake' %} {%- set identifier = model['alias'] -%} - {%- set non_destructive_mode = (flags.NON_DESTRUCTIVE == True) -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(identifier=identifier, @@ -13,26 +12,6 @@ {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%} - {%- set create_as_temporary = (exists_as_table and non_destructive_mode) -%} - - - -- drop the temp relations if they exists for some reason - {# {{ adapter.drop_relation(intermediate_relation) }} #} - - --- FIXME: Do we want to put this block all together? I think it serves no purpose, but need to check - -- setup: if the target relation already exists, truncate or drop it (if it's a view) - {# TODO: Would like to check this. New materialsiation makes these tests a bit moot. We should - be able to deprecate non-destructive flag all together here. #} - {% if non_destructive_mode -%} - {% if exists_as_table -%} - --noop we can do away with this step all together since the table can be replaced in Snowflake. - {# {{ adapter.truncate_relation(old_relation) }} #} - {% elif exists_as_view -%} - --noop. I think we should also be able to do away with this and call a replace. - {{ adapter.drop_relation(old_relation) }} - {%- set old_relation = none -%} - {%- endif %} - {%- endif %} {{ run_hooks(pre_hooks, inside_transaction=False) }} @@ -48,7 +27,7 @@ {{ drop_relation_if_exists(old_relation) }} {% endif %} - {{ create_table_as(create_as_temporary, target_relation, sql) }} + {{ create_table_as(false, target_relation, sql) }} {%- endcall %} {{ run_hooks(post_hooks, inside_transaction=True) }} From 85eac05a3861822a1b20cf3a473f43a88bf19a98 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sun, 28 Apr 2019 07:33:49 -0700 Subject: [PATCH 25/29] cleaner select --- .../include/snowflake/macros/materializations/incremental.sql | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 9ea58a5bdbe..a2741e28784 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -16,9 +16,7 @@ {% set source_sql -%} -- wrap sql in parens to make it a subquery -- ( - select * from ( - {{ sql }} - ) + {{ sql }} ) {%- endset -%} From 3ef519d139715719a9816874c70cb0303d1754e6 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sun, 28 Apr 2019 07:43:16 -0700 Subject: [PATCH 26/29] todo and comments clean up --- .../snowflake/macros/materializations/incremental.sql | 11 +++-------- .../snowflake/macros/materializations/table.sql | 4 ---- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql 
b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index a2741e28784..85669a272ba 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -14,7 +14,7 @@ -- setup {% set source_sql -%} - -- wrap sql in parens to make it a subquery -- + {# wrap sql in parens to make it a subquery #} ( {{ sql }} ) @@ -28,24 +28,20 @@ -- build model {% if force_create or old_relation is none -%} {%- call statement('main') -%} - {# -- create or replace logic because we're in a full refresh or table is non existant. #} {% if old_relation is not none and old_relation.type == 'view' %} - {# -- I'm preserving one of the old checks here for a view, and to make sure Snowflake doesn't - -- complain that we're running a replace table on a view. #} {{ log("Dropping relation " ~ old_relation ~ " because it is a view and this model is a table.") }} {{ adapter.drop_relation(old_relation) }} {% endif %} - {# -- now create or replace the table because we're in full-refresh #} + {# -- now create (or replace) the table because we're in full-refresh #} {{create_table_as(false, target_relation, source_sql)}} {%- endcall -%} - + {%- else -%} {# -- here is the incremental part #} {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} {% set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') %} {%- call statement('main') -%} - {%- if unique_key is none -%} {# -- if no unique_key is provided run regular insert as Snowflake may complain #} insert into {{ target_relation }} ({{ dest_cols_csv }}) @@ -58,7 +54,6 @@ {{ get_merge_sql(target_relation, source_sql, unique_key, dest_columns) }} {%- endif -%} {% endcall %} - {%- endif %} {{ run_hooks(post_hooks, inside_transaction=True) }} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql index d27d0f902c4..fc284a15c1f 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/table.sql @@ -6,10 +6,6 @@ schema=schema, database=database, type='table') -%} - /* --TODO: Is this still up to date? - See ../view/view.sql for more information about this relation. 
- */ - {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%} From 7a2279e433c1753629ad7d61a15c2e95b5982804 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sun, 28 Apr 2019 09:59:17 -0700 Subject: [PATCH 27/29] move unique key workaround to snowflake macro --- .../macros/materializations/incremental.sql | 13 +------------ .../macros/materializations/merge.sql | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 85669a272ba..7ca1e48ef2c 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -40,19 +40,8 @@ {%- else -%} {# -- here is the incremental part #} {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} - {% set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') %} {%- call statement('main') -%} - {%- if unique_key is none -%} - {# -- if no unique_key is provided run regular insert as Snowflake may complain #} - insert into {{ target_relation }} ({{ dest_cols_csv }}) - ( - select {{ dest_cols_csv }} - from {{ source_sql }} - ); - {%- else -%} - {# -- use merge if a unique key is provided #} - {{ get_merge_sql(target_relation, source_sql, unique_key, dest_columns) }} - {%- endif -%} + {{ get_merge_sql(target_relation, source_sql, unique_key, dest_columns) }} {% endcall %} {%- endif %} diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/merge.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/merge.sql index ac92f2ef26e..caa506f7e7b 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/merge.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/merge.sql @@ -1,3 +1,17 @@ -{% macro snowflake__get_merge_sql(target, source, unique_key, dest_columns) %} - {{ common_get_merge_sql(target, source, unique_key, dest_columns) }} +{% macro snowflake__get_merge_sql(target, source_sql, unique_key, dest_columns) %} + {%- set dest_cols_csv = dest_columns | map(attribute="name") | join(', ') -%} + {%- if unique_key is none -%} + {# workaround for Snowflake not being happy with "on false" merge. + when no unique key is provided we'll do a regular insert, other times we'll + use the preferred merge. #} + insert into {{ target }} ({{ dest_cols_csv }}) + ( + select {{ dest_cols_csv }} + from {{ source_sql }} + ); + {%- else -%} + {# call regular merge when a unique key is present. 
#} + {{ common_get_merge_sql(target, source_sql, unique_key, dest_columns) }} + {%- endif -%} + {% endmacro %} From 8d745506094ee66afc4142acb0081bd704556ada Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Fri, 10 May 2019 19:12:38 -0400 Subject: [PATCH 28/29] fix tests --- .../macros/materializations/incremental.sql | 49 ++++++++++--------- .../test_runtime_materialization.py | 13 ----- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 7ca1e48ef2c..6147a8653eb 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -7,42 +7,47 @@ {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} {%- set target_relation = api.Relation.create(database=database, identifier=identifier, schema=schema, type='table') -%} - {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%} - {%- set exists_not_as_table = (old_relation is not none and not old_relation.is_table) -%} - {%- set force_create = full_refresh_mode -%} + {%- set tmp_relation = api.Relation.create(identifier=identifier ~ "__dbt_tmp", type='table') -%} -- setup - - {% set source_sql -%} - {# wrap sql in parens to make it a subquery #} - ( - {{ sql }} - ) - {%- endset -%} - {{ run_hooks(pre_hooks, inside_transaction=False) }} -- `BEGIN` happens here: {{ run_hooks(pre_hooks, inside_transaction=True) }} + {# -- If the destination is a view, then we have no choice but to drop it #} + {% if old_relation is not none and old_relation.type == 'view' %} + {{ log("Dropping relation " ~ old_relation ~ " because it is a view and this model is a table.") }} + {{ adapter.drop_relation(old_relation) }} + {% set old_relation = none %} + {% endif %} + -- build model - {% if force_create or old_relation is none -%} + {% if full_refresh_mode or old_relation is none -%} + {%- call statement('main') -%} - {# -- create or replace logic because we're in a full refresh or table is non existant. 
#} - {% if old_relation is not none and old_relation.type == 'view' %} - {{ log("Dropping relation " ~ old_relation ~ " because it is a view and this model is a table.") }} - {{ adapter.drop_relation(old_relation) }} - {% endif %} - {# -- now create (or replace) the table because we're in full-refresh #} - {{create_table_as(false, target_relation, source_sql)}} + {{ create_table_as(false, target_relation, sql) }} {%- endcall -%} - + {%- else -%} - {# -- here is the incremental part #} + + {%- call statement() -%} + {{ create_table_as(true, tmp_relation, sql) }} + {%- endcall -%} + + {{ adapter.expand_target_column_types(temp_table=tmp_relation.identifier, + to_relation=target_relation) }} + {% set incremental_sql %} + ( + select * from {{ tmp_relation }} + ) + {% endset %} + {% set dest_columns = adapter.get_columns_in_relation(target_relation) %} {%- call statement('main') -%} - {{ get_merge_sql(target_relation, source_sql, unique_key, dest_columns) }} + {{ get_merge_sql(target_relation, incremental_sql, unique_key, dest_columns) }} {% endcall %} + {%- endif %} {{ run_hooks(post_hooks, inside_transaction=True) }} diff --git a/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py b/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py index 2236fe82fa0..be70d3c3f87 100644 --- a/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py +++ b/test/integration/017_runtime_materialization_tests/test_runtime_materialization.py @@ -52,16 +52,3 @@ def test_postgres_delete__dbt_tmp_relation(self): self.assertTableDoesNotExist('view__dbt_tmp') self.assertTablesEqual("seed","view") - - - @use_profile('snowflake') - def test_snowflake_backup_different_type(self): - self.run_sql_file( - 'test/integration/017_runtime_materialization_tests/create_backup_and_original.sql' - ) - results = self.run_dbt(['run', '--model', 'materialized']) - self.assertEqual(len(results), 1) - - self.assertTableDoesNotExist('materialized__dbt_tmp') - self.assertTableDoesNotExist('materialized__dbt_backup') - self.assertTablesEqual("seed", "materialized") From 90abc2d2f329914ef7a34b145facc5d2d3b473aa Mon Sep 17 00:00:00 2001 From: Drew Banin Date: Mon, 13 May 2019 14:12:30 -0400 Subject: [PATCH 29/29] (closes #1455) Qualify Snowflake temp tables with a database and schema --- .../dbt/include/snowflake/macros/adapters.sql | 6 +----- .../snowflake/macros/materializations/incremental.sql | 11 +++++++++-- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql index f5cbd47a728..644569cbf56 100644 --- a/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/adapters.sql @@ -1,15 +1,11 @@ {% macro snowflake__create_table_as(temporary, relation, sql) -%} - {% if temporary %} - use schema {{ adapter.quote_as_configured(schema, 'schema') }}; - {% endif %} - {%- set transient = config.get('transient', default=true) -%} create or replace {% if temporary -%} temporary {%- elif transient -%} transient - {%- endif %} table {{ relation.include(database=(not temporary), schema=(not temporary)) }} + {%- endif %} table {{ relation }} as ( {{ sql }} ); diff --git a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql index 6147a8653eb..ef7a2ec8e35 100644 --- 
a/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql +++ b/plugins/snowflake/dbt/include/snowflake/macros/materializations/incremental.sql @@ -6,8 +6,15 @@ {%- set identifier = model['alias'] -%} {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%} - {%- set target_relation = api.Relation.create(database=database, identifier=identifier, schema=schema, type='table') -%} - {%- set tmp_relation = api.Relation.create(identifier=identifier ~ "__dbt_tmp", type='table') -%} + {%- set target_relation = api.Relation.create(database=database, + schema=schema, + identifier=identifier, + type='table') -%} + + {%- set tmp_relation = api.Relation.create(database=database, + schema=schema, + identifier=identifier ~ "__dbt_tmp", + type='table') -%} -- setup {{ run_hooks(pre_hooks, inside_transaction=False) }}
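For illustration only: with the change in PATCH 29/29, the __dbt_tmp relation now carries its database and schema, so the patched snowflake__create_table_as macro should render temp-table DDL roughly like the sketch below. The database, schema, and model names here are placeholders, not values taken from the patch.

    -- sketch of the rendered temporary-table DDL after this change; identifiers are illustrative
    create or replace temporary table analytics.dbt_dev.my_model__dbt_tmp as (
        select * from analytics.dbt_dev.my_source_table
    );

Because the relation is fully qualified in the statement itself, the "use schema {{ adapter.quote_as_configured(schema, 'schema') }};" step that this patch removes from the macro is no longer needed just to land the temporary table in the right schema.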