From ac71888236406d24b9ffeea6ba271b1d1371d9c9 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 15 Apr 2019 10:58:40 -0600 Subject: [PATCH 1/5] add tests test for comment blocks where the first character of the comment is "}" do/set tests an even trickier test case --- test/unit/test_jinja.py | 129 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py index 6fee7bfa132..81381819dc9 100644 --- a/test/unit/test_jinja.py +++ b/test/unit/test_jinja.py @@ -161,6 +161,108 @@ def test_comment_only(self): blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] self.assertEqual(len(blocks), 0) + def test_comment_block_self_closing(self): + # test the case where a comment start looks a lot like it closes itself + # (but it doesn't in jinja!) + body = '{#} {% myblock foo %} {#}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 0) + + def test_embedded_self_closing_comment_block(self): + body = '{% myblock foo %} {#}{% endmyblock %} {#}{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].full_block, body) + self.assertEqual(blocks[0].contents, ' {#}{% endmyblock %} {#}') + + def test_set_statement(self): + body = '{% set x = 1 %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].full_block, '{% set x = 1 %}') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + + def test_set_block(self): + body = '{% set x %}1{% endset %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].contents, '1') + self.assertEqual(blocks[0].block_type_name, 'set') + self.assertEqual(blocks[0].block_name, 'x') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + + def test_crazy_set_statement(self): + body = '{% set x = (thing("{% myblock foo %}")) %}{% otherblock bar %}x{% endotherblock %}{% set y = otherthing("{% myblock foo %}") %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 3) + self.assertEqual(blocks[0].full_block, '{% set x = (thing("{% myblock foo %}")) %}') + self.assertEqual(blocks[0].block_type_name, 'set') + self.assertEqual(blocks[1].full_block, '{% otherblock bar %}x{% endotherblock %}') + self.assertEqual(blocks[1].block_type_name, 'otherblock') + self.assertEqual(blocks[2].full_block, '{% set y = otherthing("{% myblock foo %}") %}') + self.assertEqual(blocks[2].block_type_name, 'set') + + def test_do_statement(self): + body = '{% do thing.update() %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].full_block, '{% do thing.update() %}') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + + def test_do_block(self): + body = '{% do %}thing.update(){% enddo %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].contents, 'thing.update()') + self.assertEqual(blocks[0].block_type_name, 'do') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + + def test_crazy_do_statement(self): + body = '{% do (thing("{% myblock foo %}")) %}{% otherblock bar %}x{% endotherblock %}{% do otherthing("{% myblock foo %}") %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 3) + self.assertEqual(blocks[0].full_block, '{% do (thing("{% myblock foo %}")) %}') + self.assertEqual(blocks[0].block_type_name, 'do') + self.assertEqual(blocks[1].full_block, '{% otherblock bar %}x{% endotherblock %}') + self.assertEqual(blocks[1].block_type_name, 'otherblock') + self.assertEqual(blocks[2].full_block, '{% do otherthing("{% myblock foo %}") %}') + self.assertEqual(blocks[2].block_type_name, 'do') + + def test_awful_jinja(self): + all_blocks = extract_toplevel_blocks(if_you_do_this_you_are_awful) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 4) + self.assertEqual(blocks[0].block_type_name, 'do') + self.assertEqual(blocks[0].full_block, '''{% do\n set('foo="bar"')\n%}''') + self.assertEqual(blocks[1].block_type_name, 'set') + self.assertEqual(blocks[1].full_block, '''{% set x = ("100" + "hello'" + '%}') %}''') + self.assertEqual(blocks[2].block_type_name, 'archive') + self.assertEqual(blocks[2].contents, '\n '.join([ + '''{% set x = ("{% endarchive %}" + (40 * '%})')) %}''', + '{# {% endarchive %} #}', + '{% embedded %}', + ' some block data right here', + '{% endembedded %}' + ])) + self.assertEqual(blocks[3].block_type_name, 'materialization') + self.assertEqual(blocks[3].contents, '\nhi\n') + + def test_quoted_endblock_within_block(self): + body = '{% myblock something -%} {% set x = ("{% endmyblock %}") %} {% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 1) + self.assertEqual(blocks[0].block_type_name, 'myblock') + self.assertEqual(blocks[0].contents, '{% set x = ("{% endmyblock %}") %} ') + bar_block = '''{% mytype bar %} {# a comment that inside it has @@ -188,3 +290,30 @@ def test_comment_only(self): {% mytype foo %} some stuff {% endmytype %} '''+bar_block+x_block + + +if_you_do_this_you_are_awful = ''' +{#} here is a comment with a block inside {% block x %} asdf {% endblock %} {#} +{% do + set('foo="bar"') +%} +{% set x = ("100" + "hello'" + '%}') %} +{% archive something -%} + {% set x = ("{% endarchive %}" + (40 * '%})')) %} + {# {% endarchive %} #} + {% embedded %} + some block data right here + {% endembedded %} +{%- endarchive %} + +{% raw %} + {% set x = SYNTAX ERROR} +{% endraw %} + + +{% materialization whatever, adapter='thing' %} +hi +{% endmaterialization %} +''' + + From 9b1aede911304db8bdf2056a11e7da493795f5ce Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Mon, 15 Apr 2019 11:59:53 -0600 Subject: [PATCH 2/5] Fix a number of bugs After we find the start of a comment block, advance immediately - this is so we do not mistake "{#}" as both open and close of comment support do/set statements (no {% enddo %}/{% endset %}) fix some edge-case bugs around quoting fix a bug around materialization parsing --- core/dbt/clients/_jinja_blocks.py | 136 +++++++++++++++++++++++++----- 1 file changed, 117 insertions(+), 19 deletions(-) diff --git a/core/dbt/clients/_jinja_blocks.py b/core/dbt/clients/_jinja_blocks.py index 1da2f5c719a..735a627ce53 100644 --- a/core/dbt/clients/_jinja_blocks.py +++ b/core/dbt/clients/_jinja_blocks.py @@ -16,11 +16,12 @@ def __init__(self, contents): class BlockTag(object): - def __init__(self, block_type_name, block_name, contents=None, **kw): + def __init__(self, block_type_name, block_name, contents=None, + full_block=None, **kw): self.block_type_name = block_type_name self.block_name = block_name self.contents = contents - self.full_block = None + self.full_block = full_block def __str__(self): return 'BlockTag({!r}, {!r})'.format(self.block_type_name, @@ -59,7 +60,7 @@ def end_pat(self): r'(?:\s+(?P({})))?'.format(_NAME_PATTERN), ))) -TAG_CLOSE_PATTERN = regex(r'(?:\-\%\}\s*|\%\})') +TAG_CLOSE_PATTERN = regex(r'(?:(?P(\-\%\}\s*|\%\})))') # if you do {% materialization foo, adapter="myadapter' %} and end up with # mismatched quotes this will still match, but jinja will fail somewhere # since the adapter= argument has to be an adapter name, and none have quotes @@ -103,6 +104,13 @@ def end_pat(self): ) +NON_STRING_DO_BLOCK_MEMBER_PATTERN = regex( + # anything, followed by a quote, paren, or a tag end + r'''(.*?)''' + r'''((?P(['"]))|(?P(\())|(?P(\))))''' +) + + class BlockIterator(object): def __init__(self, data): self.data = data @@ -134,9 +142,7 @@ def _match(self, pattern): def expect_comment_end(self): """Expect a comment end and return the match object. """ - match = self._match(COMMENT_END_PATTERN) - if match is None: - dbt.exceptions.raise_compiler_error('unexpected EOF, expected #}') + match = self._expect_match('#}', COMMENT_END_PATTERN) self.advance(match.end()) def expect_raw_end(self): @@ -195,7 +201,8 @@ def handle_block(self, match, block_start=None): while True: match = self._expect_match( '"{}"'.format(found.end_block_type_name), - found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN + found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN, + regex('''(?P(['"]))''') ) groups = match.groupdict() if groups.get('endblock') is not None: @@ -207,6 +214,10 @@ def handle_block(self, match, block_start=None): self.expect_comment_end() elif groups.get('raw_start') is not None: self.expect_raw_end() + elif groups.get('quote') is not None: + self.rewind() + match = self._expect_match('any string', STRING_PATTERN) + self.advance(match.end()) else: raise dbt.exceptions.InternalException( 'unhandled regex in handle_block, no match: {}' @@ -226,10 +237,49 @@ def handle_block(self, match, block_start=None): def handle_materialization(self, match): self._expect_match('materialization args', MATERIALIZATION_ARGS_PATTERN) - self._expect_match('%}', TAG_CLOSE_PATTERN) + endtag = self._expect_match('%}', TAG_CLOSE_PATTERN) + self.advance(endtag.end()) # handle the block we started with! self.blocks.append(self.handle_block(match)) + def handle_do(self, match, expect_block): + if expect_block: + # we might be wrong to expect a block ({% do (...) %}, for example) + # so see if there's more data before the tag closes. if there + # isn't, we expect a block. + close_match = self._expect_match('%}', TAG_CLOSE_PATTERN) + unprocessed = self.data[match.end():close_match.start()].strip() + expect_block = not unprocessed + + if expect_block: + # if we're here, expect_block is True and we must have set + # close_match + self.advance(close_match.end()) + block = self.handle_block(match) + else: + # we have a do-statement like {% do thing() %}, so no {% enddo %} + # also, we don't want to advance to the end of the match, as it + # might be inside a string or something! So go back and figure out + self._process_rval_components() + block = BlockTag('do', None, + full_block=self.data[match.start():self.pos]) + self.blocks.append(block) + + def handle_set(self, match): + equal_or_close = self._expect_match('%} or =', + TAG_CLOSE_PATTERN, regex(r'=')) + self.advance(equal_or_close.end()) + if equal_or_close.groupdict().get('tag_close') is None: + # it's an equals sign, must be like {% set x = 1 %} + self._process_rval_components() + # watch out, order matters here on python 2 + block = BlockTag(full_block=self.data[match.start():self.pos], + **match.groupdict()) + else: + # it's a tag close, must be like {% set x %}...{% endset %} + block = self.handle_block(match) + self.blocks.append(block) + def find_block(self): open_block = ( r'(?:\s*\{\%\-|\{\%)\s*' @@ -251,21 +301,36 @@ def find_block(self): # comments are easy if matchgroups.get('comment_start') is not None: start = match.start() + self.advance(match.end()) self.expect_comment_end() self.blocks.append(BlockData(self.data[start:self.pos])) return True - if matchgroups.get('block_type_name') == 'raw': + block_type_name = matchgroups.get('block_type_name') + + if block_type_name == 'raw': start = match.start() self.expect_raw_end() self.blocks.append(BlockData(self.data[start:self.pos])) return True - if matchgroups.get('block_type_name') == 'materialization': + if block_type_name == 'materialization': self.advance(match.end()) self.handle_materialization(match) return True + if block_type_name == 'do': + # if there is a "block_name" in the match groups, we don't expect a + # block as the "block name" is actually part of the do-statement. + expect_block = matchgroups.get('block_name') is None + self.handle_do(match, expect_block=expect_block) + return True + + if block_type_name == 'set': + self.advance(match.end()) + self.handle_set(match) + return True + # we're somewhere like this {% block_type_name block_type # we've either got arguments, a close of tag (%}), or bad input. # we've handled materializations already (they're weird!) @@ -284,6 +349,42 @@ def find_block(self): self.blocks.append(self.handle_block(match)) return True + def _process_rval_components(self): + """This is suspiciously similar to _process_macro_default_arg, probably + want to figure out how to merge the two. + + Process the rval of an assignment statement or a do-block + """ + while True: + match = self._expect_match( + 'do block component', + # you could have a string, though that would be weird + STRING_PATTERN, + # a quote or an open/close parenthesis + NON_STRING_DO_BLOCK_MEMBER_PATTERN, + # a tag close + TAG_CLOSE_PATTERN + ) + matchgroups = match.groupdict() + self.advance(match.end()) + if matchgroups.get('string') is not None: + continue + elif matchgroups.get('quote') is not None: + self.rewind() + # now look for a string + match = self._expect_match('any string', STRING_PATTERN) + self.advance(match.end()) + elif matchgroups.get('open'): + self._parenthesis_stack.append(True) + elif matchgroups.get('close'): + self._parenthesis_stack.pop() + elif matchgroups.get('tag_close'): + if self._parenthesis_stack: + msg = ('Found "%}", expected ")"') + dbt.exceptions.raise_compiler_error(msg) + return + # else whitespace + def _process_macro_default_arg(self): """Handle the bit after an '=' in a macro default argument. This is probably the trickiest thing. The goal here is to accept all strings @@ -297,7 +398,7 @@ def _process_macro_default_arg(self): 'macro argument', # you could have a string STRING_PATTERN, - # a quote, a comma, or a close parenthesis + # a quote, a comma, or a open/close parenthesis NON_STRING_MACRO_ARGS_PATTERN, # we want to "match", not "search" method='match' @@ -311,20 +412,17 @@ def _process_macro_default_arg(self): # we got a bunch of data and then a string opening value. # put the quote back on the menu self.rewind() - # if we only got a single quote mark and didn't hit a string - # at all, this file has an unclosed quote. Fail accordingly. - if match.end() - match.start() == 1: - msg = ( - 'Unclosed quotation mark at position {}. Context:\n{}' - .format(self.pos, self.data[self.pos-20:self.pos+20]) - ) - dbt.exceptions.raise_compiler_error(msg) + # now look for a string + match = self._expect_match('any string', STRING_PATTERN) + self.advance(match.end()) elif matchgroups.get('comma') is not None: # small hack: if we hit a comma and there is one parenthesis # left, return to look for a new name. otherwise we're still # looking for the parameter close. if len(self._parenthesis_stack) == 1: return + elif matchgroups.get('open'): + self._parenthesis_stack.append(True) elif matchgroups.get('close'): self._parenthesis_stack.pop() else: From 4e8c7b9216265365274fb175a55e7a53b45d9fd7 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 18 Apr 2019 09:26:55 -0600 Subject: [PATCH 3/5] add a test and a comment about an odd edge case in do-block parsing --- core/dbt/clients/_jinja_blocks.py | 3 +++ test/unit/test_jinja.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/core/dbt/clients/_jinja_blocks.py b/core/dbt/clients/_jinja_blocks.py index 735a627ce53..55107335290 100644 --- a/core/dbt/clients/_jinja_blocks.py +++ b/core/dbt/clients/_jinja_blocks.py @@ -322,6 +322,9 @@ def find_block(self): if block_type_name == 'do': # if there is a "block_name" in the match groups, we don't expect a # block as the "block name" is actually part of the do-statement. + # we need to do this to handle the (weird and probably wrong!) case + # of a do-statement that is only a single identifier - techincally + # allowed in jinja. (for example, {% do thing %}) expect_block = matchgroups.get('block_name') is None self.handle_do(match, expect_block=expect_block) return True diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py index 81381819dc9..5bcab016666 100644 --- a/test/unit/test_jinja.py +++ b/test/unit/test_jinja.py @@ -215,6 +215,14 @@ def test_do_statement(self): self.assertEqual(blocks[0].full_block, '{% do thing.update() %}') self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + def test_deceptive_do_statement(self): + body = '{% do thing %}{% myblock foo %}hi{% endmyblock %}' + all_blocks = extract_toplevel_blocks(body) + blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data'] + self.assertEqual(len(blocks), 2) + self.assertEqual(blocks[0].full_block, '{% do thing %}') + self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}') + def test_do_block(self): body = '{% do %}thing.update(){% enddo %}{% myblock foo %}hi{% endmyblock %}' all_blocks = extract_toplevel_blocks(body) From f3449dcfcb680e72bc75677b6162750d9329c6c7 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Wed, 17 Apr 2019 08:19:39 -0600 Subject: [PATCH 4/5] add a flag to wrap models/tests in jinja blocks, to see what happens --- core/dbt/flags.py | 5 ++++- core/dbt/main.py | 11 ++++++++++ core/dbt/parser/base.py | 40 +++++++++++++++++++++++++++++++++++++ core/dbt/parser/base_sql.py | 4 ++++ core/dbt/parser/schemas.py | 2 ++ test/integration/base.py | 17 +++++++++++----- 6 files changed, 73 insertions(+), 6 deletions(-) diff --git a/core/dbt/flags.py b/core/dbt/flags.py index 8bf43049a49..dedcae107d0 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -3,13 +3,16 @@ FULL_REFRESH = False USE_CACHE = True WARN_ERROR = False +WRAP_MODELS_IN_TAGS = False def reset(): - global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR + global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR, \ + WRAP_MODELS_IN_TAGS STRICT_MODE = False NON_DESTRUCTIVE = False FULL_REFRESH = False USE_CACHE = True WARN_ERROR = False + WRAP_MODELS_IN_TAGS = False diff --git a/core/dbt/main.py b/core/dbt/main.py index dd3520af478..bcc8d140f52 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -221,6 +221,8 @@ def update_flags(parsed): elif arg_full_refresh: flags.FULL_REFRESH = True + flags.WRAP_MODELS_IN_TAGS = getattr(parsed, 'wrap_models_in_tags', False) + def _build_base_subparser(): base_subparser = argparse.ArgumentParser(add_help=False) @@ -637,6 +639,15 @@ def parse_args(args): help=argparse.SUPPRESS, ) + # if set, wrap all models and tests in '{% model name %}...{% endmodel %}' + # tags, extract them with the jinja block extractor, and verify that we + # got the original input. This is just a sanity check. + p.add_argument( + '--wrap-models-in-tags', + action='store_true', + help=argparse.SUPPRESS + ) + subs = p.add_subparsers(title="Available sub-commands") base_subparser = _build_base_subparser() diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 8bd6a51c81b..875f513b4d2 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -265,3 +265,43 @@ def parse_node(self, node, node_path, package_project_config, tags=None, parsed_node.validate() return parsed_node + + def check_block_parsing(self, name, path, contents): + if not dbt.flags.WRAP_MODELS_IN_TAGS: + return + + wrapped = ''.join( + ['{% model ', name, ' %}', contents, '{% endmodel %}'] + ) + blocks = dbt.clients.jinja.extract_toplevel_blocks(wrapped) + if len(blocks) == 0: + raise dbt.exceptions.InternalException( + 'Failed to extract node {} ({}) from its wrapped block!' + .format(name, path) + ) + elif len(blocks) > 1: + raise dbt.exceptions.InternalException( + 'Found multiple blocks when parsing node {} ({}) from its ' + 'wrapped block, expected 1!'.format(name, path) + ) + block = blocks[0] + if block.block_type_name != 'model': + raise dbt.exceptions.InternalException( + 'Found invalid block type ("{}") when parsing node {} ({}) ' + 'from its wrapped block, expected "model"' + .format(block.block_type_name, name, path) + ) + + if block.block_name != name: + raise dbt.exceptions.InternalException( + 'Found invalid block name ("{}") when parsing node {} ({}) ' + 'from its wrapped block, expected "{}"' + .format(block.block_name, name, path, name) + ) + + if blocks[0].contents != contents: + raise dbt.exceptions.InternalException( + 'Found invalid block contents when parsing node {} ({}) ' + 'from its wrapped block, expected it to match original file' + .format(name, path) + ) diff --git a/core/dbt/parser/base_sql.py b/core/dbt/parser/base_sql.py index d412e2e6f9e..99f58354a56 100644 --- a/core/dbt/parser/base_sql.py +++ b/core/dbt/parser/base_sql.py @@ -9,6 +9,7 @@ from dbt.contracts.graph.unparsed import UnparsedNode from dbt.parser.base import MacrosKnownParser +from dbt.node_types import NodeType class BaseSqlParser(MacrosKnownParser): @@ -50,6 +51,9 @@ def load_and_parse(self, package_name, root_dir, relative_dirs, file_match.get('searched_path'), path) + if resource_type == NodeType.Model: + self.check_block_parsing(name, path, file_contents) + result.append({ 'name': name, 'root_path': root_dir, diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index ba548b4a372..3636cb770a6 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -278,6 +278,8 @@ def build_test_node(self, test_target, package_name, test, root_dir, path, full_path = get_pseudo_test_path(full_name, test_path, 'schema_test') raw_sql = self._build_raw_sql(test_namespace, test_target, test_type, test_args) + + self.check_block_parsing(full_name, test_path, raw_sql) unparsed = UnparsedNode( name=full_name, resource_type=NodeType.Test, diff --git a/test/integration/base.py b/test/integration/base.py index 3dcfa193e5c..6dcbf7e2e6d 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -441,16 +441,23 @@ def project_config(self): def profile_config(self): return {} - def run_dbt(self, args=None, expect_pass=True, strict=True): + def run_dbt(self, args=None, expect_pass=True, strict=True, wrap=True): if args is None: args = ["run"] + final_args = [] + if strict: - args = ["--strict"] + args - args.append('--log-cache-events') - logger.info("Invoking dbt with {}".format(args)) + final_args.append('--strict') + if wrap: + final_args.append('--wrap-models-in-tags') + + final_args.extend(args) + final_args.append('--log-cache-events') + + logger.info("Invoking dbt with {}".format(final_args)) - res, success = dbt.handle_and_check(args) + res, success = dbt.handle_and_check(final_args) self.assertEqual( success, expect_pass, "dbt exit state did not match expected") From 4ffc5cbe6a108471808ddcb914fb2c94a8af95e4 Mon Sep 17 00:00:00 2001 From: Jacob Beck Date: Thu, 18 Apr 2019 10:48:00 -0600 Subject: [PATCH 5/5] PR feedback --- core/dbt/flags.py | 6 ++--- core/dbt/main.py | 10 ++++---- core/dbt/parser/base.py | 49 +++++++++---------------------------- core/dbt/parser/base_sql.py | 17 ++++++++++--- core/dbt/parser/schemas.py | 25 +++++++++++++------ test/integration/base.py | 6 ++--- 6 files changed, 53 insertions(+), 60 deletions(-) diff --git a/core/dbt/flags.py b/core/dbt/flags.py index dedcae107d0..0d905598447 100644 --- a/core/dbt/flags.py +++ b/core/dbt/flags.py @@ -3,16 +3,16 @@ FULL_REFRESH = False USE_CACHE = True WARN_ERROR = False -WRAP_MODELS_IN_TAGS = False +TEST_NEW_PARSER = False def reset(): global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR, \ - WRAP_MODELS_IN_TAGS + TEST_NEW_PARSER STRICT_MODE = False NON_DESTRUCTIVE = False FULL_REFRESH = False USE_CACHE = True WARN_ERROR = False - WRAP_MODELS_IN_TAGS = False + TEST_NEW_PARSER = False diff --git a/core/dbt/main.py b/core/dbt/main.py index bcc8d140f52..c96fd297bcd 100644 --- a/core/dbt/main.py +++ b/core/dbt/main.py @@ -221,7 +221,7 @@ def update_flags(parsed): elif arg_full_refresh: flags.FULL_REFRESH = True - flags.WRAP_MODELS_IN_TAGS = getattr(parsed, 'wrap_models_in_tags', False) + flags.TEST_NEW_PARSER = getattr(parsed, 'test_new_parser', False) def _build_base_subparser(): @@ -639,11 +639,11 @@ def parse_args(args): help=argparse.SUPPRESS, ) - # if set, wrap all models and tests in '{% model name %}...{% endmodel %}' - # tags, extract them with the jinja block extractor, and verify that we - # got the original input. This is just a sanity check. + # if set, extract all models and blocks with the jinja block extractor, and + # verify that we don't fail anywhere the actual jinja parser passes. The + # reverse (passing files that ends up failing jinja) is fine. p.add_argument( - '--wrap-models-in-tags', + '--test-new-parser', action='store_true', help=argparse.SUPPRESS ) diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py index 875f513b4d2..4026d4c2620 100644 --- a/core/dbt/parser/base.py +++ b/core/dbt/parser/base.py @@ -267,41 +267,14 @@ def parse_node(self, node, node_path, package_project_config, tags=None, return parsed_node def check_block_parsing(self, name, path, contents): - if not dbt.flags.WRAP_MODELS_IN_TAGS: - return - - wrapped = ''.join( - ['{% model ', name, ' %}', contents, '{% endmodel %}'] - ) - blocks = dbt.clients.jinja.extract_toplevel_blocks(wrapped) - if len(blocks) == 0: - raise dbt.exceptions.InternalException( - 'Failed to extract node {} ({}) from its wrapped block!' - .format(name, path) - ) - elif len(blocks) > 1: - raise dbt.exceptions.InternalException( - 'Found multiple blocks when parsing node {} ({}) from its ' - 'wrapped block, expected 1!'.format(name, path) - ) - block = blocks[0] - if block.block_type_name != 'model': - raise dbt.exceptions.InternalException( - 'Found invalid block type ("{}") when parsing node {} ({}) ' - 'from its wrapped block, expected "model"' - .format(block.block_type_name, name, path) - ) - - if block.block_name != name: - raise dbt.exceptions.InternalException( - 'Found invalid block name ("{}") when parsing node {} ({}) ' - 'from its wrapped block, expected "{}"' - .format(block.block_name, name, path, name) - ) - - if blocks[0].contents != contents: - raise dbt.exceptions.InternalException( - 'Found invalid block contents when parsing node {} ({}) ' - 'from its wrapped block, expected it to match original file' - .format(name, path) - ) + """Check if we were able to extract toplevel blocks from the given + contents. Return True if extraction was successful (no exceptions), + False if it fails. + """ + if not dbt.flags.TEST_NEW_PARSER: + return True + try: + dbt.clients.jinja.extract_toplevel_blocks(contents) + except Exception: + return False + return True diff --git a/core/dbt/parser/base_sql.py b/core/dbt/parser/base_sql.py index 99f58354a56..b3baae4a380 100644 --- a/core/dbt/parser/base_sql.py +++ b/core/dbt/parser/base_sql.py @@ -51,9 +51,6 @@ def load_and_parse(self, package_name, root_dir, relative_dirs, file_match.get('searched_path'), path) - if resource_type == NodeType.Model: - self.check_block_parsing(name, path, file_contents) - result.append({ 'name': name, 'root_path': root_dir, @@ -78,7 +75,21 @@ def parse_sql_node(self, node_dict, tags=None): node.name) project = self.all_projects.get(package_name) + + parse_ok = True + if node.resource_type == NodeType.Model: + parse_ok = self.check_block_parsing( + node.name, node.original_file_path, node.raw_sql + ) + node_parsed = self.parse_node(node, unique_id, project, tags=tags) + if not parse_ok: + # if we had a parse error in parse_node, we would not get here. So + # this means we rejected a good file :( + raise dbt.exceptions.InternalException( + 'the block parser rejected a good node: {} was marked invalid ' + 'but is actually valid!'.format(node.original_file_path) + ) return unique_id, node_parsed def parse_sql_nodes(self, nodes, tags=None): diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 3636cb770a6..1ac07bb8417 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -279,7 +279,6 @@ def build_test_node(self, test_target, package_name, test, root_dir, path, raw_sql = self._build_raw_sql(test_namespace, test_target, test_type, test_args) - self.check_block_parsing(full_name, test_path, raw_sql) unparsed = UnparsedNode( name=full_name, resource_type=NodeType.Test, @@ -297,13 +296,23 @@ def build_test_node(self, test_target, package_name, test, root_dir, path, node_path = self.get_path(NodeType.Test, unparsed.package_name, unparsed.name) - return self.parse_node(unparsed, - node_path, - source_package, - tags=['schema'], - fqn_extra=None, - fqn=fqn_override, - column_name=column_name) + result = self.parse_node(unparsed, + node_path, + source_package, + tags=['schema'], + fqn_extra=None, + fqn=fqn_override, + column_name=column_name) + + parse_ok = self.check_block_parsing(full_name, test_path, raw_sql) + if not parse_ok: + # if we had a parse error in parse_node, we would not get here. So + # this means we rejected a good file :( + raise dbt.exceptions.InternalException( + 'the block parser rejected a good node: {} was marked invalid ' + 'but is actually valid!'.format(test_path) + ) + return result class SchemaModelParser(SchemaBaseTestParser): diff --git a/test/integration/base.py b/test/integration/base.py index 6dcbf7e2e6d..21bccb9683c 100644 --- a/test/integration/base.py +++ b/test/integration/base.py @@ -441,7 +441,7 @@ def project_config(self): def profile_config(self): return {} - def run_dbt(self, args=None, expect_pass=True, strict=True, wrap=True): + def run_dbt(self, args=None, expect_pass=True, strict=True, parser=True): if args is None: args = ["run"] @@ -449,8 +449,8 @@ def run_dbt(self, args=None, expect_pass=True, strict=True, wrap=True): if strict: final_args.append('--strict') - if wrap: - final_args.append('--wrap-models-in-tags') + if parser: + final_args.append('--test-new-parser') final_args.extend(args) final_args.append('--log-cache-events')