From ac71888236406d24b9ffeea6ba271b1d1371d9c9 Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Mon, 15 Apr 2019 10:58:40 -0600
Subject: [PATCH 1/5] add tests

test for comment blocks where the first character of the comment is "}"
do/set tests
an even trickier test case
---
 test/unit/test_jinja.py | 129 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)

diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py
index 6fee7bfa132..81381819dc9 100644
--- a/test/unit/test_jinja.py
+++ b/test/unit/test_jinja.py
@@ -161,6 +161,108 @@ def test_comment_only(self):
         blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
         self.assertEqual(len(blocks), 0)
 
+    def test_comment_block_self_closing(self):
+        # test the case where a comment start looks a lot like it closes itself
+        # (but it doesn't in jinja!)
+        body = '{#} {% myblock foo %} {#}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 0)
+
+    def test_embedded_self_closing_comment_block(self):
+        body = '{% myblock foo %} {#}{% endmyblock %} {#}{% endmyblock %}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 1)
+        self.assertEqual(blocks[0].full_block, body)
+        self.assertEqual(blocks[0].contents, ' {#}{% endmyblock %} {#}')
+
+    def test_set_statement(self):
+        body = '{% set x = 1 %}{% myblock foo %}hi{% endmyblock %}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 2)
+        self.assertEqual(blocks[0].full_block, '{% set x = 1 %}')
+        self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}')
+
+    def test_set_block(self):
+        body = '{% set x %}1{% endset %}{% myblock foo %}hi{% endmyblock %}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 2)
+        self.assertEqual(blocks[0].contents, '1')
+        self.assertEqual(blocks[0].block_type_name, 'set')
+        self.assertEqual(blocks[0].block_name, 'x')
+        self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}')
+
+    def test_crazy_set_statement(self):
+        body = '{% set x = (thing("{% myblock foo %}")) %}{% otherblock bar %}x{% endotherblock %}{% set y = otherthing("{% myblock foo %}") %}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 3)
+        self.assertEqual(blocks[0].full_block, '{% set x = (thing("{% myblock foo %}")) %}')
+        self.assertEqual(blocks[0].block_type_name, 'set')
+        self.assertEqual(blocks[1].full_block, '{% otherblock bar %}x{% endotherblock %}')
+        self.assertEqual(blocks[1].block_type_name, 'otherblock')
+        self.assertEqual(blocks[2].full_block, '{% set y = otherthing("{% myblock foo %}") %}')
+        self.assertEqual(blocks[2].block_type_name, 'set')
+
+    def test_do_statement(self):
+        body = '{% do thing.update() %}{% myblock foo %}hi{% endmyblock %}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 2)
+        self.assertEqual(blocks[0].full_block, '{% do thing.update() %}')
+        self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}')
+
+    def test_do_block(self):
+        body = '{% do %}thing.update(){% enddo %}{% myblock foo %}hi{% endmyblock %}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 2)
+        self.assertEqual(blocks[0].contents, 'thing.update()')
+        self.assertEqual(blocks[0].block_type_name, 'do')
+        self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}')
+
+    def test_crazy_do_statement(self):
+        body = '{% do (thing("{% myblock foo %}")) %}{% otherblock bar %}x{% endotherblock %}{% do otherthing("{% myblock foo %}") %}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 3)
+        self.assertEqual(blocks[0].full_block, '{% do (thing("{% myblock foo %}")) %}')
+        self.assertEqual(blocks[0].block_type_name, 'do')
+        self.assertEqual(blocks[1].full_block, '{% otherblock bar %}x{% endotherblock %}')
+        self.assertEqual(blocks[1].block_type_name, 'otherblock')
+        self.assertEqual(blocks[2].full_block, '{% do otherthing("{% myblock foo %}") %}')
+        self.assertEqual(blocks[2].block_type_name, 'do')
+
+    def test_awful_jinja(self):
+        all_blocks = extract_toplevel_blocks(if_you_do_this_you_are_awful)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 4)
+        self.assertEqual(blocks[0].block_type_name, 'do')
+        self.assertEqual(blocks[0].full_block, '''{% do\n    set('foo="bar"')\n%}''')
+        self.assertEqual(blocks[1].block_type_name, 'set')
+        self.assertEqual(blocks[1].full_block, '''{% set x = ("100" + "hello'" + '%}') %}''')
+        self.assertEqual(blocks[2].block_type_name, 'archive')
+        self.assertEqual(blocks[2].contents, '\n    '.join([
+            '''{% set x = ("{% endarchive %}" + (40 * '%})')) %}''',
+            '{# {% endarchive %} #}',
+            '{% embedded %}',
+            '    some block data right here',
+            '{% endembedded %}'
+        ]))
+        self.assertEqual(blocks[3].block_type_name, 'materialization')
+        self.assertEqual(blocks[3].contents, '\nhi\n')
+
+    def test_quoted_endblock_within_block(self):
+        body = '{% myblock something -%}  {% set x = ("{% endmyblock %}") %}  {% endmyblock %}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 1)
+        self.assertEqual(blocks[0].block_type_name, 'myblock')
+        self.assertEqual(blocks[0].contents, '{% set x = ("{% endmyblock %}") %}  ')
+
 bar_block = '''{% mytype bar %}
 {# a comment
     that inside it has
@@ -188,3 +290,30 @@ def test_comment_only(self):
 {% mytype foo %} some stuff {% endmytype %}
 
 '''+bar_block+x_block
+
+
+if_you_do_this_you_are_awful = '''
+{#} here is a comment with a block inside {% block x %} asdf {% endblock %} {#}
+{% do
+    set('foo="bar"')
+%}
+{% set x = ("100" + "hello'" + '%}') %}
+{% archive something -%}
+    {% set x = ("{% endarchive %}" + (40 * '%})')) %}
+    {# {% endarchive %} #}
+    {% embedded %}
+        some block data right here
+    {% endembedded %}
+{%- endarchive %}
+
+{% raw %}
+    {% set x = SYNTAX ERROR}
+{% endraw %}
+
+
+{% materialization whatever, adapter='thing' %}
+hi
+{% endmaterialization %}
+'''
+
+

From 9b1aede911304db8bdf2056a11e7da493795f5ce Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Mon, 15 Apr 2019 11:59:53 -0600
Subject: [PATCH 2/5] Fix a number of bugs

After we find the start of a comment block, advance immediately
 - this is so we do not mistake "{#}" as both open and close of comment
support do/set statements (no {% enddo %}/{% endset %})
fix some edge-case bugs around quoting
fix a bug around materialization parsing
---
 core/dbt/clients/_jinja_blocks.py | 136 +++++++++++++++++++++++++-----
 1 file changed, 117 insertions(+), 19 deletions(-)

diff --git a/core/dbt/clients/_jinja_blocks.py b/core/dbt/clients/_jinja_blocks.py
index 1da2f5c719a..735a627ce53 100644
--- a/core/dbt/clients/_jinja_blocks.py
+++ b/core/dbt/clients/_jinja_blocks.py
@@ -16,11 +16,12 @@ def __init__(self, contents):
 
 
 class BlockTag(object):
-    def __init__(self, block_type_name, block_name, contents=None, **kw):
+    def __init__(self, block_type_name, block_name, contents=None,
+                 full_block=None, **kw):
         self.block_type_name = block_type_name
         self.block_name = block_name
         self.contents = contents
-        self.full_block = None
+        self.full_block = full_block
 
     def __str__(self):
         return 'BlockTag({!r}, {!r})'.format(self.block_type_name,
@@ -59,7 +60,7 @@ def end_pat(self):
     r'(?:\s+(?P<block_name>({})))?'.format(_NAME_PATTERN),
 )))
 
-TAG_CLOSE_PATTERN = regex(r'(?:\-\%\}\s*|\%\})')
+TAG_CLOSE_PATTERN = regex(r'(?:(?P<tag_close>(\-\%\}\s*|\%\})))')
 # if you do {% materialization foo, adapter="myadapter' %} and end up with
 # mismatched quotes this will still match, but jinja will fail somewhere
 # since the adapter= argument has to be an adapter name, and none have quotes
@@ -103,6 +104,13 @@ def end_pat(self):
 )
 
 
+NON_STRING_DO_BLOCK_MEMBER_PATTERN = regex(
+    # anything, followed by a quote, paren, or a tag end
+    r'''(.*?)'''
+    r'''((?P<quote>(['"]))|(?P<open>(\())|(?P<close>(\))))'''
+)
+
+
 class BlockIterator(object):
     def __init__(self, data):
         self.data = data
@@ -134,9 +142,7 @@ def _match(self, pattern):
     def expect_comment_end(self):
         """Expect a comment end and return the match object.
         """
-        match = self._match(COMMENT_END_PATTERN)
-        if match is None:
-            dbt.exceptions.raise_compiler_error('unexpected EOF, expected #}')
+        match = self._expect_match('#}', COMMENT_END_PATTERN)
         self.advance(match.end())
 
     def expect_raw_end(self):
@@ -195,7 +201,8 @@ def handle_block(self, match, block_start=None):
         while True:
             match = self._expect_match(
                 '"{}"'.format(found.end_block_type_name),
-                found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN
+                found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN,
+                regex('''(?P<quote>(['"]))''')
             )
             groups = match.groupdict()
             if groups.get('endblock') is not None:
@@ -207,6 +214,10 @@ def handle_block(self, match, block_start=None):
                 self.expect_comment_end()
             elif groups.get('raw_start') is not None:
                 self.expect_raw_end()
+            elif groups.get('quote') is not None:
+                self.rewind()
+                match = self._expect_match('any string', STRING_PATTERN)
+                self.advance(match.end())
             else:
                 raise dbt.exceptions.InternalException(
                     'unhandled regex in handle_block, no match: {}'
@@ -226,10 +237,49 @@ def handle_block(self, match, block_start=None):
     def handle_materialization(self, match):
         self._expect_match('materialization args',
                            MATERIALIZATION_ARGS_PATTERN)
-        self._expect_match('%}', TAG_CLOSE_PATTERN)
+        endtag = self._expect_match('%}', TAG_CLOSE_PATTERN)
+        self.advance(endtag.end())
         # handle the block we started with!
         self.blocks.append(self.handle_block(match))
 
+    def handle_do(self, match, expect_block):
+        if expect_block:
+            # we might be wrong to expect a block ({% do (...) %}, for example)
+            # so see if there's more data before the tag closes. if there
+            # isn't, we expect a block.
+            close_match = self._expect_match('%}', TAG_CLOSE_PATTERN)
+            unprocessed = self.data[match.end():close_match.start()].strip()
+            expect_block = not unprocessed
+
+        if expect_block:
+            # if we're here, expect_block is True and we must have set
+            # close_match
+            self.advance(close_match.end())
+            block = self.handle_block(match)
+        else:
+            # we have a do-statement like {% do thing() %}, so no {% enddo %}
+            # also, we don't want to advance to the end of the match, as it
+            # might be inside a string or something! So go back and figure out
+            self._process_rval_components()
+            block = BlockTag('do', None,
+                             full_block=self.data[match.start():self.pos])
+        self.blocks.append(block)
+
+    def handle_set(self, match):
+        equal_or_close = self._expect_match('%} or =',
+                                            TAG_CLOSE_PATTERN, regex(r'='))
+        self.advance(equal_or_close.end())
+        if equal_or_close.groupdict().get('tag_close') is None:
+            # it's an equals sign, must be like {% set x = 1 %}
+            self._process_rval_components()
+            # watch out, order matters here on python 2
+            block = BlockTag(full_block=self.data[match.start():self.pos],
+                             **match.groupdict())
+        else:
+            # it's a tag close, must be like {% set x %}...{% endset %}
+            block = self.handle_block(match)
+        self.blocks.append(block)
+
     def find_block(self):
         open_block = (
             r'(?:\s*\{\%\-|\{\%)\s*'
@@ -251,21 +301,36 @@ def find_block(self):
         # comments are easy
         if matchgroups.get('comment_start') is not None:
             start = match.start()
+            self.advance(match.end())
             self.expect_comment_end()
             self.blocks.append(BlockData(self.data[start:self.pos]))
             return True
 
-        if matchgroups.get('block_type_name') == 'raw':
+        block_type_name = matchgroups.get('block_type_name')
+
+        if block_type_name == 'raw':
             start = match.start()
             self.expect_raw_end()
             self.blocks.append(BlockData(self.data[start:self.pos]))
             return True
 
-        if matchgroups.get('block_type_name') == 'materialization':
+        if block_type_name == 'materialization':
             self.advance(match.end())
             self.handle_materialization(match)
             return True
 
+        if block_type_name == 'do':
+            # if there is a "block_name" in the match groups, we don't expect a
+            # block as the "block name" is actually part of the do-statement.
+            expect_block = matchgroups.get('block_name') is None
+            self.handle_do(match, expect_block=expect_block)
+            return True
+
+        if block_type_name == 'set':
+            self.advance(match.end())
+            self.handle_set(match)
+            return True
+
         # we're somewhere like this {% block_type_name block_type
         # we've either got arguments, a close of tag (%}), or bad input.
         # we've handled materializations already (they're weird!)
@@ -284,6 +349,42 @@ def find_block(self):
         self.blocks.append(self.handle_block(match))
         return True
 
+    def _process_rval_components(self):
+        """This is suspiciously similar to _process_macro_default_arg, probably
+        want to figure out how to merge the two.
+
+        Process the rval of an assignment statement or a do-block
+        """
+        while True:
+            match = self._expect_match(
+                'do block component',
+                # you could have a string, though that would be weird
+                STRING_PATTERN,
+                # a quote or an open/close parenthesis
+                NON_STRING_DO_BLOCK_MEMBER_PATTERN,
+                # a tag close
+                TAG_CLOSE_PATTERN
+            )
+            matchgroups = match.groupdict()
+            self.advance(match.end())
+            if matchgroups.get('string') is not None:
+                continue
+            elif matchgroups.get('quote') is not None:
+                self.rewind()
+                # now look for a string
+                match = self._expect_match('any string', STRING_PATTERN)
+                self.advance(match.end())
+            elif matchgroups.get('open'):
+                self._parenthesis_stack.append(True)
+            elif matchgroups.get('close'):
+                self._parenthesis_stack.pop()
+            elif matchgroups.get('tag_close'):
+                if self._parenthesis_stack:
+                    msg = ('Found "%}", expected ")"')
+                    dbt.exceptions.raise_compiler_error(msg)
+                return
+            # else whitespace
+
     def _process_macro_default_arg(self):
         """Handle the bit after an '=' in a macro default argument. This is
         probably the trickiest thing. The goal here is to accept all strings
@@ -297,7 +398,7 @@ def _process_macro_default_arg(self):
                 'macro argument',
                 # you could have a string
                 STRING_PATTERN,
-                # a quote, a comma, or a close parenthesis
+                # a quote, a comma, or a open/close parenthesis
                 NON_STRING_MACRO_ARGS_PATTERN,
                 # we want to "match", not "search"
                 method='match'
@@ -311,20 +412,17 @@ def _process_macro_default_arg(self):
                 # we got a bunch of data and then a string opening value.
                 # put the quote back on the menu
                 self.rewind()
-                # if we only got a single quote mark and didn't hit a string
-                # at all, this file has an unclosed quote. Fail accordingly.
-                if match.end() - match.start() == 1:
-                    msg = (
-                        'Unclosed quotation mark at position {}. Context:\n{}'
-                        .format(self.pos, self.data[self.pos-20:self.pos+20])
-                    )
-                    dbt.exceptions.raise_compiler_error(msg)
+                # now look for a string
+                match = self._expect_match('any string', STRING_PATTERN)
+                self.advance(match.end())
             elif matchgroups.get('comma') is not None:
                 # small hack: if we hit a comma and there is one parenthesis
                 # left, return to look for a new name. otherwise we're still
                 # looking for the parameter close.
                 if len(self._parenthesis_stack) == 1:
                     return
+            elif matchgroups.get('open'):
+                self._parenthesis_stack.append(True)
             elif matchgroups.get('close'):
                 self._parenthesis_stack.pop()
             else:

From 4e8c7b9216265365274fb175a55e7a53b45d9fd7 Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Thu, 18 Apr 2019 09:26:55 -0600
Subject: [PATCH 3/5] add a test and a comment about an odd edge case in
 do-block parsing

---
 core/dbt/clients/_jinja_blocks.py | 3 +++
 test/unit/test_jinja.py           | 8 ++++++++
 2 files changed, 11 insertions(+)

diff --git a/core/dbt/clients/_jinja_blocks.py b/core/dbt/clients/_jinja_blocks.py
index 735a627ce53..55107335290 100644
--- a/core/dbt/clients/_jinja_blocks.py
+++ b/core/dbt/clients/_jinja_blocks.py
@@ -322,6 +322,9 @@ def find_block(self):
         if block_type_name == 'do':
             # if there is a "block_name" in the match groups, we don't expect a
             # block as the "block name" is actually part of the do-statement.
+            # we need to do this to handle the (weird and probably wrong!) case
+            # of a do-statement that is only a single identifier - techincally
+            # allowed in jinja. (for example, {% do thing %})
             expect_block = matchgroups.get('block_name') is None
             self.handle_do(match, expect_block=expect_block)
             return True
diff --git a/test/unit/test_jinja.py b/test/unit/test_jinja.py
index 81381819dc9..5bcab016666 100644
--- a/test/unit/test_jinja.py
+++ b/test/unit/test_jinja.py
@@ -215,6 +215,14 @@ def test_do_statement(self):
         self.assertEqual(blocks[0].full_block, '{% do thing.update() %}')
         self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}')
 
+    def test_deceptive_do_statement(self):
+        body = '{% do thing %}{% myblock foo %}hi{% endmyblock %}'
+        all_blocks = extract_toplevel_blocks(body)
+        blocks = [b for b in all_blocks if b.block_type_name != '__dbt__data']
+        self.assertEqual(len(blocks), 2)
+        self.assertEqual(blocks[0].full_block, '{% do thing %}')
+        self.assertEqual(blocks[1].full_block, '{% myblock foo %}hi{% endmyblock %}')
+
     def test_do_block(self):
         body = '{% do %}thing.update(){% enddo %}{% myblock foo %}hi{% endmyblock %}'
         all_blocks = extract_toplevel_blocks(body)

From f3449dcfcb680e72bc75677b6162750d9329c6c7 Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Wed, 17 Apr 2019 08:19:39 -0600
Subject: [PATCH 4/5] add a flag to wrap models/tests in jinja blocks, to see
 what happens

---
 core/dbt/flags.py           |  5 ++++-
 core/dbt/main.py            | 11 ++++++++++
 core/dbt/parser/base.py     | 40 +++++++++++++++++++++++++++++++++++++
 core/dbt/parser/base_sql.py |  4 ++++
 core/dbt/parser/schemas.py  |  2 ++
 test/integration/base.py    | 17 +++++++++++-----
 6 files changed, 73 insertions(+), 6 deletions(-)

diff --git a/core/dbt/flags.py b/core/dbt/flags.py
index 8bf43049a49..dedcae107d0 100644
--- a/core/dbt/flags.py
+++ b/core/dbt/flags.py
@@ -3,13 +3,16 @@
 FULL_REFRESH = False
 USE_CACHE = True
 WARN_ERROR = False
+WRAP_MODELS_IN_TAGS = False
 
 
 def reset():
-    global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR
+    global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR, \
+        WRAP_MODELS_IN_TAGS
 
     STRICT_MODE = False
     NON_DESTRUCTIVE = False
     FULL_REFRESH = False
     USE_CACHE = True
     WARN_ERROR = False
+    WRAP_MODELS_IN_TAGS = False
diff --git a/core/dbt/main.py b/core/dbt/main.py
index dd3520af478..bcc8d140f52 100644
--- a/core/dbt/main.py
+++ b/core/dbt/main.py
@@ -221,6 +221,8 @@ def update_flags(parsed):
     elif arg_full_refresh:
         flags.FULL_REFRESH = True
 
+    flags.WRAP_MODELS_IN_TAGS = getattr(parsed, 'wrap_models_in_tags', False)
+
 
 def _build_base_subparser():
     base_subparser = argparse.ArgumentParser(add_help=False)
@@ -637,6 +639,15 @@ def parse_args(args):
         help=argparse.SUPPRESS,
     )
 
+    # if set, wrap all models and tests in '{% model name %}...{% endmodel %}'
+    # tags, extract them with the jinja block extractor, and verify that we
+    # got the original input. This is just a sanity check.
+    p.add_argument(
+        '--wrap-models-in-tags',
+        action='store_true',
+        help=argparse.SUPPRESS
+    )
+
     subs = p.add_subparsers(title="Available sub-commands")
 
     base_subparser = _build_base_subparser()
diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py
index 8bd6a51c81b..875f513b4d2 100644
--- a/core/dbt/parser/base.py
+++ b/core/dbt/parser/base.py
@@ -265,3 +265,43 @@ def parse_node(self, node, node_path, package_project_config, tags=None,
         parsed_node.validate()
 
         return parsed_node
+
+    def check_block_parsing(self, name, path, contents):
+        if not dbt.flags.WRAP_MODELS_IN_TAGS:
+            return
+
+        wrapped = ''.join(
+            ['{% model ', name, ' %}', contents, '{% endmodel %}']
+        )
+        blocks = dbt.clients.jinja.extract_toplevel_blocks(wrapped)
+        if len(blocks) == 0:
+            raise dbt.exceptions.InternalException(
+                'Failed to extract node {} ({}) from its wrapped block!'
+                .format(name, path)
+            )
+        elif len(blocks) > 1:
+            raise dbt.exceptions.InternalException(
+                'Found multiple blocks when parsing node {} ({}) from its '
+                'wrapped block, expected 1!'.format(name, path)
+            )
+        block = blocks[0]
+        if block.block_type_name != 'model':
+            raise dbt.exceptions.InternalException(
+                'Found invalid block type ("{}") when parsing node {} ({}) '
+                'from its wrapped block, expected "model"'
+                .format(block.block_type_name, name, path)
+            )
+
+        if block.block_name != name:
+            raise dbt.exceptions.InternalException(
+                'Found invalid block name ("{}") when parsing node {} ({}) '
+                'from its wrapped block, expected "{}"'
+                .format(block.block_name, name, path, name)
+            )
+
+        if blocks[0].contents != contents:
+            raise dbt.exceptions.InternalException(
+                'Found invalid block contents when parsing node {} ({}) '
+                'from its wrapped block, expected it to match original file'
+                .format(name, path)
+            )
diff --git a/core/dbt/parser/base_sql.py b/core/dbt/parser/base_sql.py
index d412e2e6f9e..99f58354a56 100644
--- a/core/dbt/parser/base_sql.py
+++ b/core/dbt/parser/base_sql.py
@@ -9,6 +9,7 @@
 
 from dbt.contracts.graph.unparsed import UnparsedNode
 from dbt.parser.base import MacrosKnownParser
+from dbt.node_types import NodeType
 
 
 class BaseSqlParser(MacrosKnownParser):
@@ -50,6 +51,9 @@ def load_and_parse(self, package_name, root_dir, relative_dirs,
                 file_match.get('searched_path'),
                 path)
 
+            if resource_type == NodeType.Model:
+                self.check_block_parsing(name, path, file_contents)
+
             result.append({
                 'name': name,
                 'root_path': root_dir,
diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py
index ba548b4a372..3636cb770a6 100644
--- a/core/dbt/parser/schemas.py
+++ b/core/dbt/parser/schemas.py
@@ -278,6 +278,8 @@ def build_test_node(self, test_target, package_name, test, root_dir, path,
         full_path = get_pseudo_test_path(full_name, test_path, 'schema_test')
         raw_sql = self._build_raw_sql(test_namespace, test_target, test_type,
                                       test_args)
+
+        self.check_block_parsing(full_name, test_path, raw_sql)
         unparsed = UnparsedNode(
             name=full_name,
             resource_type=NodeType.Test,
diff --git a/test/integration/base.py b/test/integration/base.py
index 3dcfa193e5c..6dcbf7e2e6d 100644
--- a/test/integration/base.py
+++ b/test/integration/base.py
@@ -441,16 +441,23 @@ def project_config(self):
     def profile_config(self):
         return {}
 
-    def run_dbt(self, args=None, expect_pass=True, strict=True):
+    def run_dbt(self, args=None, expect_pass=True, strict=True, wrap=True):
         if args is None:
             args = ["run"]
 
+        final_args = []
+
         if strict:
-            args = ["--strict"] + args
-        args.append('--log-cache-events')
-        logger.info("Invoking dbt with {}".format(args))
+            final_args.append('--strict')
+        if wrap:
+            final_args.append('--wrap-models-in-tags')
+
+        final_args.extend(args)
+        final_args.append('--log-cache-events')
+
+        logger.info("Invoking dbt with {}".format(final_args))
 
-        res, success = dbt.handle_and_check(args)
+        res, success = dbt.handle_and_check(final_args)
         self.assertEqual(
             success, expect_pass,
             "dbt exit state did not match expected")

From 4ffc5cbe6a108471808ddcb914fb2c94a8af95e4 Mon Sep 17 00:00:00 2001
From: Jacob Beck <jake@fishtownanalytics.com>
Date: Thu, 18 Apr 2019 10:48:00 -0600
Subject: [PATCH 5/5] PR feedback

---
 core/dbt/flags.py           |  6 ++---
 core/dbt/main.py            | 10 ++++----
 core/dbt/parser/base.py     | 49 +++++++++----------------------------
 core/dbt/parser/base_sql.py | 17 ++++++++++---
 core/dbt/parser/schemas.py  | 25 +++++++++++++------
 test/integration/base.py    |  6 ++---
 6 files changed, 53 insertions(+), 60 deletions(-)

diff --git a/core/dbt/flags.py b/core/dbt/flags.py
index dedcae107d0..0d905598447 100644
--- a/core/dbt/flags.py
+++ b/core/dbt/flags.py
@@ -3,16 +3,16 @@
 FULL_REFRESH = False
 USE_CACHE = True
 WARN_ERROR = False
-WRAP_MODELS_IN_TAGS = False
+TEST_NEW_PARSER = False
 
 
 def reset():
     global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR, \
-        WRAP_MODELS_IN_TAGS
+        TEST_NEW_PARSER
 
     STRICT_MODE = False
     NON_DESTRUCTIVE = False
     FULL_REFRESH = False
     USE_CACHE = True
     WARN_ERROR = False
-    WRAP_MODELS_IN_TAGS = False
+    TEST_NEW_PARSER = False
diff --git a/core/dbt/main.py b/core/dbt/main.py
index bcc8d140f52..c96fd297bcd 100644
--- a/core/dbt/main.py
+++ b/core/dbt/main.py
@@ -221,7 +221,7 @@ def update_flags(parsed):
     elif arg_full_refresh:
         flags.FULL_REFRESH = True
 
-    flags.WRAP_MODELS_IN_TAGS = getattr(parsed, 'wrap_models_in_tags', False)
+    flags.TEST_NEW_PARSER = getattr(parsed, 'test_new_parser', False)
 
 
 def _build_base_subparser():
@@ -639,11 +639,11 @@ def parse_args(args):
         help=argparse.SUPPRESS,
     )
 
-    # if set, wrap all models and tests in '{% model name %}...{% endmodel %}'
-    # tags, extract them with the jinja block extractor, and verify that we
-    # got the original input. This is just a sanity check.
+    # if set, extract all models and blocks with the jinja block extractor, and
+    # verify that we don't fail anywhere the actual jinja parser passes. The
+    # reverse (passing files that ends up failing jinja) is fine.
     p.add_argument(
-        '--wrap-models-in-tags',
+        '--test-new-parser',
         action='store_true',
         help=argparse.SUPPRESS
     )
diff --git a/core/dbt/parser/base.py b/core/dbt/parser/base.py
index 875f513b4d2..4026d4c2620 100644
--- a/core/dbt/parser/base.py
+++ b/core/dbt/parser/base.py
@@ -267,41 +267,14 @@ def parse_node(self, node, node_path, package_project_config, tags=None,
         return parsed_node
 
     def check_block_parsing(self, name, path, contents):
-        if not dbt.flags.WRAP_MODELS_IN_TAGS:
-            return
-
-        wrapped = ''.join(
-            ['{% model ', name, ' %}', contents, '{% endmodel %}']
-        )
-        blocks = dbt.clients.jinja.extract_toplevel_blocks(wrapped)
-        if len(blocks) == 0:
-            raise dbt.exceptions.InternalException(
-                'Failed to extract node {} ({}) from its wrapped block!'
-                .format(name, path)
-            )
-        elif len(blocks) > 1:
-            raise dbt.exceptions.InternalException(
-                'Found multiple blocks when parsing node {} ({}) from its '
-                'wrapped block, expected 1!'.format(name, path)
-            )
-        block = blocks[0]
-        if block.block_type_name != 'model':
-            raise dbt.exceptions.InternalException(
-                'Found invalid block type ("{}") when parsing node {} ({}) '
-                'from its wrapped block, expected "model"'
-                .format(block.block_type_name, name, path)
-            )
-
-        if block.block_name != name:
-            raise dbt.exceptions.InternalException(
-                'Found invalid block name ("{}") when parsing node {} ({}) '
-                'from its wrapped block, expected "{}"'
-                .format(block.block_name, name, path, name)
-            )
-
-        if blocks[0].contents != contents:
-            raise dbt.exceptions.InternalException(
-                'Found invalid block contents when parsing node {} ({}) '
-                'from its wrapped block, expected it to match original file'
-                .format(name, path)
-            )
+        """Check if we were able to extract toplevel blocks from the given
+        contents. Return True if extraction was successful (no exceptions),
+        False if it fails.
+        """
+        if not dbt.flags.TEST_NEW_PARSER:
+            return True
+        try:
+            dbt.clients.jinja.extract_toplevel_blocks(contents)
+        except Exception:
+            return False
+        return True
diff --git a/core/dbt/parser/base_sql.py b/core/dbt/parser/base_sql.py
index 99f58354a56..b3baae4a380 100644
--- a/core/dbt/parser/base_sql.py
+++ b/core/dbt/parser/base_sql.py
@@ -51,9 +51,6 @@ def load_and_parse(self, package_name, root_dir, relative_dirs,
                 file_match.get('searched_path'),
                 path)
 
-            if resource_type == NodeType.Model:
-                self.check_block_parsing(name, path, file_contents)
-
             result.append({
                 'name': name,
                 'root_path': root_dir,
@@ -78,7 +75,21 @@ def parse_sql_node(self, node_dict, tags=None):
                                   node.name)
 
         project = self.all_projects.get(package_name)
+
+        parse_ok = True
+        if node.resource_type == NodeType.Model:
+            parse_ok = self.check_block_parsing(
+                node.name, node.original_file_path, node.raw_sql
+            )
+
         node_parsed = self.parse_node(node, unique_id, project, tags=tags)
+        if not parse_ok:
+            # if we had a parse error in parse_node, we would not get here. So
+            # this means we rejected a good file :(
+            raise dbt.exceptions.InternalException(
+                'the block parser rejected a good node: {} was marked invalid '
+                'but is actually valid!'.format(node.original_file_path)
+            )
         return unique_id, node_parsed
 
     def parse_sql_nodes(self, nodes, tags=None):
diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py
index 3636cb770a6..1ac07bb8417 100644
--- a/core/dbt/parser/schemas.py
+++ b/core/dbt/parser/schemas.py
@@ -279,7 +279,6 @@ def build_test_node(self, test_target, package_name, test, root_dir, path,
         raw_sql = self._build_raw_sql(test_namespace, test_target, test_type,
                                       test_args)
 
-        self.check_block_parsing(full_name, test_path, raw_sql)
         unparsed = UnparsedNode(
             name=full_name,
             resource_type=NodeType.Test,
@@ -297,13 +296,23 @@ def build_test_node(self, test_target, package_name, test, root_dir, path,
         node_path = self.get_path(NodeType.Test, unparsed.package_name,
                                   unparsed.name)
 
-        return self.parse_node(unparsed,
-                               node_path,
-                               source_package,
-                               tags=['schema'],
-                               fqn_extra=None,
-                               fqn=fqn_override,
-                               column_name=column_name)
+        result = self.parse_node(unparsed,
+                                 node_path,
+                                 source_package,
+                                 tags=['schema'],
+                                 fqn_extra=None,
+                                 fqn=fqn_override,
+                                 column_name=column_name)
+
+        parse_ok = self.check_block_parsing(full_name, test_path, raw_sql)
+        if not parse_ok:
+            # if we had a parse error in parse_node, we would not get here. So
+            # this means we rejected a good file :(
+            raise dbt.exceptions.InternalException(
+                'the block parser rejected a good node: {} was marked invalid '
+                'but is actually valid!'.format(test_path)
+            )
+        return result
 
 
 class SchemaModelParser(SchemaBaseTestParser):
diff --git a/test/integration/base.py b/test/integration/base.py
index 6dcbf7e2e6d..21bccb9683c 100644
--- a/test/integration/base.py
+++ b/test/integration/base.py
@@ -441,7 +441,7 @@ def project_config(self):
     def profile_config(self):
         return {}
 
-    def run_dbt(self, args=None, expect_pass=True, strict=True, wrap=True):
+    def run_dbt(self, args=None, expect_pass=True, strict=True, parser=True):
         if args is None:
             args = ["run"]
 
@@ -449,8 +449,8 @@ def run_dbt(self, args=None, expect_pass=True, strict=True, wrap=True):
 
         if strict:
             final_args.append('--strict')
-        if wrap:
-            final_args.append('--wrap-models-in-tags')
+        if parser:
+            final_args.append('--test-new-parser')
 
         final_args.extend(args)
         final_args.append('--log-cache-events')