Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add a flag to wrap models/tests in jinja blocks #1407

Merged
merged 5 commits into from
Apr 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 120 additions & 19 deletions core/dbt/clients/_jinja_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ def __init__(self, contents):


class BlockTag(object):
def __init__(self, block_type_name, block_name, contents=None, **kw):
def __init__(self, block_type_name, block_name, contents=None,
full_block=None, **kw):
self.block_type_name = block_type_name
self.block_name = block_name
self.contents = contents
self.full_block = None
self.full_block = full_block

def __str__(self):
return 'BlockTag({!r}, {!r})'.format(self.block_type_name,
Expand Down Expand Up @@ -59,7 +60,7 @@ def end_pat(self):
r'(?:\s+(?P<block_name>({})))?'.format(_NAME_PATTERN),
)))

TAG_CLOSE_PATTERN = regex(r'(?:\-\%\}\s*|\%\})')
TAG_CLOSE_PATTERN = regex(r'(?:(?P<tag_close>(\-\%\}\s*|\%\})))')
# if you do {% materialization foo, adapter="myadapter' %} and end up with
# mismatched quotes this will still match, but jinja will fail somewhere
# since the adapter= argument has to be an adapter name, and none have quotes
Expand Down Expand Up @@ -103,6 +104,13 @@ def end_pat(self):
)


NON_STRING_DO_BLOCK_MEMBER_PATTERN = regex(
# anything, followed by a quote, paren, or a tag end
r'''(.*?)'''
r'''((?P<quote>(['"]))|(?P<open>(\())|(?P<close>(\))))'''
)


class BlockIterator(object):
def __init__(self, data):
self.data = data
Expand Down Expand Up @@ -134,9 +142,7 @@ def _match(self, pattern):
def expect_comment_end(self):
"""Expect a comment end and return the match object.
"""
match = self._match(COMMENT_END_PATTERN)
if match is None:
dbt.exceptions.raise_compiler_error('unexpected EOF, expected #}')
match = self._expect_match('#}', COMMENT_END_PATTERN)
self.advance(match.end())

def expect_raw_end(self):
Expand Down Expand Up @@ -195,7 +201,8 @@ def handle_block(self, match, block_start=None):
while True:
match = self._expect_match(
'"{}"'.format(found.end_block_type_name),
found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN
found.end_pat(), COMMENT_START_PATTERN, RAW_START_PATTERN,
regex('''(?P<quote>(['"]))''')
)
groups = match.groupdict()
if groups.get('endblock') is not None:
Expand All @@ -207,6 +214,10 @@ def handle_block(self, match, block_start=None):
self.expect_comment_end()
elif groups.get('raw_start') is not None:
self.expect_raw_end()
elif groups.get('quote') is not None:
self.rewind()
match = self._expect_match('any string', STRING_PATTERN)
self.advance(match.end())
else:
raise dbt.exceptions.InternalException(
'unhandled regex in handle_block, no match: {}'
Expand All @@ -226,10 +237,49 @@ def handle_block(self, match, block_start=None):
def handle_materialization(self, match):
self._expect_match('materialization args',
MATERIALIZATION_ARGS_PATTERN)
self._expect_match('%}', TAG_CLOSE_PATTERN)
endtag = self._expect_match('%}', TAG_CLOSE_PATTERN)
self.advance(endtag.end())
# handle the block we started with!
self.blocks.append(self.handle_block(match))

def handle_do(self, match, expect_block):
if expect_block:
# we might be wrong to expect a block ({% do (...) %}, for example)
# so see if there's more data before the tag closes. if there
# isn't, we expect a block.
close_match = self._expect_match('%}', TAG_CLOSE_PATTERN)
unprocessed = self.data[match.end():close_match.start()].strip()
expect_block = not unprocessed

if expect_block:
# if we're here, expect_block is True and we must have set
# close_match
self.advance(close_match.end())
block = self.handle_block(match)
else:
# we have a do-statement like {% do thing() %}, so no {% enddo %}
# also, we don't want to advance to the end of the match, as it
# might be inside a string or something! So go back and figure out
self._process_rval_components()
block = BlockTag('do', None,
full_block=self.data[match.start():self.pos])
self.blocks.append(block)

def handle_set(self, match):
equal_or_close = self._expect_match('%} or =',
TAG_CLOSE_PATTERN, regex(r'='))
self.advance(equal_or_close.end())
if equal_or_close.groupdict().get('tag_close') is None:
# it's an equals sign, must be like {% set x = 1 %}
self._process_rval_components()
# watch out, order matters here on python 2
block = BlockTag(full_block=self.data[match.start():self.pos],
**match.groupdict())
else:
# it's a tag close, must be like {% set x %}...{% endset %}
block = self.handle_block(match)
self.blocks.append(block)

def find_block(self):
open_block = (
r'(?:\s*\{\%\-|\{\%)\s*'
Expand All @@ -251,21 +301,39 @@ def find_block(self):
# comments are easy
if matchgroups.get('comment_start') is not None:
start = match.start()
self.advance(match.end())
self.expect_comment_end()
self.blocks.append(BlockData(self.data[start:self.pos]))
return True

if matchgroups.get('block_type_name') == 'raw':
block_type_name = matchgroups.get('block_type_name')

if block_type_name == 'raw':
start = match.start()
self.expect_raw_end()
self.blocks.append(BlockData(self.data[start:self.pos]))
return True

if matchgroups.get('block_type_name') == 'materialization':
if block_type_name == 'materialization':
self.advance(match.end())
self.handle_materialization(match)
return True

if block_type_name == 'do':
# if there is a "block_name" in the match groups, we don't expect a
# block as the "block name" is actually part of the do-statement.
# we need to do this to handle the (weird and probably wrong!) case
# of a do-statement that is only a single identifier - techincally
# allowed in jinja. (for example, {% do thing %})
expect_block = matchgroups.get('block_name') is None
self.handle_do(match, expect_block=expect_block)
return True

if block_type_name == 'set':
self.advance(match.end())
self.handle_set(match)
return True

# we're somewhere like this {% block_type_name block_type
# we've either got arguments, a close of tag (%}), or bad input.
# we've handled materializations already (they're weird!)
Expand All @@ -284,6 +352,42 @@ def find_block(self):
self.blocks.append(self.handle_block(match))
return True

def _process_rval_components(self):
"""This is suspiciously similar to _process_macro_default_arg, probably
want to figure out how to merge the two.

Process the rval of an assignment statement or a do-block
"""
while True:
match = self._expect_match(
'do block component',
# you could have a string, though that would be weird
STRING_PATTERN,
# a quote or an open/close parenthesis
NON_STRING_DO_BLOCK_MEMBER_PATTERN,
# a tag close
TAG_CLOSE_PATTERN
)
matchgroups = match.groupdict()
self.advance(match.end())
if matchgroups.get('string') is not None:
continue
elif matchgroups.get('quote') is not None:
self.rewind()
# now look for a string
match = self._expect_match('any string', STRING_PATTERN)
self.advance(match.end())
elif matchgroups.get('open'):
self._parenthesis_stack.append(True)
elif matchgroups.get('close'):
self._parenthesis_stack.pop()
elif matchgroups.get('tag_close'):
if self._parenthesis_stack:
msg = ('Found "%}", expected ")"')
dbt.exceptions.raise_compiler_error(msg)
return
# else whitespace

def _process_macro_default_arg(self):
"""Handle the bit after an '=' in a macro default argument. This is
probably the trickiest thing. The goal here is to accept all strings
Expand All @@ -297,7 +401,7 @@ def _process_macro_default_arg(self):
'macro argument',
# you could have a string
STRING_PATTERN,
# a quote, a comma, or a close parenthesis
# a quote, a comma, or a open/close parenthesis
NON_STRING_MACRO_ARGS_PATTERN,
# we want to "match", not "search"
method='match'
Expand All @@ -311,20 +415,17 @@ def _process_macro_default_arg(self):
# we got a bunch of data and then a string opening value.
# put the quote back on the menu
self.rewind()
# if we only got a single quote mark and didn't hit a string
# at all, this file has an unclosed quote. Fail accordingly.
if match.end() - match.start() == 1:
msg = (
'Unclosed quotation mark at position {}. Context:\n{}'
.format(self.pos, self.data[self.pos-20:self.pos+20])
)
dbt.exceptions.raise_compiler_error(msg)
# now look for a string
match = self._expect_match('any string', STRING_PATTERN)
self.advance(match.end())
elif matchgroups.get('comma') is not None:
# small hack: if we hit a comma and there is one parenthesis
# left, return to look for a new name. otherwise we're still
# looking for the parameter close.
if len(self._parenthesis_stack) == 1:
return
elif matchgroups.get('open'):
self._parenthesis_stack.append(True)
elif matchgroups.get('close'):
self._parenthesis_stack.pop()
else:
Expand Down
5 changes: 4 additions & 1 deletion core/dbt/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@
FULL_REFRESH = False
USE_CACHE = True
WARN_ERROR = False
TEST_NEW_PARSER = False


def reset():
global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR
global STRICT_MODE, NON_DESTRUCTIVE, FULL_REFRESH, USE_CACHE, WARN_ERROR, \
TEST_NEW_PARSER

STRICT_MODE = False
NON_DESTRUCTIVE = False
FULL_REFRESH = False
USE_CACHE = True
WARN_ERROR = False
TEST_NEW_PARSER = False
11 changes: 11 additions & 0 deletions core/dbt/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ def update_flags(parsed):
elif arg_full_refresh:
flags.FULL_REFRESH = True

flags.TEST_NEW_PARSER = getattr(parsed, 'test_new_parser', False)


def _build_base_subparser():
base_subparser = argparse.ArgumentParser(add_help=False)
Expand Down Expand Up @@ -637,6 +639,15 @@ def parse_args(args):
help=argparse.SUPPRESS,
)

# if set, extract all models and blocks with the jinja block extractor, and
# verify that we don't fail anywhere the actual jinja parser passes. The
# reverse (passing files that ends up failing jinja) is fine.
p.add_argument(
'--test-new-parser',
action='store_true',
help=argparse.SUPPRESS
)

subs = p.add_subparsers(title="Available sub-commands")

base_subparser = _build_base_subparser()
Expand Down
13 changes: 13 additions & 0 deletions core/dbt/parser/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,16 @@ def parse_node(self, node, node_path, package_project_config, tags=None,
parsed_node.validate()

return parsed_node

def check_block_parsing(self, name, path, contents):
"""Check if we were able to extract toplevel blocks from the given
contents. Return True if extraction was successful (no exceptions),
False if it fails.
"""
if not dbt.flags.TEST_NEW_PARSER:
return True
try:
dbt.clients.jinja.extract_toplevel_blocks(contents)
except Exception:
return False
return True
15 changes: 15 additions & 0 deletions core/dbt/parser/base_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from dbt.contracts.graph.unparsed import UnparsedNode
from dbt.parser.base import MacrosKnownParser
from dbt.node_types import NodeType


class BaseSqlParser(MacrosKnownParser):
Expand Down Expand Up @@ -74,7 +75,21 @@ def parse_sql_node(self, node_dict, tags=None):
node.name)

project = self.all_projects.get(package_name)

parse_ok = True
if node.resource_type == NodeType.Model:
parse_ok = self.check_block_parsing(
node.name, node.original_file_path, node.raw_sql
)

node_parsed = self.parse_node(node, unique_id, project, tags=tags)
if not parse_ok:
# if we had a parse error in parse_node, we would not get here. So
# this means we rejected a good file :(
raise dbt.exceptions.InternalException(
'the block parser rejected a good node: {} was marked invalid '
'but is actually valid!'.format(node.original_file_path)
)
return unique_id, node_parsed

def parse_sql_nodes(self, nodes, tags=None):
Expand Down
25 changes: 18 additions & 7 deletions core/dbt/parser/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ def build_test_node(self, test_target, package_name, test, root_dir, path,
full_path = get_pseudo_test_path(full_name, test_path, 'schema_test')
raw_sql = self._build_raw_sql(test_namespace, test_target, test_type,
test_args)

unparsed = UnparsedNode(
name=full_name,
resource_type=NodeType.Test,
Expand All @@ -295,13 +296,23 @@ def build_test_node(self, test_target, package_name, test, root_dir, path,
node_path = self.get_path(NodeType.Test, unparsed.package_name,
unparsed.name)

return self.parse_node(unparsed,
node_path,
source_package,
tags=['schema'],
fqn_extra=None,
fqn=fqn_override,
column_name=column_name)
result = self.parse_node(unparsed,
node_path,
source_package,
tags=['schema'],
fqn_extra=None,
fqn=fqn_override,
column_name=column_name)

parse_ok = self.check_block_parsing(full_name, test_path, raw_sql)
if not parse_ok:
# if we had a parse error in parse_node, we would not get here. So
# this means we rejected a good file :(
raise dbt.exceptions.InternalException(
'the block parser rejected a good node: {} was marked invalid '
'but is actually valid!'.format(test_path)
)
return result


class SchemaModelParser(SchemaBaseTestParser):
Expand Down
17 changes: 12 additions & 5 deletions test/integration/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,16 +441,23 @@ def project_config(self):
def profile_config(self):
return {}

def run_dbt(self, args=None, expect_pass=True, strict=True):
def run_dbt(self, args=None, expect_pass=True, strict=True, parser=True):
if args is None:
args = ["run"]

final_args = []

if strict:
args = ["--strict"] + args
args.append('--log-cache-events')
logger.info("Invoking dbt with {}".format(args))
final_args.append('--strict')
if parser:
final_args.append('--test-new-parser')

final_args.extend(args)
final_args.append('--log-cache-events')

logger.info("Invoking dbt with {}".format(final_args))

res, success = dbt.handle_and_check(args)
res, success = dbt.handle_and_check(final_args)
self.assertEqual(
success, expect_pass,
"dbt exit state did not match expected")
Expand Down
Loading