Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add parser #292

Merged
merged 27 commits into from
Mar 2, 2017
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
78e2353
first pass at wiring in parser
Feb 20, 2017
39e2c1c
add fqn to model representation, rewiring some of the compiler
Feb 20, 2017
6658d87
Merge branch 'development' of github.com:analyst-collective/dbt into …
Feb 22, 2017
beedfd5
almost there
Feb 23, 2017
2473b12
down to 10 integration test failures
Feb 23, 2017
50d9896
schema and data tests running in parser
Feb 26, 2017
a125043
archive passing, hooks not so much
Feb 26, 2017
5a64113
integration tests passing!
Feb 26, 2017
f34892f
remove runners (they are unused now)
Feb 26, 2017
1a2ada7
remove get_compiled_models -- unused
Feb 26, 2017
c7dd776
ripping things out, part 1: compiled_model.py
Feb 26, 2017
8dc998b
ripping stuff out, part 2: archival and other unused model types
Feb 26, 2017
b3b17ee
pep8 compliance
Feb 27, 2017
7db4b1d
remove print() call from runner.py
Feb 27, 2017
7a0039d
remove print() calls from selector.py
Feb 27, 2017
6a3202e
remove schema_tester, dbt.archival import
Feb 27, 2017
2a2aec2
fix unit tests, compile cmd
Feb 27, 2017
305c80c
functional test improvements
Feb 27, 2017
77c480a
fix skipping, functional testing w/ revzilla
Feb 27, 2017
ac3e5ee
hooks work... finishing up?
Feb 27, 2017
102e8df
add compat module to deal with str/unicode/basestring diffs in 2 vs 3
Feb 27, 2017
f24c0b1
switch compilation import
Feb 27, 2017
99b9ea1
fun with string compatibility
Feb 28, 2017
d96913d
write_file is necessary
Feb 28, 2017
96b3d49
merged master
Mar 2, 2017
caf6105
re-add analyses
Mar 2, 2017
d3142cb
update changelog
Mar 2, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
421 changes: 328 additions & 93 deletions dbt/compilation.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions dbt/compiled_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def contents(self):
if self._contents is None:
with open(self.data['build_path']) as fh:
self._contents = to_unicode(fh.read(), 'utf-8')

return self._contents

def compile(self, context, profile, existing):
Expand Down
Empty file added dbt/contracts/graph/__init__.py
Empty file.
44 changes: 44 additions & 0 deletions dbt/contracts/graph/compiled.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from voluptuous import Schema, Required, All, Any, Extra, Range, Optional, \
Length
from voluptuous.error import Invalid, MultipleInvalid

from dbt.exceptions import ValidationException
from dbt.logger import GLOBAL_LOGGER as logger

from dbt.contracts.graph.parsed import parsed_graph_item_contract

# Contract for a graph item after compilation: every field required by the
# parsed contract, plus the compilation and CTE-injection fields below.
compiled_graph_item_contract = parsed_graph_item_contract.extend({
    # results of compiling the item
    Required('compiled'): bool,
    Required('compiled_sql'): Any(str, None),

    # results of injecting extra CTEs
    Required('extra_ctes_injected'): bool,
    Required('extra_cte_ids'): All(list, [str]),
    Required('extra_cte_sql'): All(list, [str]),
    Required('injected_sql'): Any(str, None),
})


def validate_one(compiled_graph_item):
    """Check a single item against ``compiled_graph_item_contract``.

    Returns None when the item conforms.

    Raises:
        ValidationException: when voluptuous reports the item as invalid.
            The voluptuous error is logged at info level first and its
            string form becomes the exception message.
    """
    try:
        compiled_graph_item_contract(compiled_graph_item)
    except Invalid as invalid:
        logger.info(invalid)
        raise ValidationException(str(invalid))


def validate(compiled_graph):
    """Check every entry of a compiled graph mapping.

    Each value must satisfy ``compiled_graph_item_contract`` and its
    ``unique_id`` must equal the key it is stored under.

    Raises:
        ValidationException: on the first contract violation or on the
            first key / ``unique_id`` mismatch. Both cases are logged at
            info level before raising.
    """
    try:
        for unique_id, node in compiled_graph.items():
            compiled_graph_item_contract(node)

            if node.get('unique_id') == unique_id:
                continue

            error_msg = 'unique_id must match key name in compiled graph!'
            logger.info(error_msg)
            raise ValidationException(error_msg)

    except Invalid as invalid:
        logger.info(invalid)
        raise ValidationException(str(invalid))
66 changes: 66 additions & 0 deletions dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from voluptuous import Schema, Required, All, Any, Extra, Range, Optional, \
Length
from voluptuous.error import Invalid, MultipleInvalid

from dbt.exceptions import ValidationException
from dbt.logger import GLOBAL_LOGGER as logger

from dbt.contracts.graph.unparsed import unparsed_graph_item_contract

# Shape of the `config` dict carried by every parsed graph item.
config_contract = {
    # always present
    Required('enabled'): bool,
    Required('materialized'): Any('table', 'view', 'ephemeral', 'incremental'),
    Required('post-hook'): list,
    Required('pre-hook'): list,
    Required('vars'): dict,

    # optional at the schema level
    Optional('sql_where'): str,
    Optional('unique_key'): str,
}

# Contract for a graph item after parsing: every field required by the
# unparsed contract, plus identifiers and the parsed fields below.
parsed_graph_item_contract = unparsed_graph_item_contract.extend({
    # identifiers
    Required('unique_id'): All(str, Length(min=1, max=255)),
    Required('fqn'): All(list, [All(str)]),

    # fields produced by parsing
    Required('depends_on'): All(list, [All(str, Length(min=1, max=255))]),
    Required('empty'): bool,
    Required('config'): config_contract,
})

def validate_one(parsed_graph_item):
    """Check a single item against ``parsed_graph_item_contract``.

    After the schema check, the pairing of ``materialized`` and
    ``sql_where`` in the item's config is checked: an incremental model must
    have ``sql_where``, and any other materialization must not.

    Raises:
        ValidationException: when the schema check fails (the voluptuous
            error is logged at info level first), or when the
            ``sql_where`` rule above is violated.
    """
    try:
        parsed_graph_item_contract(parsed_graph_item)
    except Invalid as invalid:
        logger.info(invalid)
        raise ValidationException(str(invalid))

    item_config = parsed_graph_item.get('config', {})
    is_incremental = item_config.get('materialized') == 'incremental'
    has_sql_where = item_config.get('sql_where') is not None

    if is_incremental and not has_sql_where:
        raise ValidationException(
            'missing `sql_where` for an incremental model')

    if has_sql_where and not is_incremental:
        raise ValidationException(
            'invalid field `sql_where` for a non-incremental model')


def validate(parsed_graph):
    """Check every entry of a parsed graph mapping.

    Each value must satisfy ``parsed_graph_item_contract`` and its
    ``unique_id`` must equal the key it is stored under.

    Raises:
        ValidationException: on the first contract violation or on the
            first key / ``unique_id`` mismatch. Both cases are logged at
            info level before raising.
    """
    try:
        for k, v in parsed_graph.items():
            parsed_graph_item_contract(v)

            if v.get('unique_id') != k:
                # Adjacent string literals are concatenated verbatim, so the
                # separating space has to be written explicitly.
                error_msg = ('unique_id must match key name in parsed graph! '
                             'key: {}, model: {}'
                             .format(k, v))
                logger.info(error_msg)
                raise ValidationException(error_msg)

    except Invalid as e:
        logger.info(e)
        raise ValidationException(str(e))
27 changes: 27 additions & 0 deletions dbt/contracts/graph/unparsed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from voluptuous import Schema, Required, All, Any, Extra, Range, Optional, \
Length
from voluptuous.error import Invalid, MultipleInvalid

from dbt.exceptions import ValidationException
from dbt.logger import GLOBAL_LOGGER as logger

# Contract for a graph item as loaded from disk, before any parsing.
unparsed_graph_item_contract = Schema({
    # identifiers
    Required('name'): All(str, Length(min=1, max=63)),
    Required('package_name'): str,

    # where the item lives on disk, and its raw contents
    Required('root_path'): str,
    Required('path'): str,
    Required('raw_sql'): str,
})


def validate(unparsed_graph):
    """Check every item of an iterable against the unparsed contract.

    Raises:
        ValidationException: on the first item voluptuous reports as
            invalid. The voluptuous error is logged at info level first.
    """
    try:
        for unparsed_item in unparsed_graph:
            unparsed_graph_item_contract(unparsed_item)

    except Invalid as invalid:
        logger.info(invalid)
        raise ValidationException(str(invalid))
28 changes: 28 additions & 0 deletions dbt/contracts/project.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from voluptuous import Schema, Required, All, Any, Extra, Range, Optional, \
Length, ALLOW_EXTRA
from voluptuous.error import Invalid, MultipleInvalid

from dbt.exceptions import ValidationException
from dbt.logger import GLOBAL_LOGGER as logger

# A project only has to declare a string `name`; any other keys are allowed.
project_contract = Schema(
    {Required('name'): str},
    extra=ALLOW_EXTRA,
)

# A mapping from string keys to projects that each satisfy project_contract.
projects_list_contract = Schema({str: project_contract})

def validate(project):
    """Check a single project against ``project_contract``.

    Raises:
        ValidationException: when voluptuous reports the project as
            invalid. The voluptuous error is logged at info level first.
    """
    try:
        project_contract(project)
    except Invalid as invalid:
        logger.info(invalid)
        raise ValidationException(str(invalid))

def validate_list(projects):
    """Check a mapping of projects against ``projects_list_contract``.

    Raises:
        ValidationException: when voluptuous reports the mapping as
            invalid. The voluptuous error is logged at info level first.
    """
    try:
        projects_list_contract(projects)
    except Invalid as invalid:
        logger.info(invalid)
        raise ValidationException(str(invalid))
10 changes: 8 additions & 2 deletions dbt/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class SourceConfig(object):
]

def __init__(self, active_project, own_project, fqn):
self._config = None
self.active_project = active_project
self.own_project = own_project
self.fqn = fqn
Expand Down Expand Up @@ -101,7 +102,8 @@ def config(self):
return cfg

def is_full_refresh(self):
if hasattr(self.active_project.args, 'full_refresh'):
if hasattr(self.active_project, 'args') and \
hasattr(self.active_project.args, 'full_refresh'):
return self.active_project.args.full_refresh
else:
return False
Expand Down Expand Up @@ -169,7 +171,7 @@ def get_project_config(self, project):
for k in SourceConfig.ExtendDictFields:
config[k] = {}

model_configs = project['models']
model_configs = project.get('models')

if model_configs is None:
return config
Expand Down Expand Up @@ -208,6 +210,7 @@ class DBTSource(object):
dbt_run_type = NodeType.Base

def __init__(self, project, top_dir, rel_filepath, own_project):
self._config = None
self.project = project
self.own_project = own_project

Expand Down Expand Up @@ -256,6 +259,9 @@ def contents(self):

@property
def config(self):
if self._config is not None:
return self._config

return self.source_config.config

def update_in_model_config(self, config):
Expand Down
160 changes: 160 additions & 0 deletions dbt/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import copy
import jinja2
import jinja2.sandbox
import os

import dbt.flags
import dbt.model
import dbt.utils

import dbt.contracts.graph.parsed
import dbt.contracts.graph.unparsed
import dbt.contracts.project

class SilentUndefined(jinja2.Undefined):
    """
    Undefined type that lets the parser ignore undefined jinja2 names.

    Something like `env` is not defined while parsing; using it must not
    make the parser fail with a fatal error.
    """

    def _fail_with_undefined_error(self, *args, **kwargs):
        # Swallow the failure instead of raising.
        return None

    # Route every operator hook below to the silent handler.

    # arithmetic
    __add__ = __radd__ = _fail_with_undefined_error
    __mul__ = __rmul__ = _fail_with_undefined_error
    __div__ = __rdiv__ = _fail_with_undefined_error
    __truediv__ = __rtruediv__ = _fail_with_undefined_error
    __floordiv__ = __rfloordiv__ = _fail_with_undefined_error
    __mod__ = __rmod__ = _fail_with_undefined_error
    __pow__ = __rpow__ = _fail_with_undefined_error
    __pos__ = __neg__ = _fail_with_undefined_error

    # calling and indexing
    __call__ = __getitem__ = _fail_with_undefined_error

    # ordering comparisons
    __lt__ = __le__ = __gt__ = __ge__ = _fail_with_undefined_error

    # numeric conversions
    __int__ = __float__ = __complex__ = _fail_with_undefined_error


def get_path(resource_type, package_name, resource_name):
    """Return the dotted id ``<resource_type>.<package_name>.<resource_name>``."""
    segments = (resource_type, package_name, resource_name)
    return "{}.{}.{}".format(*segments)

def get_model_path(package_name, resource_name):
    """Return the dotted id of a model: ``models.<package>.<name>``."""
    resource_type = 'models'
    return get_path(resource_type, package_name, resource_name)

def get_macro_path(package_name, resource_name):
    """Return the dotted id of a macro: ``macros.<package>.<name>``."""
    resource_type = 'macros'
    return get_path(resource_type, package_name, resource_name)

def __ref(model):
    """Build the ``ref`` callable exposed to templates while parsing.

    The returned function accepts any positional arguments, does nothing
    and returns None; ``model`` is not used.
    """

    def ref(*args):
        return None

    return ref


def __config(model, cfg):
    """Build the ``config`` callable exposed to templates while parsing.

    The returned function accepts either exactly one positional argument
    (and no keywords) or keyword arguments only, and passes the resulting
    options to ``cfg.update_in_model_config``. Any other call shape is
    reported through ``dbt.utils.compiler_error``.
    """

    def config(*args, **kwargs):
        single_positional = len(args) == 1 and len(kwargs) == 0
        keywords_only = len(args) == 0 and len(kwargs) > 0

        if single_positional:
            opts = args[0]
        elif keywords_only:
            opts = kwargs
        else:
            dbt.utils.compiler_error(
                model.get('name'),
                "Invalid model config given inline in {}".format(model))

        cfg.update_in_model_config(opts)

    return config


def parse_model(model, model_path, root_project_config,
                package_project_config):
    """Turn one unparsed model dict into its parsed representation.

    The model's raw SQL is rendered once in a sandboxed jinja2 environment
    and the rendered text is discarded. The render exists so that inline
    ``config(...)`` calls reach the model's ``SourceConfig``. During the
    render ``ref(...)`` is a do-nothing stub and undefined names are
    tolerated via ``SilentUndefined``.

    Args:
        model: unparsed model dict; ``path``, ``name`` and ``raw_sql`` are
            read from it.
        model_path: value stored as the parsed model's ``unique_id``.
        root_project_config: root project config, handed to
            ``SourceConfig``.
        package_project_config: config of the project owning the model; its
            ``name`` is the first element of the fqn.

    Returns:
        A deep copy of ``model`` with ``depends_on`` (an empty list),
        ``unique_id``, ``config``, ``empty`` and ``fqn`` added.
    """
    parsed_model = copy.deepcopy(model)

    parsed_model.update({
        'depends_on': [],
    })

    raw_sql = model.get('raw_sql')

    # fqn = package name, then the path's directories minus the first
    # component, then the model name.
    parts = dbt.utils.split_path(model.get('path', ''))
    fqn = ([package_project_config.get('name')] +
           parts[1:-1] +
           [model.get('name')])

    config = dbt.model.SourceConfig(
        root_project_config, package_project_config, fqn)

    context = {
        'ref': __ref(parsed_model),
        'config': __config(parsed_model, config),
    }

    env = jinja2.sandbox.SandboxedEnvironment(
        undefined=SilentUndefined)

    # Rendered only for the side effects of the template's config() calls.
    env.from_string(raw_sql).render(context)

    # config.config is read after rendering so inline config is included.
    parsed_model['unique_id'] = model_path
    parsed_model['config'] = config.config
    parsed_model['empty'] = (len(raw_sql.strip()) == 0)
    parsed_model['fqn'] = fqn

    return parsed_model


def parse_models(models, projects):
    """Parse a list of unparsed models into a dict keyed by unique id.

    In strict mode the input is validated against the unparsed contract
    before parsing and the result against the parsed contract afterwards.

    Args:
        models: iterable of unparsed model dicts.
        projects: mapping of package name to project config; the root
            project is looked up under the key ``'root'``.

    Returns:
        Dict mapping each model's dotted path to its parsed model.
    """
    strict = dbt.flags.STRICT_MODE
    if strict:
        dbt.contracts.graph.unparsed.validate(models)

    parsed_by_path = {}

    for unparsed in models:
        package_name = unparsed.get('package_name', 'root')
        unique_path = get_model_path(package_name, unparsed.get('name'))

        # TODO if this is set, raise a compiler error
        parsed_by_path[unique_path] = parse_model(
            unparsed,
            unique_path,
            projects.get('root'),
            projects.get(package_name))

    if dbt.flags.STRICT_MODE:
        dbt.contracts.graph.parsed.validate(parsed_by_path)

    return parsed_by_path


def load_and_parse_files(package_name, all_projects, root_dir, relative_dirs,
                         extension, resource_type):
    """Find matching files on disk, load them and parse them as models.

    Args:
        package_name: package name recorded on every loaded model.
        all_projects: mapping of project configs, passed to
            ``parse_models``.
        root_dir: directory the search starts from; recorded as each
            model's ``root_path``.
        relative_dirs: directories to search, passed to ``find_matching``.
        extension: file-name pattern, passed to ``find_matching``.
        resource_type: currently unused; only models are supported.

    Returns:
        The result of ``parse_models`` for the loaded files.
    """
    # This module's top-level imports never load dbt.clients.system, so
    # import it here rather than rely on another module having done so.
    from dbt.clients import system

    file_matches = system.find_matching(
        root_dir,
        relative_dirs,
        extension)

    models = []

    for file_match in file_matches:
        file_contents = system.load_file_contents(
            file_match.get('absolute_path'))

        # The model name is the file's base name without its extension.
        parts = dbt.utils.split_path(file_match.get('relative_path', ''))
        name, _ = os.path.splitext(parts[-1])

        # TODO: support more than just models
        models.append({
            'name': name,
            'root_path': root_dir,
            'path': file_match.get('relative_path'),
            'package_name': package_name,
            'raw_sql': file_contents
        })

    return parse_models(models, all_projects)


def load_and_parse_models(package_name, all_projects, root_dir, relative_dirs):
    """Load and parse every model ``.sql`` file under the given directories.

    In strict mode ``all_projects`` is first validated against the projects
    list contract. Files are selected with the pattern ``[!.#~]*.sql``.

    Returns:
        The result of ``load_and_parse_files`` for resource type
        ``'models'``.
    """
    if dbt.flags.STRICT_MODE:
        dbt.contracts.project.validate_list(all_projects)

    parsed_models = load_and_parse_files(
        package_name=package_name,
        all_projects=all_projects,
        root_dir=root_dir,
        relative_dirs=relative_dirs,
        extension="[!.#~]*.sql",
        resource_type='models')

    return parsed_models
1 change: 1 addition & 0 deletions dbt/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,7 @@ def get_nodes_to_run(self, graph, include_spec, exclude_spec, model_type):

def get_compiled_models(self, linker, nodes, node_type):
compiled_models = []

for fqn in nodes:
compiled_model = make_compiled_model(fqn, linker.get_node(fqn))

Expand Down
Loading